mirror of https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-04 18:21:52 +08:00
[None][chore] docs: clarify LoRA is not supported with --use_fp8_rowwise in Fp8RowwiseAttention (see #2603) (#10320)
Signed-off-by: Samaresh Kumar Singh <ssam3003@gmail.com>
Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com>
Co-authored-by: Kanghwan <861393+karljang@users.noreply.github.com>
Co-authored-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com>
parent 442d2e8a15
commit 64ff5cac52
@@ -1967,7 +1967,11 @@ class Fp8RowwiseAttention(Module):
         lora_layer_params=None,
         all_reduce_params: Optional[AllReduceParams] = None,
     ):
-        assert lora_layer_params is None, f"lora is not supported on {self.__class__.__name__} now"
+        assert lora_layer_params is None, (
+            f"LoRA is not supported by {self.__class__.__name__} (e.g., --use_fp8_rowwise). "
+            "If you need LoRA support, please use a non-quantized (e.g., bf16) attention implementation. "
+            "See https://github.com/NVIDIA/TensorRT-LLM/issues/2603 for details."
+        )
         qkv = self.qkv(hidden_states)

         alibi_slopes = None
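For context, the change adopts a fail-fast guard pattern: an assert whose parenthesized, multi-line message names the layer, the flag that selected it, and a pointer to further reading, instead of a terse one-liner. The following is a minimal runnable sketch of that pattern; the Fp8RowwiseAttentionSketch class and its forward signature are illustrative stand-ins, not the actual TensorRT-LLM layer.

from typing import Any, Optional


class Fp8RowwiseAttentionSketch:
    """Stand-in for a quantized attention layer that cannot apply LoRA."""

    def forward(self,
                hidden_states: Any,
                lora_layer_params: Optional[Any] = None) -> Any:
        # Fail fast with an actionable message, mirroring the style of the
        # assertion introduced in the diff above.
        assert lora_layer_params is None, (
            f"LoRA is not supported by {self.__class__.__name__} "
            "(e.g., --use_fp8_rowwise). If you need LoRA support, use a "
            "non-quantized (e.g., bf16) attention implementation. "
            "See https://github.com/NVIDIA/TensorRT-LLM/issues/2603.")
        return hidden_states  # the real layer would compute qkv, etc.


layer = Fp8RowwiseAttentionSketch()
layer.forward("activations")  # fine: no LoRA params supplied
try:
    layer.forward("activations", lora_layer_params=object())
except AssertionError as err:
    print(err)  # the clarified, actionable message

Splitting the message across adjacent string literals inside parentheses keeps each source line short while the user still sees one continuous error message.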