mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
[None][fix] Update branch with the correct extra_attrs fix (#9857)
Signed-off-by: Jonas Li <6110159+longlee0622@users.noreply.github.com>
Signed-off-by: Yukun He <23156053+hyukn@users.noreply.github.com>
Co-authored-by: Yukun He <23156053+hyukn@users.noreply.github.com>
This commit is contained in:
parent
92997d608f
commit
b05481107c
@ -43,7 +43,7 @@ class AutoModelForCausalLM(Generic[TModel, TConfig]):
|
||||
config._frozen = False
|
||||
config.skip_create_weights_in_init = True
|
||||
config._frozen = True
|
||||
extra_attrs = {}
|
||||
extra_attrs = config.extra_attrs
|
||||
with model_extra_attrs(extra_attrs):
|
||||
model = cls(config)
|
||||
model.extra_attrs = extra_attrs
|
||||
|
||||
@ -2098,12 +2098,6 @@ class Linear(nn.Module):
|
||||
'cutlass', 'cublaslt', 'cuda_core'
|
||||
]
|
||||
|
||||
# add an environment variable to override the nvfp4_allowed_backends
|
||||
if os.environ.get("TLLM_OVERRIDE_NVFP4_ALLOWED_BACKENDS"):
|
||||
# check that all the backends in override_nvfp4_allowed_backends are in the self.nvfp4_allowed_backends
|
||||
self.nvfp4_allowed_backends = os.environ.get(
|
||||
"TLLM_OVERRIDE_NVFP4_ALLOWED_BACKENDS").split(",")
|
||||
|
||||
local_in_features = in_features
|
||||
local_out_features = out_features
|
||||
|
||||
|
||||
@ -444,5 +444,4 @@ disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backen
|
||||
disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[DeepSeek-V3-Lite-fp8] SKIP (https://nvbugs/5722653)
|
||||
disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-3.1-8b-instruct-hf-fp8] SKIP (https://nvbugs/5722653)
|
||||
disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf] SKIP (https://nvbugs/5722653)
|
||||
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3_2gpus[cutlass-two_model-overlap_scheduler] SKIP (https://nvbugs/5702826)
|
||||
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3_4gpus[cutlass-two_model-overlap_scheduler] SKIP (https://nvbugs/5702826)
|
||||
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3[cutlass-two_model-overlap_scheduler] SKIP (https://nvbugs/5702826)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user