[None][fix] Update branch with the correct extra_attrs fix (#9857)

Signed-off-by: Jonas Li <6110159+longlee0622@users.noreply.github.com> Signed-off-by: Yukun He <23156053+hyukn@users.noreply.github.com> Co-authored-by: Yukun He <23156053+hyukn@users.noreply.github.com>
2026-01-13 22:18:36 +08:00 · 2025-12-10 10:42:35 +08:00 · 2025-12-10 10:42:35 +08:00 · b05481107c
commit b05481107c
parent 92997d608f
3 changed files with 2 additions and 9 deletions
--- a/tensorrt_llm/_torch/models/modeling_auto.py
+++ b/tensorrt_llm/_torch/models/modeling_auto.py
@@ -43,7 +43,7 @@ class AutoModelForCausalLM(Generic[TModel, TConfig]):
            config._frozen = False
            config.skip_create_weights_in_init = True
            config._frozen = True
-        extra_attrs = {}
+        extra_attrs = config.extra_attrs
        with model_extra_attrs(extra_attrs):
            model = cls(config)
        model.extra_attrs = extra_attrs
--- a/tensorrt_llm/_torch/modules/linear.py
+++ b/tensorrt_llm/_torch/modules/linear.py
@@ -2098,12 +2098,6 @@ class Linear(nn.Module):
            'cutlass', 'cublaslt', 'cuda_core'
        ]

-        # add a environment variable to override the nvfp4_allowed_backends
-        if os.environ.get("TLLM_OVERRIDE_NVFP4_ALLOWED_BACKENDS"):
-            # check that all the backends in override_nvfp4_allowed_backends are in the self.nvfp4_allowed_backends
-            self.nvfp4_allowed_backends = os.environ.get(
-                "TLLM_OVERRIDE_NVFP4_ALLOWED_BACKENDS").split(",")
-
        local_in_features = in_features
        local_out_features = out_features

--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -444,5 +444,4 @@ disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backen
 disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[DeepSeek-V3-Lite-fp8] SKIP (https://nvbugs/5722653)
 disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-3.1-8b-instruct-hf-fp8] SKIP (https://nvbugs/5722653)
 disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf] SKIP (https://nvbugs/5722653)
-accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3_2gpus[cutlass-two_model-overlap_scheduler] SKIP (https://nvbugs/5702826)
-accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3_4gpus[cutlass-two_model-overlap_scheduler] SKIP (https://nvbugs/5702826)
+accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3[cutlass-two_model-overlap_scheduler] SKIP (https://nvbugs/5702826)