[None][fix] update nemotron nas tests free_gpu_memory_fraction=0.8 (#6552)

Signed-off-by: Xin He (SW-GPU) <200704525+xinhe-nv@users.noreply.github.com>
This commit is contained in:
xinhe-nv 2025-08-01 18:27:22 +08:00 committed by GitHub
parent 137413fbf4
commit fca0d37798
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1495,9 +1495,10 @@ class TestNemotronNas(LlmapiAccuracyTestHarness):
MODEL_NAME = "nemotron-nas/Llama-3_1-Nemotron-51B-Instruct"
MODEL_PATH = f"{llm_models_root()}/nemotron-nas/Llama-3_1-Nemotron-51B-Instruct"
@pytest.mark.skip_less_device_memory(80000)
@pytest.mark.skip_less_device(8)
def test_auto_dtype_tp8(self):
-kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.9)
+kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.8)
pytorch_config = dict()
with LLM(self.MODEL_PATH,