mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[None][fix] update nemotron nas tests free_gpu_memory_fraction=0.8 (#6552)
Signed-off-by: Xin He (SW-GPU) <200704525+xinhe-nv@users.noreply.github.com>
This commit is contained in:
parent
137413fbf4
commit
fca0d37798
@@ -1495,9 +1495,10 @@ class TestNemotronNas(LlmapiAccuracyTestHarness):
|
||||
MODEL_NAME = "nemotron-nas/Llama-3_1-Nemotron-51B-Instruct"
|
||||
MODEL_PATH = f"{llm_models_root()}/nemotron-nas/Llama-3_1-Nemotron-51B-Instruct"
|
||||
|
||||
@pytest.mark.skip_less_device_memory(80000)
|
||||
@pytest.mark.skip_less_device(8)
|
||||
def test_auto_dtype_tp8(self):
|
||||
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.9)
|
||||
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.8)
|
||||
pytorch_config = dict()
|
||||
|
||||
with LLM(self.MODEL_PATH,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user