Update free_gpu_mem_fraction for the H100 Qwen3 QA test (#5114)

Signed-off-by: root <root@eos0274.eos.clusters.nvidia.com>
Co-authored-by: root <root@eos0274.eos.clusters.nvidia.com>
Author: bhsueh_NV (committed by GitHub), 2025-06-12 14:40:57 +08:00
Commit: 505678a286, parent: 0daa70999a

@@ -1373,7 +1373,7 @@ class TestQwen3_235B_A22B(LlmapiAccuracyTestHarness):
         pytorch_config = dict(disable_overlap_scheduler=not overlap_scheduler,
                               use_cuda_graph=cuda_graph)
-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.9)
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)
         llm = LLM(
             f"{llm_models_root()}/Qwen3/saved_models_Qwen3-235B-A22B_fp8_hf",
             tensor_parallel_size=tp_size,
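
For reference, a minimal, self-contained sketch of how a KV-cache memory fraction like the one changed above is typically wired into the TensorRT-LLM LLM API; the model id, parallelism, prompt, and sampling settings below are illustrative placeholders, not the values from this QA test:

# Hedged sketch: placeholder model and settings, not the test's configuration.
from tensorrt_llm import LLM, SamplingParams
from tensorrt_llm.llmapi import KvCacheConfig

# Reserve at most 60% of the GPU memory left after weight loading for the
# paged KV cache; a lower fraction leaves more headroom for activations,
# CUDA graphs, and other runtime allocations.
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)

llm = LLM(
    model="Qwen/Qwen3-30B-A3B",      # placeholder checkpoint, not the FP8 235B model
    tensor_parallel_size=2,          # placeholder parallelism
    kv_cache_config=kv_cache_config,
)

outputs = llm.generate(["Hello, my name is"], SamplingParams(max_tokens=32))
print(outputs[0].outputs[0].text)

In the test above, dropping free_gpu_memory_fraction from 0.9 to 0.6 caps the paged KV-cache pool at a smaller share of the remaining H100 memory, leaving more of it free for other allocations during the run.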