mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
update the free_gpu_mem_fraction for H100 qwen3 qa test (#5114)
Signed-off-by: root <root@eos0274.eos.clusters.nvidia.com>
Co-authored-by: root <root@eos0274.eos.clusters.nvidia.com>
This commit is contained in:
parent
0daa70999a
commit
505678a286
@@ -1373,7 +1373,7 @@ class TestQwen3_235B_A22B(LlmapiAccuracyTestHarness):
         pytorch_config = dict(disable_overlap_scheduler=not overlap_scheduler,
                               use_cuda_graph=cuda_graph)

-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.9)
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)
         llm = LLM(
             f"{llm_models_root()}/Qwen3/saved_models_Qwen3-235B-A22B_fp8_hf",
             tensor_parallel_size=tp_size,
Loading…
Reference in New Issue
Block a user