From 505678a286f680855e123fc12a2fb65c384e2f5b Mon Sep 17 00:00:00 2001 From: bhsueh_NV <11360707+byshiue@users.noreply.github.com> Date: Thu, 12 Jun 2025 14:40:57 +0800 Subject: [PATCH] update the free_gpu_mem_fraction for H100 qwen3 qa test (#5114) Signed-off-by: root Co-authored-by: root --- tests/integration/defs/accuracy/test_llm_api_pytorch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py index 269c597120..b98f4ae712 100644 --- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py +++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py @@ -1373,7 +1373,7 @@ class TestQwen3_235B_A22B(LlmapiAccuracyTestHarness): pytorch_config = dict(disable_overlap_scheduler=not overlap_scheduler, use_cuda_graph=cuda_graph) - kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.9) + kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6) llm = LLM( f"{llm_models_root()}/Qwen3/saved_models_Qwen3-235B-A22B_fp8_hf", tensor_parallel_size=tp_size,