mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
update the free_gpu_mem_fraction for H100 qwen3 qa test (#5114)
Signed-off-by: root <root@eos0274.eos.clusters.nvidia.com>
Co-authored-by: root <root@eos0274.eos.clusters.nvidia.com>
This commit is contained in:
parent
0daa70999a
commit
505678a286
@@ -1373,7 +1373,7 @@ class TestQwen3_235B_A22B(LlmapiAccuracyTestHarness):
         pytorch_config = dict(disable_overlap_scheduler=not overlap_scheduler,
                               use_cuda_graph=cuda_graph)

-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.9)
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)
         llm = LLM(
             f"{llm_models_root()}/Qwen3/saved_models_Qwen3-235B-A22B_fp8_hf",
             tensor_parallel_size=tp_size,
Loading…
Reference in New Issue
Block a user