From ddbaa5ef801b2d4d412610f772bd77a8f01fc5dc Mon Sep 17 00:00:00 2001
From: Netanel Haber <58652339+netanel-haber@users.noreply.github.com>
Date: Thu, 5 Jun 2025 08:30:17 +0300
Subject: [PATCH] Only pass `fast_build=true` to non-pytorch backend (#4920)

Signed-off-by: Netanel Haber <58652339+netanel-haber@users.noreply.github.com>
---
 tests/unittest/llmapi/test_llm.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/unittest/llmapi/test_llm.py b/tests/unittest/llmapi/test_llm.py
index b2b22aa1e0..19142ab13b 100644
--- a/tests/unittest/llmapi/test_llm.py
+++ b/tests/unittest/llmapi/test_llm.py
@@ -1642,9 +1642,12 @@ def llm_return_logprobs_test_harness(prompt_logprobs: Optional[int],
                                       streaming=False,
                                       backend=None):
     LLM_CLASS = LLM
+    llm_extra_kwargs = {}
     if backend == "pytorch":
         from tensorrt_llm._torch import LLM as LLM_torch
         LLM_CLASS = LLM_torch
+    else:
+        llm_extra_kwargs["fast_build"] = True
 
     llm = LLM_CLASS(
         llama_model_path,
@@ -1652,6 +1655,7 @@ def llm_return_logprobs_test_harness(prompt_logprobs: Optional[int],
         build_config=BuildConfig(gather_context_logits=True),
         tensor_parallel_size=tp_size,
         gather_generation_logits=True,
+        **llm_extra_kwargs,
     )
 
     prompts = ["A B C D E F G H I J K"]
@@ -1950,7 +1954,6 @@ def test_llm_get_queued_stats():
     llm = LLM_CLASS(model=llama_model_path,
                     kv_cache_config=global_kvcache_config,
                     tensor_parallel_size=tp_size,
-                    fast_build=True,
                     max_batch_size=1,
                     **llm_args_extra)