Mirror of https://github.com/NVIDIA/TensorRT-LLM.git

commit 2e61526d12 (parent 5f508b7d43)

fix

Signed-off-by: Xiwen Yu <13230610+VALLIS-NERIA@users.noreply.github.com>
@@ -573,8 +573,8 @@ def launchStages(pipeline, cpu_arch, enableFailFast, globalVars)
         "Build TRT-LLM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
             pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64 : CONFIG_LINUX_X86_64_VANILLA),
-        // Disable CUDA12 build for too slow to build (cost > 5 hours on SBSA)
-        // "Build TRT-LLM CUDA12": [LLM_DOCKER_IMAGE_CU12] + prepareLLMBuild(
-        //     pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_CU12 : CONFIG_LINUX_X86_64_VANILLA_CU12),
+        "Build TRT-LLM CUDA12": [LLM_DOCKER_IMAGE_CU12] + prepareLLMBuild(
+            pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_CU12 : CONFIG_LINUX_X86_64_VANILLA_CU12),
         "Build TRT-LLM LLVM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
             pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_LLVM : CONFIG_LINUX_X86_64_LLVM),
         "Build TRT-LLM Pybind": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
@@ -91,7 +91,7 @@ def _mistral_forward(
     pixel_values: torch.Tensor,
     image_sizes: Optional[torch.Tensor],
 ):
-    return inputs_embeds
+    return inputs_embeds.clone()
 
 def _vision_branch(
     # ! The type annotations in the original transformers code are all wrong.
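Not part of the patch itself, but a minimal sketch of the aliasing issue that returning a .clone() typically guards against, assuming a caller later writes into the returned embeddings in place (for example when scattering image features into the text embeddings); the function names below are illustrative only:

import torch

def get_embeds_no_clone(inputs_embeds: torch.Tensor) -> torch.Tensor:
    return inputs_embeds          # same storage as the caller's tensor

def get_embeds_clone(inputs_embeds: torch.Tensor) -> torch.Tensor:
    return inputs_embeds.clone()  # independent copy; in-place edits stay local

base = torch.zeros(2, 4)
aliased = get_embeds_no_clone(base)
aliased[0, 0] = 1.0               # also mutates `base` (shared storage)
copied = get_embeds_clone(base)
copied[1, 1] = 1.0                # `base` is unaffected
print(base[0, 0].item(), base[1, 1].item())  # prints: 1.0 0.0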
@@ -65,11 +65,11 @@ def _check_ad_config(experiment_config: ExperimentConfig, llm_args: LlmArgs):
             compile_backend="torch-simple",
         ),
-        # disabled due to https://nvbugspro.nvidia.com/bug/5505835
-        # get_small_model_config(
-        #     "meta-llama/Llama-4-Scout-17B-16E-Instruct",
-        #     attn_backend="flashinfer",
-        #     compile_backend="torch-simple",
-        # ),
+        get_small_model_config(
+            "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+            attn_backend="flashinfer",
+            compile_backend="torch-simple",
+        ),
         get_small_model_config(
             "deepseek-ai/DeepSeek-V3",
             attn_backend="triton",
@@ -97,6 +97,8 @@ def _check_ad_config(experiment_config: ExperimentConfig, llm_args: LlmArgs):
     ],
 )
 def test_build_ad(experiment_config: Dict):
+    if "Llama-4" in experiment_config["args"]["model"]:
+        pytest.skip("https://nvbugspro.nvidia.com/bug/5505835")
     experiment_config["args"]["runtime"] = "demollm"  # Default runtime set to demollm
     experiment_config["args"]["world_size"] = 0  # Default world_size set to 0
     experiment_config = ExperimentConfig(**experiment_config)
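The added lines follow the usual pytest pattern of keeping a known-broken parametrization in the matrix but skipping it at runtime, so the case is reported as SKIPPED with the bug link instead of silently disappearing. A self-contained sketch of the pattern (model names mirror the configs above; the body is a placeholder, not the repo's real test logic):

import pytest

@pytest.mark.parametrize(
    "model",
    [
        "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "deepseek-ai/DeepSeek-V3",
    ],
)
def test_build(model):
    # Skip the configuration hit by the open bug; the rest of the matrix still runs.
    if "Llama-4" in model:
        pytest.skip("https://nvbugspro.nvidia.com/bug/5505835")
    assert isinstance(model, str)  # placeholder for the real build/validation steps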