Signed-off-by: Xiwen Yu <13230610+VALLIS-NERIA@users.noreply.github.com>
This commit is contained in:
Xiwen Yu 2025-09-10 10:34:18 +08:00
parent 5f508b7d43
commit 2e61526d12
3 changed files with 10 additions and 8 deletions

View File

@ -573,8 +573,8 @@ def launchStages(pipeline, cpu_arch, enableFailFast, globalVars)
"Build TRT-LLM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64 : CONFIG_LINUX_X86_64_VANILLA),
// Disable CUDA12 build because it is too slow to build (takes > 5 hours on SBSA)
// "Build TRT-LLM CUDA12": [LLM_DOCKER_IMAGE_CU12] + prepareLLMBuild(
// pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_CU12 : CONFIG_LINUX_X86_64_VANILLA_CU12),
"Build TRT-LLM CUDA12": [LLM_DOCKER_IMAGE_CU12] + prepareLLMBuild(
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_CU12 : CONFIG_LINUX_X86_64_VANILLA_CU12),
"Build TRT-LLM LLVM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_LLVM : CONFIG_LINUX_X86_64_LLVM),
"Build TRT-LLM Pybind": [LLM_DOCKER_IMAGE] + prepareLLMBuild(

View File

@ -91,7 +91,7 @@ def _mistral_forward(
pixel_values: torch.Tensor,
image_sizes: Optional[torch.Tensor],
):
return inputs_embeds
return inputs_embeds.clone()
def _vision_branch(
# ! The type annotations in the original transformers code are all wrong.

View File

@ -65,11 +65,11 @@ def _check_ad_config(experiment_config: ExperimentConfig, llm_args: LlmArgs):
compile_backend="torch-simple",
),
# disabled due to https://nvbugspro.nvidia.com/bug/5505835
# get_small_model_config(
# "meta-llama/Llama-4-Scout-17B-16E-Instruct",
# attn_backend="flashinfer",
# compile_backend="torch-simple",
# ),
get_small_model_config(
"meta-llama/Llama-4-Scout-17B-16E-Instruct",
attn_backend="flashinfer",
compile_backend="torch-simple",
),
get_small_model_config(
"deepseek-ai/DeepSeek-V3",
attn_backend="triton",
@ -97,6 +97,8 @@ def _check_ad_config(experiment_config: ExperimentConfig, llm_args: LlmArgs):
],
)
def test_build_ad(experiment_config: Dict):
if "Llama-4" in experiment_config["args"]["model"]:
pytest.skip("https://nvbugspro.nvidia.com/bug/5505835")
experiment_config["args"]["runtime"] = "demollm" # Default runtime set to demollm
experiment_config["args"]["world_size"] = 0 # Default world_size set to 0
experiment_config = ExperimentConfig(**experiment_config)