mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-16 15:55:08 +08:00
[None][chore] update model list (#11364)
Signed-off-by: Tal Cherckez <127761168+tcherckez-nvidia@users.noreply.github.com>
This commit is contained in:
parent
4a743338c3
commit
ea81a03dd1
@ -1,4 +1,4 @@
|
||||
# Configuration for DeepSeek V3 and R1 with reduced layers
|
||||
# Full models are too large, so we test with limited layers
|
||||
model_kwargs:
|
||||
num_hidden_layers: 10
|
||||
num_hidden_layers: 5
|
||||
@ -0,0 +1,5 @@
|
||||
# Configuration for Qwen3-VL models
|
||||
# Forces consistent dtype to avoid BFloat16/Float32 mismatch
|
||||
|
||||
model_kwargs:
|
||||
torch_dtype: bfloat16
|
||||
@ -18,8 +18,9 @@ models:
|
||||
# DISABLED: TorchDynamo compilation error - fake tensor dispatch failure
|
||||
# - name: apple/OpenELM-3B-Instruct
|
||||
# yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml', 'openelm.yaml']
|
||||
- name: microsoft/Phi-4-mini-instruct
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml']
|
||||
# DISABLED: model not supporting installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980
|
||||
# - name: microsoft/Phi-4-mini-instruct
|
||||
# yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml']
|
||||
- name: microsoft/Phi-4-mini-reasoning
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml']
|
||||
- name: google/gemma-3-1b-it
|
||||
@ -115,8 +116,9 @@ models:
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
|
||||
- name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-FP8
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
|
||||
- name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-NVFP4
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
|
||||
# DISABLED: NVFP4 quantization not supported for pre BLW - CW has only Hopper
|
||||
# - name: nvidia/NVIDIA-Nemotron-Nano-9B-v2-NVFP4
|
||||
# yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
|
||||
- name: nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-FP8
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
|
||||
- name: google/gemma-3-27b-it
|
||||
@ -126,10 +128,6 @@ models:
|
||||
# DISABLED: Network timeout downloading from Hugging Face
|
||||
# - name: ai21labs/AI21-Jamba-1.5-Mini
|
||||
# yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
|
||||
- name: zai-org/glm-4v-9b
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
|
||||
# - name: zai-org/GLM-4.7
|
||||
# yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
|
||||
- name: meta-llama/Llama-3.2-11B-Vision-Instruct
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
|
||||
- name: meta-llama/Llama-3.3-70B-Instruct
|
||||
@ -166,8 +164,8 @@ models:
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
|
||||
- name: nvidia/OpenReasoning-Nemotron-32B
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
|
||||
- name: mistralai/Mistral-Large-Instruct-v2.1
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
|
||||
- name: mistralai/Mistral-Large-Instruct-2407
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
|
||||
- name: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
|
||||
- name: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
|
||||
@ -186,26 +184,30 @@ models:
|
||||
# DISABLED: Model loading failure - dynamic module registry issue
|
||||
# - name: nvidia/Llama-3_1-Nemotron-51B-Instruct
|
||||
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
|
||||
- name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
|
||||
- name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1-FP8
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
|
||||
- name: nvidia/Llama-3_3-Nemotron-Super-49B-v1
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
|
||||
# DISABLED: model not supporting installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980
|
||||
# - name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1
|
||||
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
|
||||
# DISABLED: model not supporting installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980
|
||||
# - name: nvidia/Llama-3_1-Nemotron-Ultra-253B-v1-FP8
|
||||
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
|
||||
# DISABLED: model not supporting installed transformers version - https://github.com/NVIDIA/TensorRT-LLM/issues/10980
|
||||
# - name: nvidia/Llama-3_3-Nemotron-Super-49B-v1
|
||||
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
|
||||
- name: Qwen/Qwen3-30B-A3B
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
|
||||
- name: Qwen/Qwen3-235B-A22B
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
|
||||
- name: deepseek-ai/DeepSeek-R1
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'deepseek_v3_lite.yaml']
|
||||
# DISABLED: Auto-deploy compilation error - shape mismatch - https://github.com/NVIDIA/TensorRT-LLM/issues/10978
|
||||
# - name: deepseek-ai/DeepSeek-R1
|
||||
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
|
||||
# DISABLED: Auto-deploy compilation error - shape mismatch - https://github.com/NVIDIA/TensorRT-LLM/issues/10978
|
||||
# - name: deepseek-ai/DeepSeek-V3
|
||||
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'deepseek_v3_lite.yaml']
|
||||
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
|
||||
# DISABLED: Auto-deploy compilation error - shape mismatch - https://github.com/NVIDIA/TensorRT-LLM/issues/10978
|
||||
# - name: deepseek-ai/DeepSeek-Coder-V2-Instruct
|
||||
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
|
||||
- name: Qwen/Qwen3-VL-8B-Instruct
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'qwen3_vl.yaml']
|
||||
# DISABLED: NOT SUPPORTED - https://github.com/NVIDIA/TensorRT-LLM/issues/10363
|
||||
# - name: Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4
|
||||
# yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml']
|
||||
@ -214,7 +216,7 @@ models:
|
||||
- name: meta-llama/Llama-3.2-90B-Vision-Instruct
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml']
|
||||
- name: openai/gpt-oss-120b
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
|
||||
- name: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'llama4_scout.yaml']
|
||||
- name: meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
|
||||
Loading…
Reference in New Issue
Block a user