mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-23 12:12:39 +08:00
2876 lines
160 KiB
JSON
{
  "entries": [
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc4.yaml",
      "concurrency": 4,
      "config_filename": "1k1k_tp8_conc4.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc4.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc4.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc4.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "8xB200_NVL",
      "isl": 1024,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "Min Latency"
    },
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc8.yaml",
      "concurrency": 8,
      "config_filename": "1k1k_tp8_conc8.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc8.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc8.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc8.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "8xB200_NVL",
      "isl": 1024,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "Low Latency"
    },
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc16.yaml",
      "concurrency": 16,
      "config_filename": "1k1k_tp8_conc16.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc16.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc16.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc16.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "8xB200_NVL",
      "isl": 1024,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "Balanced"
    },
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc32.yaml",
      "concurrency": 32,
      "config_filename": "1k1k_tp8_conc32.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc32.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc32.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc32.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "8xB200_NVL",
      "isl": 1024,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "High Throughput"
    },
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc64.yaml",
      "concurrency": 64,
      "config_filename": "1k1k_tp8_conc64.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc64.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc64.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc64.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "8xB200_NVL",
      "isl": 1024,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "Max Throughput"
    },
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc4.yaml",
      "concurrency": 4,
      "config_filename": "8k1k_tp8_conc4.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc4.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc4.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc4.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "8xB200_NVL",
      "isl": 8192,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "Min Latency"
    },
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc8.yaml",
      "concurrency": 8,
      "config_filename": "8k1k_tp8_conc8.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc8.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc8.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc8.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "8xB200_NVL",
      "isl": 8192,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "Low Latency"
    },
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc16.yaml",
      "concurrency": 16,
      "config_filename": "8k1k_tp8_conc16.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc16.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc16.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc16.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "8xB200_NVL",
      "isl": 8192,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "Balanced"
    },
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc32.yaml",
      "concurrency": 32,
      "config_filename": "8k1k_tp8_conc32.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc32.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc32.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc32.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "8xB200_NVL",
      "isl": 8192,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "High Throughput"
    },
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc64.yaml",
      "concurrency": 64,
      "config_filename": "8k1k_tp8_conc64.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc64.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc64.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc64.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "8xB200_NVL",
      "isl": 8192,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "Max Throughput"
    },
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc4.yaml",
      "concurrency": 4,
      "config_filename": "1k1k_tp8_conc4.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc4.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc4.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc4.yaml",
      "gpu": "H200_SXM",
      "gpu_display": "8xH200_SXM",
      "isl": 1024,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "Min Latency"
    },
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc8.yaml",
      "concurrency": 8,
      "config_filename": "1k1k_tp8_conc8.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc8.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc8.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc8.yaml",
      "gpu": "H200_SXM",
      "gpu_display": "8xH200_SXM",
      "isl": 1024,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "Low Latency"
    },
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc16.yaml",
      "concurrency": 16,
      "config_filename": "1k1k_tp8_conc16.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc16.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc16.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc16.yaml",
      "gpu": "H200_SXM",
      "gpu_display": "8xH200_SXM",
      "isl": 1024,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "Balanced"
    },
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc32.yaml",
      "concurrency": 32,
      "config_filename": "1k1k_tp8_conc32.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc32.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc32.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc32.yaml",
      "gpu": "H200_SXM",
      "gpu_display": "8xH200_SXM",
      "isl": 1024,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "High Throughput"
    },
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc64.yaml",
      "concurrency": 64,
      "config_filename": "1k1k_tp8_conc64.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc64.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc64.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc64.yaml",
      "gpu": "H200_SXM",
      "gpu_display": "8xH200_SXM",
      "isl": 1024,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "Max Throughput"
    },
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc4.yaml",
      "concurrency": 4,
      "config_filename": "8k1k_tp8_conc4.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc4.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc4.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc4.yaml",
      "gpu": "H200_SXM",
      "gpu_display": "8xH200_SXM",
      "isl": 8192,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "Min Latency"
    },
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc8.yaml",
      "concurrency": 8,
      "config_filename": "8k1k_tp8_conc8.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc8.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc8.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc8.yaml",
      "gpu": "H200_SXM",
      "gpu_display": "8xH200_SXM",
      "isl": 8192,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "Low Latency"
    },
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc16.yaml",
      "concurrency": 16,
      "config_filename": "8k1k_tp8_conc16.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc16.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc16.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc16.yaml",
      "gpu": "H200_SXM",
      "gpu_display": "8xH200_SXM",
      "isl": 8192,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "Balanced"
    },
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc32.yaml",
      "concurrency": 32,
      "config_filename": "8k1k_tp8_conc32.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc32.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc32.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc32.yaml",
      "gpu": "H200_SXM",
      "gpu_display": "8xH200_SXM",
      "isl": 8192,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "High Throughput"
    },
    {
      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc64.yaml",
      "concurrency": 64,
      "config_filename": "8k1k_tp8_conc64.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc64.yaml",
      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc64.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc64.yaml",
      "gpu": "H200_SXM",
      "gpu_display": "8xH200_SXM",
      "isl": 8192,
      "model": "deepseek-ai/DeepSeek-R1-0528",
      "model_display_name": "DeepSeek-R1",
      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
      "num_gpus": 8,
      "osl": 1024,
      "performance_profile": "Max Throughput"
    },
    {
      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc4.yaml",
      "concurrency": 4,
      "config_filename": "1k1k_tp4_conc4.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc4.yaml",
      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc4.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc4.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "4xB200_NVL",
      "isl": 1024,
      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
      "model_display_name": "DeepSeek-R1 (NVFP4)",
      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
      "num_gpus": 4,
      "osl": 1024,
      "performance_profile": "Min Latency"
    },
    {
      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc8.yaml",
      "concurrency": 8,
      "config_filename": "1k1k_tp4_conc8.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc8.yaml",
      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc8.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc8.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "4xB200_NVL",
      "isl": 1024,
      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
      "model_display_name": "DeepSeek-R1 (NVFP4)",
      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
      "num_gpus": 4,
      "osl": 1024,
      "performance_profile": "Low Latency"
    },
    {
      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc16.yaml",
      "concurrency": 16,
      "config_filename": "1k1k_tp4_conc16.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc16.yaml",
      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc16.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc16.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "4xB200_NVL",
      "isl": 1024,
      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
      "model_display_name": "DeepSeek-R1 (NVFP4)",
      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
      "num_gpus": 4,
      "osl": 1024,
      "performance_profile": "Low Latency"
    },
    {
      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc32.yaml",
      "concurrency": 32,
      "config_filename": "1k1k_tp4_conc32.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc32.yaml",
      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc32.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc32.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "4xB200_NVL",
      "isl": 1024,
      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
      "model_display_name": "DeepSeek-R1 (NVFP4)",
      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
      "num_gpus": 4,
      "osl": 1024,
      "performance_profile": "Balanced"
    },
    {
      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc64.yaml",
      "concurrency": 64,
      "config_filename": "1k1k_tp4_conc64.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc64.yaml",
      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc64.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc64.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "4xB200_NVL",
      "isl": 1024,
      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
      "model_display_name": "DeepSeek-R1 (NVFP4)",
      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
      "num_gpus": 4,
      "osl": 1024,
      "performance_profile": "High Throughput"
    },
    {
      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc128.yaml",
      "concurrency": 128,
      "config_filename": "1k1k_tp4_conc128.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc128.yaml",
      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc128.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc128.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "4xB200_NVL",
      "isl": 1024,
      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
      "model_display_name": "DeepSeek-R1 (NVFP4)",
      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
      "num_gpus": 4,
      "osl": 1024,
      "performance_profile": "High Throughput"
    },
    {
      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc256.yaml",
      "concurrency": 256,
      "config_filename": "1k1k_tp4_conc256.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc256.yaml",
      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc256.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc256.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "4xB200_NVL",
      "isl": 1024,
      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
      "model_display_name": "DeepSeek-R1 (NVFP4)",
      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
      "num_gpus": 4,
      "osl": 1024,
      "performance_profile": "Max Throughput"
    },
    {
      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc4.yaml",
      "concurrency": 4,
      "config_filename": "8k1k_tp4_conc4.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc4.yaml",
      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc4.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc4.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "4xB200_NVL",
      "isl": 8192,
      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
      "model_display_name": "DeepSeek-R1 (NVFP4)",
      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
      "num_gpus": 4,
      "osl": 1024,
      "performance_profile": "Min Latency"
    },
    {
      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc8.yaml",
      "concurrency": 8,
      "config_filename": "8k1k_tp4_conc8.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc8.yaml",
      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc8.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc8.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "4xB200_NVL",
      "isl": 8192,
      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
      "model_display_name": "DeepSeek-R1 (NVFP4)",
      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
      "num_gpus": 4,
      "osl": 1024,
      "performance_profile": "Low Latency"
    },
    {
      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc16.yaml",
      "concurrency": 16,
      "config_filename": "8k1k_tp4_conc16.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc16.yaml",
      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc16.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc16.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "4xB200_NVL",
      "isl": 8192,
      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
      "model_display_name": "DeepSeek-R1 (NVFP4)",
      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
      "num_gpus": 4,
      "osl": 1024,
      "performance_profile": "Low Latency"
    },
    {
      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc32.yaml",
      "concurrency": 32,
      "config_filename": "8k1k_tp4_conc32.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc32.yaml",
      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc32.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc32.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "4xB200_NVL",
      "isl": 8192,
      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
      "model_display_name": "DeepSeek-R1 (NVFP4)",
      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
      "num_gpus": 4,
      "osl": 1024,
      "performance_profile": "Balanced"
    },
    {
      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc64.yaml",
      "concurrency": 64,
      "config_filename": "8k1k_tp4_conc64.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc64.yaml",
      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc64.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc64.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "4xB200_NVL",
      "isl": 8192,
      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
      "model_display_name": "DeepSeek-R1 (NVFP4)",
      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
      "num_gpus": 4,
      "osl": 1024,
      "performance_profile": "High Throughput"
    },
    {
      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc128.yaml",
      "concurrency": 128,
      "config_filename": "8k1k_tp4_conc128.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc128.yaml",
      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc128.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc128.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "4xB200_NVL",
      "isl": 8192,
      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
      "model_display_name": "DeepSeek-R1 (NVFP4)",
      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
      "num_gpus": 4,
      "osl": 1024,
      "performance_profile": "High Throughput"
    },
    {
      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc256.yaml",
      "concurrency": 256,
      "config_filename": "8k1k_tp4_conc256.yaml",
      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc256.yaml",
      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc256.yaml",
      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc256.yaml",
      "gpu": "B200_NVL",
      "gpu_display": "4xB200_NVL",
      "isl": 8192,
      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
      "model_display_name": "DeepSeek-R1 (NVFP4)",
      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
      "num_gpus": 4,
      "osl": 1024,
      "performance_profile": "Max Throughput"
    },
|
|
{
|
|
"command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "1k1k_tp8_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml",
|
|
"config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"model_display_name": "DeepSeek-R1 (NVFP4)",
|
|
"model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "Min Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml",
|
|
"concurrency": 8,
|
|
"config_filename": "1k1k_tp8_conc8.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml",
|
|
"config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"model_display_name": "DeepSeek-R1 (NVFP4)",
|
|
"model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml",
|
|
"concurrency": 16,
|
|
"config_filename": "1k1k_tp8_conc16.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml",
|
|
"config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"model_display_name": "DeepSeek-R1 (NVFP4)",
|
|
"model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml",
|
|
"concurrency": 32,
|
|
"config_filename": "1k1k_tp8_conc32.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml",
|
|
"config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"model_display_name": "DeepSeek-R1 (NVFP4)",
|
|
"model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "Balanced"
|
|
},
|
|
{
|
|
"command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml",
|
|
"concurrency": 64,
|
|
"config_filename": "1k1k_tp8_conc64.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml",
|
|
"config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"model_display_name": "DeepSeek-R1 (NVFP4)",
|
|
"model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc128.yaml",
|
|
"concurrency": 128,
|
|
"config_filename": "1k1k_tp8_conc128.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc128.yaml",
|
|
"config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc128.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc128.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"model_display_name": "DeepSeek-R1 (NVFP4)",
|
|
"model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml",
|
|
"concurrency": 256,
|
|
"config_filename": "1k1k_tp8_conc256.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml",
|
|
"config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"model_display_name": "DeepSeek-R1 (NVFP4)",
|
|
"model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "Max Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "8k1k_tp8_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml",
|
|
"config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"model_display_name": "DeepSeek-R1 (NVFP4)",
|
|
"model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "Min Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml",
|
|
"concurrency": 8,
|
|
"config_filename": "8k1k_tp8_conc8.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml",
|
|
"config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"model_display_name": "DeepSeek-R1 (NVFP4)",
|
|
"model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml",
|
|
"concurrency": 16,
|
|
"config_filename": "8k1k_tp8_conc16.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml",
|
|
"config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"model_display_name": "DeepSeek-R1 (NVFP4)",
|
|
"model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml",
|
|
"concurrency": 32,
|
|
"config_filename": "8k1k_tp8_conc32.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml",
|
|
"config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"model_display_name": "DeepSeek-R1 (NVFP4)",
|
|
"model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "Balanced"
|
|
},
|
|
{
|
|
"command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml",
|
|
"concurrency": 64,
|
|
"config_filename": "8k1k_tp8_conc64.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml",
|
|
"config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"model_display_name": "DeepSeek-R1 (NVFP4)",
|
|
"model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc128.yaml",
|
|
"concurrency": 128,
|
|
"config_filename": "8k1k_tp8_conc128.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc128.yaml",
|
|
"config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc128.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc128.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"model_display_name": "DeepSeek-R1 (NVFP4)",
|
|
"model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc256.yaml",
|
|
"concurrency": 256,
|
|
"config_filename": "8k1k_tp8_conc256.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc256.yaml",
|
|
"config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc256.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc256.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"model_display_name": "DeepSeek-R1 (NVFP4)",
|
|
"model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "Max Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "1k1k_tp1_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc4.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc4.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc4.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "B200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "Min Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc8.yaml",
|
|
"concurrency": 8,
|
|
"config_filename": "1k1k_tp1_conc8.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc8.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc8.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc8.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "B200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc16.yaml",
|
|
"concurrency": 16,
|
|
"config_filename": "1k1k_tp1_conc16.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc16.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc16.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc16.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "B200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "Balanced"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc32.yaml",
|
|
"concurrency": 32,
|
|
"config_filename": "1k1k_tp1_conc32.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc32.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc32.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc32.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "B200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc64.yaml",
|
|
"concurrency": 64,
|
|
"config_filename": "1k1k_tp1_conc64.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc64.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc64.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc64.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "B200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "Max Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "1k8k_tp1_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc4.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc4.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc4.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "B200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 8192,
|
|
"performance_profile": "Min Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc8.yaml",
|
|
"concurrency": 8,
|
|
"config_filename": "1k8k_tp1_conc8.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc8.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc8.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc8.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "B200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 8192,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc16.yaml",
|
|
"concurrency": 16,
|
|
"config_filename": "1k8k_tp1_conc16.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc16.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc16.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc16.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "B200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 8192,
|
|
"performance_profile": "Balanced"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc32.yaml",
|
|
"concurrency": 32,
|
|
"config_filename": "1k8k_tp1_conc32.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc32.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc32.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc32.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "B200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 8192,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc64.yaml",
|
|
"concurrency": 64,
|
|
"config_filename": "1k8k_tp1_conc64.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc64.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc64.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc64.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "B200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 8192,
|
|
"performance_profile": "Max Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "8k1k_tp1_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc4.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc4.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc4.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "B200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "Min Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc8.yaml",
|
|
"concurrency": 8,
|
|
"config_filename": "8k1k_tp1_conc8.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc8.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc8.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc8.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "B200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc16.yaml",
|
|
"concurrency": 16,
|
|
"config_filename": "8k1k_tp1_conc16.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc16.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc16.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc16.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "B200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "Balanced"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc32.yaml",
|
|
"concurrency": 32,
|
|
"config_filename": "8k1k_tp1_conc32.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc32.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc32.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc32.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "B200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc64.yaml",
|
|
"concurrency": 64,
|
|
"config_filename": "8k1k_tp1_conc64.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc64.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc64.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc64.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "B200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "Max Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "1k1k_tp2_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "2xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 1024,
|
|
"performance_profile": "Min Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml",
|
|
"concurrency": 8,
|
|
"config_filename": "1k1k_tp2_conc8.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "2xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 1024,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml",
|
|
"concurrency": 16,
|
|
"config_filename": "1k1k_tp2_conc16.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "2xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 1024,
|
|
"performance_profile": "Balanced"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml",
|
|
"concurrency": 32,
|
|
"config_filename": "1k1k_tp2_conc32.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "2xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 1024,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml",
|
|
"concurrency": 64,
|
|
"config_filename": "1k1k_tp2_conc64.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "2xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 1024,
|
|
"performance_profile": "Max Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "1k8k_tp2_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "2xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 8192,
|
|
"performance_profile": "Min Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml",
|
|
"concurrency": 8,
|
|
"config_filename": "1k8k_tp2_conc8.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "2xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 8192,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml",
|
|
"concurrency": 16,
|
|
"config_filename": "1k8k_tp2_conc16.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "2xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 8192,
|
|
"performance_profile": "Balanced"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml",
|
|
"concurrency": 32,
|
|
"config_filename": "1k8k_tp2_conc32.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "2xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 8192,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml",
|
|
"concurrency": 64,
|
|
"config_filename": "1k8k_tp2_conc64.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "2xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 8192,
|
|
"performance_profile": "Max Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "8k1k_tp2_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "2xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 1024,
|
|
"performance_profile": "Min Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml",
|
|
"concurrency": 8,
|
|
"config_filename": "8k1k_tp2_conc8.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "2xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 1024,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml",
|
|
"concurrency": 16,
|
|
"config_filename": "8k1k_tp2_conc16.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "2xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 1024,
|
|
"performance_profile": "Balanced"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml",
|
|
"concurrency": 32,
|
|
"config_filename": "8k1k_tp2_conc32.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "2xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 1024,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml",
|
|
"concurrency": 64,
|
|
"config_filename": "8k1k_tp2_conc64.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "2xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 1024,
|
|
"performance_profile": "Max Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "1k1k_tp4_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "4xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 4,
|
|
"osl": 1024,
|
|
"performance_profile": "Min Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml",
|
|
"concurrency": 8,
|
|
"config_filename": "1k1k_tp4_conc8.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "4xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 4,
|
|
"osl": 1024,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml",
|
|
"concurrency": 16,
|
|
"config_filename": "1k1k_tp4_conc16.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "4xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 4,
|
|
"osl": 1024,
|
|
"performance_profile": "Balanced"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml",
|
|
"concurrency": 32,
|
|
"config_filename": "1k1k_tp4_conc32.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "4xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 4,
|
|
"osl": 1024,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml",
|
|
"concurrency": 64,
|
|
"config_filename": "1k1k_tp4_conc64.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "4xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 4,
|
|
"osl": 1024,
|
|
"performance_profile": "Max Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "1k8k_tp4_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "4xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 4,
|
|
"osl": 8192,
|
|
"performance_profile": "Min Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml",
|
|
"concurrency": 8,
|
|
"config_filename": "1k8k_tp4_conc8.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "4xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 4,
|
|
"osl": 8192,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml",
|
|
"concurrency": 16,
|
|
"config_filename": "1k8k_tp4_conc16.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "4xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 4,
|
|
"osl": 8192,
|
|
"performance_profile": "Balanced"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml",
|
|
"concurrency": 32,
|
|
"config_filename": "1k8k_tp4_conc32.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "4xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 4,
|
|
"osl": 8192,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml",
|
|
"concurrency": 64,
|
|
"config_filename": "1k8k_tp4_conc64.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "4xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 4,
|
|
"osl": 8192,
|
|
"performance_profile": "Max Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "8k1k_tp4_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "4xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 4,
|
|
"osl": 1024,
|
|
"performance_profile": "Min Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml",
|
|
"concurrency": 8,
|
|
"config_filename": "8k1k_tp4_conc8.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "4xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 4,
|
|
"osl": 1024,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml",
|
|
"concurrency": 16,
|
|
"config_filename": "8k1k_tp4_conc16.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "4xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 4,
|
|
"osl": 1024,
|
|
"performance_profile": "Balanced"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml",
|
|
"concurrency": 32,
|
|
"config_filename": "8k1k_tp4_conc32.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "4xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 4,
|
|
"osl": 1024,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml",
|
|
"concurrency": 64,
|
|
"config_filename": "8k1k_tp4_conc64.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "4xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 4,
|
|
"osl": 1024,
|
|
"performance_profile": "Max Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "1k1k_tp8_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "Min Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml",
|
|
"concurrency": 8,
|
|
"config_filename": "1k1k_tp8_conc8.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml",
|
|
"concurrency": 16,
|
|
"config_filename": "1k1k_tp8_conc16.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "Balanced"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml",
|
|
"concurrency": 32,
|
|
"config_filename": "1k1k_tp8_conc32.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml",
|
|
"concurrency": 64,
|
|
"config_filename": "1k1k_tp8_conc64.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "Max Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "1k8k_tp8_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 8,
|
|
"osl": 8192,
|
|
"performance_profile": "Min Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml",
|
|
"concurrency": 8,
|
|
"config_filename": "1k8k_tp8_conc8.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 8,
|
|
"osl": 8192,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml",
|
|
"concurrency": 16,
|
|
"config_filename": "1k8k_tp8_conc16.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 8,
|
|
"osl": 8192,
|
|
"performance_profile": "Balanced"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml",
|
|
"concurrency": 32,
|
|
"config_filename": "1k8k_tp8_conc32.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 8,
|
|
"osl": 8192,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml",
|
|
"concurrency": 64,
|
|
"config_filename": "1k8k_tp8_conc64.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 8,
|
|
"osl": 8192,
|
|
"performance_profile": "Max Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "8k1k_tp8_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "Min Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml",
|
|
"concurrency": 8,
|
|
"config_filename": "8k1k_tp8_conc8.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml",
|
|
"concurrency": 16,
|
|
"config_filename": "8k1k_tp8_conc16.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "Balanced"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml",
|
|
"concurrency": 32,
|
|
"config_filename": "8k1k_tp8_conc32.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml",
|
|
"concurrency": 64,
|
|
"config_filename": "8k1k_tp8_conc64.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml",
|
|
"gpu": "B200_NVL",
|
|
"gpu_display": "8xB200_NVL",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 8,
|
|
"osl": 1024,
|
|
"performance_profile": "Max Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "1k1k_tp1_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "H200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "Min Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml",
|
|
"concurrency": 8,
|
|
"config_filename": "1k1k_tp1_conc8.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "H200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml",
|
|
"concurrency": 16,
|
|
"config_filename": "1k1k_tp1_conc16.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "H200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "Balanced"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml",
|
|
"concurrency": 32,
|
|
"config_filename": "1k1k_tp1_conc32.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "H200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml",
|
|
"concurrency": 64,
|
|
"config_filename": "1k1k_tp1_conc64.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "H200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "Max Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "1k8k_tp1_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "H200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 8192,
|
|
"performance_profile": "Min Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml",
|
|
"concurrency": 8,
|
|
"config_filename": "1k8k_tp1_conc8.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "H200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 8192,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml",
|
|
"concurrency": 16,
|
|
"config_filename": "1k8k_tp1_conc16.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "H200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 8192,
|
|
"performance_profile": "Balanced"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml",
|
|
"concurrency": 32,
|
|
"config_filename": "1k8k_tp1_conc32.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "H200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 8192,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml",
|
|
"concurrency": 64,
|
|
"config_filename": "1k8k_tp1_conc64.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "H200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 8192,
|
|
"performance_profile": "Max Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "8k1k_tp1_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "H200_SXM",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "Min Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml",
|
|
"concurrency": 8,
|
|
"config_filename": "8k1k_tp1_conc8.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "H200_SXM",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml",
|
|
"concurrency": 16,
|
|
"config_filename": "8k1k_tp1_conc16.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "H200_SXM",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "Balanced"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml",
|
|
"concurrency": 32,
|
|
"config_filename": "8k1k_tp1_conc32.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "H200_SXM",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml",
|
|
"concurrency": 64,
|
|
"config_filename": "8k1k_tp1_conc64.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "H200_SXM",
|
|
"isl": 8192,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 1,
|
|
"osl": 1024,
|
|
"performance_profile": "Max Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "1k1k_tp2_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "2xH200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 1024,
|
|
"performance_profile": "Min Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml",
|
|
"concurrency": 8,
|
|
"config_filename": "1k1k_tp2_conc8.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "2xH200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 1024,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml",
|
|
"concurrency": 16,
|
|
"config_filename": "1k1k_tp2_conc16.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "2xH200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 1024,
|
|
"performance_profile": "Balanced"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml",
|
|
"concurrency": 32,
|
|
"config_filename": "1k1k_tp2_conc32.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "2xH200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 1024,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml",
|
|
"concurrency": 64,
|
|
"config_filename": "1k1k_tp2_conc64.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "2xH200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 1024,
|
|
"performance_profile": "Max Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "1k8k_tp2_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "2xH200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 8192,
|
|
"performance_profile": "Min Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml",
|
|
"concurrency": 8,
|
|
"config_filename": "1k8k_tp2_conc8.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "2xH200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 8192,
|
|
"performance_profile": "Low Latency"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml",
|
|
"concurrency": 16,
|
|
"config_filename": "1k8k_tp2_conc16.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "2xH200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 8192,
|
|
"performance_profile": "Balanced"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml",
|
|
"concurrency": 32,
|
|
"config_filename": "1k8k_tp2_conc32.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "2xH200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 8192,
|
|
"performance_profile": "High Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml",
|
|
"concurrency": 64,
|
|
"config_filename": "1k8k_tp2_conc64.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml",
|
|
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml",
|
|
"gpu": "H200_SXM",
|
|
"gpu_display": "2xH200_SXM",
|
|
"isl": 1024,
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_display_name": "gpt-oss-120b",
|
|
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
|
|
"num_gpus": 2,
|
|
"osl": 8192,
|
|
"performance_profile": "Max Throughput"
|
|
},
|
|
{
|
|
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml",
|
|
"concurrency": 4,
|
|
"config_filename": "8k1k_tp2_conc4.yaml",
|
|
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml",
|
|
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml",
"gpu": "H200_SXM",
"gpu_display": "2xH200_SXM",
"isl": 8192,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 2,
"osl": 1024,
"performance_profile": "Min Latency"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml",
"concurrency": 8,
"config_filename": "8k1k_tp2_conc8.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml",
"gpu": "H200_SXM",
"gpu_display": "2xH200_SXM",
"isl": 8192,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 2,
"osl": 1024,
"performance_profile": "Low Latency"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml",
"concurrency": 16,
"config_filename": "8k1k_tp2_conc16.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml",
"gpu": "H200_SXM",
"gpu_display": "2xH200_SXM",
"isl": 8192,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 2,
"osl": 1024,
"performance_profile": "Balanced"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml",
"concurrency": 32,
"config_filename": "8k1k_tp2_conc32.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml",
"gpu": "H200_SXM",
"gpu_display": "2xH200_SXM",
"isl": 8192,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 2,
"osl": 1024,
"performance_profile": "High Throughput"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml",
"concurrency": 64,
"config_filename": "8k1k_tp2_conc64.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml",
"gpu": "H200_SXM",
"gpu_display": "2xH200_SXM",
"isl": 8192,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 2,
"osl": 1024,
"performance_profile": "Max Throughput"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml",
"concurrency": 4,
"config_filename": "1k1k_tp4_conc4.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml",
"gpu": "H200_SXM",
"gpu_display": "4xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 4,
"osl": 1024,
"performance_profile": "Min Latency"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml",
"concurrency": 8,
"config_filename": "1k1k_tp4_conc8.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml",
"gpu": "H200_SXM",
"gpu_display": "4xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 4,
"osl": 1024,
"performance_profile": "Low Latency"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml",
"concurrency": 16,
"config_filename": "1k1k_tp4_conc16.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml",
"gpu": "H200_SXM",
"gpu_display": "4xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 4,
"osl": 1024,
"performance_profile": "Balanced"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml",
"concurrency": 32,
"config_filename": "1k1k_tp4_conc32.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml",
"gpu": "H200_SXM",
"gpu_display": "4xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 4,
"osl": 1024,
"performance_profile": "High Throughput"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml",
"concurrency": 64,
"config_filename": "1k1k_tp4_conc64.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml",
"gpu": "H200_SXM",
"gpu_display": "4xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 4,
"osl": 1024,
"performance_profile": "Max Throughput"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml",
"concurrency": 4,
"config_filename": "1k8k_tp4_conc4.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml",
"gpu": "H200_SXM",
"gpu_display": "4xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 4,
"osl": 8192,
"performance_profile": "Min Latency"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml",
"concurrency": 8,
"config_filename": "1k8k_tp4_conc8.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml",
"gpu": "H200_SXM",
"gpu_display": "4xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 4,
"osl": 8192,
"performance_profile": "Low Latency"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml",
"concurrency": 16,
"config_filename": "1k8k_tp4_conc16.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml",
"gpu": "H200_SXM",
"gpu_display": "4xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 4,
"osl": 8192,
"performance_profile": "Balanced"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml",
"concurrency": 32,
"config_filename": "1k8k_tp4_conc32.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml",
"gpu": "H200_SXM",
"gpu_display": "4xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 4,
"osl": 8192,
"performance_profile": "High Throughput"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml",
"concurrency": 64,
"config_filename": "1k8k_tp4_conc64.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml",
"gpu": "H200_SXM",
"gpu_display": "4xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 4,
"osl": 8192,
"performance_profile": "Max Throughput"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml",
"concurrency": 4,
"config_filename": "8k1k_tp4_conc4.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml",
"gpu": "H200_SXM",
"gpu_display": "4xH200_SXM",
"isl": 8192,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 4,
"osl": 1024,
"performance_profile": "Min Latency"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml",
"concurrency": 8,
"config_filename": "8k1k_tp4_conc8.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml",
"gpu": "H200_SXM",
"gpu_display": "4xH200_SXM",
"isl": 8192,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 4,
"osl": 1024,
"performance_profile": "Low Latency"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml",
"concurrency": 16,
"config_filename": "8k1k_tp4_conc16.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml",
"gpu": "H200_SXM",
"gpu_display": "4xH200_SXM",
"isl": 8192,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 4,
"osl": 1024,
"performance_profile": "Balanced"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml",
"concurrency": 32,
"config_filename": "8k1k_tp4_conc32.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml",
"gpu": "H200_SXM",
"gpu_display": "4xH200_SXM",
"isl": 8192,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 4,
"osl": 1024,
"performance_profile": "High Throughput"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml",
"concurrency": 64,
"config_filename": "8k1k_tp4_conc64.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml",
"gpu": "H200_SXM",
"gpu_display": "4xH200_SXM",
"isl": 8192,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 4,
"osl": 1024,
"performance_profile": "Max Throughput"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml",
"concurrency": 4,
"config_filename": "1k1k_tp8_conc4.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml",
"gpu": "H200_SXM",
"gpu_display": "8xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 8,
"osl": 1024,
"performance_profile": "Min Latency"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml",
"concurrency": 8,
"config_filename": "1k1k_tp8_conc8.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml",
"gpu": "H200_SXM",
"gpu_display": "8xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 8,
"osl": 1024,
"performance_profile": "Low Latency"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml",
"concurrency": 16,
"config_filename": "1k1k_tp8_conc16.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml",
"gpu": "H200_SXM",
"gpu_display": "8xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 8,
"osl": 1024,
"performance_profile": "Balanced"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml",
"concurrency": 32,
"config_filename": "1k1k_tp8_conc32.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml",
"gpu": "H200_SXM",
"gpu_display": "8xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 8,
"osl": 1024,
"performance_profile": "High Throughput"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml",
"concurrency": 64,
"config_filename": "1k1k_tp8_conc64.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml",
"gpu": "H200_SXM",
"gpu_display": "8xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 8,
"osl": 1024,
"performance_profile": "Max Throughput"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml",
"concurrency": 4,
"config_filename": "1k8k_tp8_conc4.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml",
"gpu": "H200_SXM",
"gpu_display": "8xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 8,
"osl": 8192,
"performance_profile": "Min Latency"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml",
"concurrency": 8,
"config_filename": "1k8k_tp8_conc8.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml",
"gpu": "H200_SXM",
"gpu_display": "8xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 8,
"osl": 8192,
"performance_profile": "Low Latency"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml",
"concurrency": 16,
"config_filename": "1k8k_tp8_conc16.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml",
"gpu": "H200_SXM",
"gpu_display": "8xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 8,
"osl": 8192,
"performance_profile": "Balanced"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml",
"concurrency": 32,
"config_filename": "1k8k_tp8_conc32.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml",
"gpu": "H200_SXM",
"gpu_display": "8xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 8,
"osl": 8192,
"performance_profile": "High Throughput"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml",
"concurrency": 64,
"config_filename": "1k8k_tp8_conc64.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml",
"gpu": "H200_SXM",
"gpu_display": "8xH200_SXM",
"isl": 1024,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 8,
"osl": 8192,
"performance_profile": "Max Throughput"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml",
"concurrency": 4,
"config_filename": "8k1k_tp8_conc4.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml",
"gpu": "H200_SXM",
"gpu_display": "8xH200_SXM",
"isl": 8192,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 8,
"osl": 1024,
"performance_profile": "Min Latency"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml",
"concurrency": 8,
"config_filename": "8k1k_tp8_conc8.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml",
"gpu": "H200_SXM",
"gpu_display": "8xH200_SXM",
"isl": 8192,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 8,
"osl": 1024,
"performance_profile": "Low Latency"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml",
"concurrency": 16,
"config_filename": "8k1k_tp8_conc16.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml",
"gpu": "H200_SXM",
"gpu_display": "8xH200_SXM",
"isl": 8192,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 8,
"osl": 1024,
"performance_profile": "Balanced"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml",
"concurrency": 32,
"config_filename": "8k1k_tp8_conc32.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml",
"gpu": "H200_SXM",
"gpu_display": "8xH200_SXM",
"isl": 8192,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 8,
"osl": 1024,
"performance_profile": "High Throughput"
},
{
"command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc64.yaml",
"concurrency": 64,
"config_filename": "8k1k_tp8_conc64.yaml",
"config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc64.yaml",
"config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc64.yaml",
"config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc64.yaml",
"gpu": "H200_SXM",
"gpu_display": "8xH200_SXM",
"isl": 8192,
"model": "openai/gpt-oss-120b",
"model_display_name": "gpt-oss-120b",
"model_url": "https://huggingface.co/openai/gpt-oss-120b",
"num_gpus": 8,
"osl": 1024,
"performance_profile": "Max Throughput"
}
],
"models": {
"deepseek-ai/DeepSeek-R1-0528": {
"display_name": "DeepSeek-R1",
"url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528"
},
"nvidia/DeepSeek-R1-0528-FP4-v2": {
"display_name": "DeepSeek-R1 (NVFP4)",
"url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2"
},
"openai/gpt-oss-120b": {
"display_name": "gpt-oss-120b",
"url": "https://huggingface.co/openai/gpt-oss-120b"
}
},
"source": "examples/configs/database/lookup.yaml"
}