{ "entries": [ { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc4.yaml", "concurrency": 4, "config_filename": "1k1k_tp8_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc4.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc4.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc8.yaml", "concurrency": 8, "config_filename": "1k1k_tp8_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc8.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc8.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc16.yaml", "concurrency": 16, "config_filename": "1k1k_tp8_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc16.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc16.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc32.yaml", "concurrency": 32, "config_filename": "1k1k_tp8_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc32.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc32.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc64.yaml", "concurrency": 64, "config_filename": "1k1k_tp8_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc64.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc64.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc4.yaml", "concurrency": 4, "config_filename": "8k1k_tp8_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc4.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc4.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 8192, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc8.yaml", "concurrency": 8, "config_filename": "8k1k_tp8_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc8.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc8.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 8192, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc16.yaml", "concurrency": 16, "config_filename": "8k1k_tp8_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc16.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc16.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 8192, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc32.yaml", "concurrency": 32, "config_filename": "8k1k_tp8_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc32.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc32.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 8192, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc64.yaml", "concurrency": 64, "config_filename": "8k1k_tp8_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc64.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc64.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 8192, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc4.yaml", "concurrency": 4, "config_filename": "1k1k_tp8_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc4.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc4.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 1024, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc8.yaml", "concurrency": 8, "config_filename": "1k1k_tp8_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc8.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc8.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 1024, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc16.yaml", "concurrency": 16, "config_filename": "1k1k_tp8_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc16.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc16.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 1024, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc32.yaml", "concurrency": 32, "config_filename": "1k1k_tp8_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc32.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc32.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 1024, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc64.yaml", "concurrency": 64, "config_filename": "1k1k_tp8_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc64.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc64.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 1024, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc4.yaml", "concurrency": 4, "config_filename": "8k1k_tp8_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc4.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc4.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 8192, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc8.yaml", "concurrency": 8, "config_filename": "8k1k_tp8_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc8.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc8.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 8192, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc16.yaml", "concurrency": 16, "config_filename": "8k1k_tp8_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc16.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc16.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 8192, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc32.yaml", "concurrency": 32, "config_filename": "8k1k_tp8_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc32.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc32.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 8192, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc64.yaml", "concurrency": 64, "config_filename": "8k1k_tp8_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc64.yaml", "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc64.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 8192, "model": "deepseek-ai/DeepSeek-R1-0528", "model_display_name": "DeepSeek-R1", "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "num_gpus": 8, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc4.yaml", "concurrency": 4, "config_filename": "1k1k_tp4_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc4.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc4.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 1024, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 4, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc8.yaml", "concurrency": 8, "config_filename": "1k1k_tp4_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc8.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc8.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 1024, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 4, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc16.yaml", "concurrency": 16, "config_filename": "1k1k_tp4_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc16.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc16.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 1024, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 4, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc32.yaml", "concurrency": 32, "config_filename": "1k1k_tp4_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc32.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc32.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 1024, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 4, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc64.yaml", "concurrency": 64, "config_filename": "1k1k_tp4_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc64.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc64.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 1024, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 4, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc128.yaml", "concurrency": 128, "config_filename": "1k1k_tp4_conc128.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc128.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc128.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc128.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 1024, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 4, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc256.yaml", "concurrency": 256, "config_filename": "1k1k_tp4_conc256.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc256.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc256.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc256.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 1024, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 4, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc4.yaml", "concurrency": 4, "config_filename": "8k1k_tp4_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc4.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc4.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 8192, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 4, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc8.yaml", "concurrency": 8, "config_filename": "8k1k_tp4_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc8.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc8.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 8192, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 4, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc16.yaml", "concurrency": 16, "config_filename": "8k1k_tp4_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc16.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc16.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 8192, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 4, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc32.yaml", "concurrency": 32, "config_filename": "8k1k_tp4_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc32.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc32.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 8192, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 4, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc64.yaml", "concurrency": 64, "config_filename": "8k1k_tp4_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc64.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc64.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 8192, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 4, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc128.yaml", "concurrency": 128, "config_filename": "8k1k_tp4_conc128.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc128.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc128.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc128.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 8192, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 4, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc256.yaml", "concurrency": 256, "config_filename": "8k1k_tp4_conc256.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc256.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc256.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc256.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 8192, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 4, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml", "concurrency": 4, "config_filename": "1k1k_tp8_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 8, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml", "concurrency": 8, "config_filename": "1k1k_tp8_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 8, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml", "concurrency": 16, "config_filename": "1k1k_tp8_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 8, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml", "concurrency": 32, "config_filename": "1k1k_tp8_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 8, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml", "concurrency": 64, "config_filename": "1k1k_tp8_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 8, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc128.yaml", "concurrency": 128, "config_filename": "1k1k_tp8_conc128.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc128.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc128.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc128.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 8, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml", "concurrency": 256, "config_filename": "1k1k_tp8_conc256.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 8, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml", "concurrency": 4, "config_filename": "8k1k_tp8_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 8192, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 8, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml", "concurrency": 8, "config_filename": "8k1k_tp8_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 8192, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 8, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml", "concurrency": 16, "config_filename": "8k1k_tp8_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 8192, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 8, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml", "concurrency": 32, "config_filename": "8k1k_tp8_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 8192, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 8, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml", "concurrency": 64, "config_filename": "8k1k_tp8_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 8192, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 8, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc128.yaml", "concurrency": 128, "config_filename": "8k1k_tp8_conc128.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc128.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc128.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc128.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 8192, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 8, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc256.yaml", "concurrency": 256, "config_filename": "8k1k_tp8_conc256.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc256.yaml", "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc256.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc256.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 8192, "model": "nvidia/DeepSeek-R1-0528-FP4-v2", "model_display_name": "DeepSeek-R1 (NVFP4)", "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", "num_gpus": 8, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc4.yaml", "concurrency": 4, "config_filename": "1k1k_tp1_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc4.yaml", "gpu": "B200_NVL", "gpu_display": "B200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc8.yaml", "concurrency": 8, "config_filename": "1k1k_tp1_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc8.yaml", "gpu": "B200_NVL", "gpu_display": "B200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc16.yaml", "concurrency": 16, "config_filename": "1k1k_tp1_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc16.yaml", "gpu": "B200_NVL", "gpu_display": "B200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc32.yaml", "concurrency": 32, "config_filename": "1k1k_tp1_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc32.yaml", "gpu": "B200_NVL", "gpu_display": "B200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc64.yaml", "concurrency": 64, "config_filename": "1k1k_tp1_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc64.yaml", "gpu": "B200_NVL", "gpu_display": "B200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc4.yaml", "concurrency": 4, "config_filename": "1k8k_tp1_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc4.yaml", "gpu": "B200_NVL", "gpu_display": "B200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 8192, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc8.yaml", "concurrency": 8, "config_filename": "1k8k_tp1_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc8.yaml", "gpu": "B200_NVL", "gpu_display": "B200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 8192, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc16.yaml", "concurrency": 16, "config_filename": "1k8k_tp1_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc16.yaml", "gpu": "B200_NVL", "gpu_display": "B200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 8192, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc32.yaml", "concurrency": 32, "config_filename": "1k8k_tp1_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc32.yaml", "gpu": "B200_NVL", "gpu_display": "B200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 8192, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc64.yaml", "concurrency": 64, "config_filename": "1k8k_tp1_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc64.yaml", "gpu": "B200_NVL", "gpu_display": "B200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 8192, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc4.yaml", "concurrency": 4, "config_filename": "8k1k_tp1_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc4.yaml", "gpu": "B200_NVL", "gpu_display": "B200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc8.yaml", "concurrency": 8, "config_filename": "8k1k_tp1_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc8.yaml", "gpu": "B200_NVL", "gpu_display": "B200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc16.yaml", "concurrency": 16, "config_filename": "8k1k_tp1_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc16.yaml", "gpu": "B200_NVL", "gpu_display": "B200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc32.yaml", "concurrency": 32, "config_filename": "8k1k_tp1_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc32.yaml", "gpu": "B200_NVL", "gpu_display": "B200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc64.yaml", "concurrency": 64, "config_filename": "8k1k_tp1_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc64.yaml", "gpu": "B200_NVL", "gpu_display": "B200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml", "concurrency": 4, "config_filename": "1k1k_tp2_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml", "gpu": "B200_NVL", "gpu_display": "2xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml", "concurrency": 8, "config_filename": "1k1k_tp2_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml", "gpu": "B200_NVL", "gpu_display": "2xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml", "concurrency": 16, "config_filename": "1k1k_tp2_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml", "gpu": "B200_NVL", "gpu_display": "2xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml", "concurrency": 32, "config_filename": "1k1k_tp2_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml", "gpu": "B200_NVL", "gpu_display": "2xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml", "concurrency": 64, "config_filename": "1k1k_tp2_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml", "gpu": "B200_NVL", "gpu_display": "2xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml", "concurrency": 4, "config_filename": "1k8k_tp2_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml", "gpu": "B200_NVL", "gpu_display": "2xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 8192, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml", "concurrency": 8, "config_filename": "1k8k_tp2_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml", "gpu": "B200_NVL", "gpu_display": "2xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 8192, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml", "concurrency": 16, "config_filename": "1k8k_tp2_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml", "gpu": "B200_NVL", "gpu_display": "2xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 8192, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml", "concurrency": 32, "config_filename": "1k8k_tp2_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml", "gpu": "B200_NVL", "gpu_display": "2xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 8192, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml", "concurrency": 64, "config_filename": "1k8k_tp2_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml", "gpu": "B200_NVL", "gpu_display": "2xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 8192, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml", "concurrency": 4, "config_filename": "8k1k_tp2_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml", "gpu": "B200_NVL", "gpu_display": "2xB200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml", "concurrency": 8, "config_filename": "8k1k_tp2_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml", "gpu": "B200_NVL", "gpu_display": "2xB200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml", "concurrency": 16, "config_filename": "8k1k_tp2_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml", "gpu": "B200_NVL", "gpu_display": "2xB200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml", "concurrency": 32, "config_filename": "8k1k_tp2_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml", "gpu": "B200_NVL", "gpu_display": "2xB200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml", "concurrency": 64, "config_filename": "8k1k_tp2_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml", "gpu": "B200_NVL", "gpu_display": "2xB200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml", "concurrency": 4, "config_filename": "1k1k_tp4_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml", "concurrency": 8, "config_filename": "1k1k_tp4_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml", "concurrency": 16, "config_filename": "1k1k_tp4_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml", "concurrency": 32, "config_filename": "1k1k_tp4_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml", "concurrency": 64, "config_filename": "1k1k_tp4_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml", "concurrency": 4, "config_filename": "1k8k_tp4_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 8192, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml", "concurrency": 8, "config_filename": "1k8k_tp4_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 8192, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml", "concurrency": 16, "config_filename": "1k8k_tp4_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 8192, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml", "concurrency": 32, "config_filename": "1k8k_tp4_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 8192, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml", "concurrency": 64, "config_filename": "1k8k_tp4_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 8192, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml", "concurrency": 4, "config_filename": "8k1k_tp4_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml", "concurrency": 8, "config_filename": "8k1k_tp4_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml", "concurrency": 16, "config_filename": "8k1k_tp4_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml", "concurrency": 32, "config_filename": "8k1k_tp4_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml", "concurrency": 64, "config_filename": "8k1k_tp4_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml", "gpu": "B200_NVL", "gpu_display": "4xB200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml", "concurrency": 4, "config_filename": "1k1k_tp8_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml", "concurrency": 8, "config_filename": "1k1k_tp8_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml", "concurrency": 16, "config_filename": "1k1k_tp8_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml", "concurrency": 32, "config_filename": "1k1k_tp8_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml", "concurrency": 64, "config_filename": "1k1k_tp8_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml", "concurrency": 4, "config_filename": "1k8k_tp8_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 8192, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml", "concurrency": 8, "config_filename": "1k8k_tp8_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 8192, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml", "concurrency": 16, "config_filename": "1k8k_tp8_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 8192, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml", "concurrency": 32, "config_filename": "1k8k_tp8_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 8192, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml", "concurrency": 64, "config_filename": "1k8k_tp8_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 8192, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml", "concurrency": 4, "config_filename": "8k1k_tp8_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml", "concurrency": 8, "config_filename": "8k1k_tp8_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml", "concurrency": 16, "config_filename": "8k1k_tp8_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml", "concurrency": 32, "config_filename": "8k1k_tp8_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml", "concurrency": 64, "config_filename": "8k1k_tp8_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml", "gpu": "B200_NVL", "gpu_display": "8xB200_NVL", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml", "concurrency": 4, "config_filename": "1k1k_tp1_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml", "gpu": "H200_SXM", "gpu_display": "H200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml", "concurrency": 8, "config_filename": "1k1k_tp1_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml", "gpu": "H200_SXM", "gpu_display": "H200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml", "concurrency": 16, "config_filename": "1k1k_tp1_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml", "gpu": "H200_SXM", "gpu_display": "H200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml", "concurrency": 32, "config_filename": "1k1k_tp1_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml", "gpu": "H200_SXM", "gpu_display": "H200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml", "concurrency": 64, "config_filename": "1k1k_tp1_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml", "gpu": "H200_SXM", "gpu_display": "H200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml", "concurrency": 4, "config_filename": "1k8k_tp1_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml", "gpu": "H200_SXM", "gpu_display": "H200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 8192, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml", "concurrency": 8, "config_filename": "1k8k_tp1_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml", "gpu": "H200_SXM", "gpu_display": "H200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 8192, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml", "concurrency": 16, "config_filename": "1k8k_tp1_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml", "gpu": "H200_SXM", "gpu_display": "H200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 8192, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml", "concurrency": 32, "config_filename": "1k8k_tp1_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml", "gpu": "H200_SXM", "gpu_display": "H200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 8192, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml", "concurrency": 64, "config_filename": "1k8k_tp1_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml", "gpu": "H200_SXM", "gpu_display": "H200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 8192, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml", "concurrency": 4, "config_filename": "8k1k_tp1_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml", "gpu": "H200_SXM", "gpu_display": "H200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml", "concurrency": 8, "config_filename": "8k1k_tp1_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml", "gpu": "H200_SXM", "gpu_display": "H200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml", "concurrency": 16, "config_filename": "8k1k_tp1_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml", "gpu": "H200_SXM", "gpu_display": "H200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml", "concurrency": 32, "config_filename": "8k1k_tp1_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml", "gpu": "H200_SXM", "gpu_display": "H200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml", "concurrency": 64, "config_filename": "8k1k_tp1_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml", "gpu": "H200_SXM", "gpu_display": "H200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 1, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml", "concurrency": 4, "config_filename": "1k1k_tp2_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml", "gpu": "H200_SXM", "gpu_display": "2xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml", "concurrency": 8, "config_filename": "1k1k_tp2_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml", "gpu": "H200_SXM", "gpu_display": "2xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml", "concurrency": 16, "config_filename": "1k1k_tp2_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml", "gpu": "H200_SXM", "gpu_display": "2xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml", "concurrency": 32, "config_filename": "1k1k_tp2_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml", "gpu": "H200_SXM", "gpu_display": "2xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml", "concurrency": 64, "config_filename": "1k1k_tp2_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml", "gpu": "H200_SXM", "gpu_display": "2xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml", "concurrency": 4, "config_filename": "1k8k_tp2_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml", "gpu": "H200_SXM", "gpu_display": "2xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 8192, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml", "concurrency": 8, "config_filename": "1k8k_tp2_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml", "gpu": "H200_SXM", "gpu_display": "2xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 8192, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml", "concurrency": 16, "config_filename": "1k8k_tp2_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml", "gpu": "H200_SXM", "gpu_display": "2xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 8192, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml", "concurrency": 32, "config_filename": "1k8k_tp2_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml", "gpu": "H200_SXM", "gpu_display": "2xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 8192, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml", "concurrency": 64, "config_filename": "1k8k_tp2_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml", "gpu": "H200_SXM", "gpu_display": "2xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 8192, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml", "concurrency": 4, "config_filename": "8k1k_tp2_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml", "gpu": "H200_SXM", "gpu_display": "2xH200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml", "concurrency": 8, "config_filename": "8k1k_tp2_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml", "gpu": "H200_SXM", "gpu_display": "2xH200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml", "concurrency": 16, "config_filename": "8k1k_tp2_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml", "gpu": "H200_SXM", "gpu_display": "2xH200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml", "concurrency": 32, "config_filename": "8k1k_tp2_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml", "gpu": "H200_SXM", "gpu_display": "2xH200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml", "concurrency": 64, "config_filename": "8k1k_tp2_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml", "gpu": "H200_SXM", "gpu_display": "2xH200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 2, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml", "concurrency": 4, "config_filename": "1k1k_tp4_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml", "gpu": "H200_SXM", "gpu_display": "4xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml", "concurrency": 8, "config_filename": "1k1k_tp4_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml", "gpu": "H200_SXM", "gpu_display": "4xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml", "concurrency": 16, "config_filename": "1k1k_tp4_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml", "gpu": "H200_SXM", "gpu_display": "4xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml", "concurrency": 32, "config_filename": "1k1k_tp4_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml", "gpu": "H200_SXM", "gpu_display": "4xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml", "concurrency": 64, "config_filename": "1k1k_tp4_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml", "gpu": "H200_SXM", "gpu_display": "4xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml", "concurrency": 4, "config_filename": "1k8k_tp4_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml", "gpu": "H200_SXM", "gpu_display": "4xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 8192, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml", "concurrency": 8, "config_filename": "1k8k_tp4_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml", "gpu": "H200_SXM", "gpu_display": "4xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 8192, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml", "concurrency": 16, "config_filename": "1k8k_tp4_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml", "gpu": "H200_SXM", "gpu_display": "4xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 8192, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml", "concurrency": 32, "config_filename": "1k8k_tp4_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml", "gpu": "H200_SXM", "gpu_display": "4xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 8192, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml", "concurrency": 64, "config_filename": "1k8k_tp4_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml", "gpu": "H200_SXM", "gpu_display": "4xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 8192, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml", "concurrency": 4, "config_filename": "8k1k_tp4_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml", "gpu": "H200_SXM", "gpu_display": "4xH200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml", "concurrency": 8, "config_filename": "8k1k_tp4_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml", "gpu": "H200_SXM", "gpu_display": "4xH200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml", "concurrency": 16, "config_filename": "8k1k_tp4_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml", "gpu": "H200_SXM", "gpu_display": "4xH200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml", "concurrency": 32, "config_filename": "8k1k_tp4_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml", "gpu": "H200_SXM", "gpu_display": "4xH200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml", "concurrency": 64, "config_filename": "8k1k_tp4_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml", "gpu": "H200_SXM", "gpu_display": "4xH200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 4, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml", "concurrency": 4, "config_filename": "1k1k_tp8_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml", "concurrency": 8, "config_filename": "1k1k_tp8_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml", "concurrency": 16, "config_filename": "1k1k_tp8_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml", "concurrency": 32, "config_filename": "1k1k_tp8_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml", "concurrency": 64, "config_filename": "1k1k_tp8_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml", "concurrency": 4, "config_filename": "1k8k_tp8_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 8192, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml", "concurrency": 8, "config_filename": "1k8k_tp8_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 8192, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml", "concurrency": 16, "config_filename": "1k8k_tp8_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 8192, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml", "concurrency": 32, "config_filename": "1k8k_tp8_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 8192, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml", "concurrency": 64, "config_filename": "1k8k_tp8_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 1024, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 8192, "performance_profile": "Max Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml", "concurrency": 4, "config_filename": "8k1k_tp8_conc4.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "Min Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml", "concurrency": 8, "config_filename": "8k1k_tp8_conc8.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "Low Latency" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml", "concurrency": 16, "config_filename": "8k1k_tp8_conc16.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "Balanced" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml", "concurrency": 32, "config_filename": "8k1k_tp8_conc32.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "High Throughput" }, { "command": "trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc64.yaml", "concurrency": 64, "config_filename": "8k1k_tp8_conc64.yaml", "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc64.yaml", "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc64.yaml", "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc64.yaml", "gpu": "H200_SXM", "gpu_display": "8xH200_SXM", "isl": 8192, "model": "openai/gpt-oss-120b", "model_display_name": "gpt-oss-120b", "model_url": "https://huggingface.co/openai/gpt-oss-120b", "num_gpus": 8, "osl": 1024, "performance_profile": "Max Throughput" } ], "models": { "deepseek-ai/DeepSeek-R1-0528": { "display_name": "DeepSeek-R1", "url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528" }, "nvidia/DeepSeek-R1-0528-FP4-v2": { "display_name": "DeepSeek-R1 (NVFP4)", "url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2" }, "openai/gpt-oss-120b": { "display_name": "gpt-oss-120b", "url": "https://huggingface.co/openai/gpt-oss-120b" } }, "source": "examples/configs/database/lookup.yaml" }