From aa1fe931de43b50d639506e1d8ebf40177194dbb Mon Sep 17 00:00:00 2001
From: Venky <23023424+venkywonka@users.noreply.github.com>
Date: Tue, 6 Jan 2026 08:35:47 +0530
Subject: [PATCH] [None][docs] Add `--config` preference over
 `--extra_llm_api_options` in CODING_GUIDELINES.md (#10426)

Signed-off-by: Venky Ganesh <23023424+venkywonka@users.noreply.github.com>
---
 CODING_GUIDELINES.md                                      | 8 ++++++++
 docs/source/features/sparse-attention.md                  | 4 ++--
 examples/models/core/nemotron/README_nemotron_nano_v3.md | 6 +++---
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/CODING_GUIDELINES.md b/CODING_GUIDELINES.md
index 906e0558be..70f0c1bfbe 100644
--- a/CODING_GUIDELINES.md
+++ b/CODING_GUIDELINES.md
@@ -487,6 +487,14 @@ else:
     f.read()
 ```
 
+## Documentation Guidelines
+
+### CLI Options in Documentation
+1. When documenting CLI commands for `trtllm-serve`, `trtllm-bench`, `trtllm-eval`, or similar tools, prefer `--config` over `--extra_llm_api_options` for specifying configuration files.
+   - `--config` is the shorter, preferred alias of `--extra_llm_api_options`.
+   - Example: `trtllm-serve --model --config config.yaml` (preferred)
+   - Avoid: `trtllm-serve --model --extra_llm_api_options config.yaml`
+
 ## NVIDIA Copyright
 
 1. All TensorRT-LLM Open Source Software code should contain an NVIDIA copyright header that includes the year of its latest meaningful modification. The following block of text should be prepended to the top of all files. This includes .cpp, .h, .cu, .py, and any other source files which are compiled or interpreted.
diff --git a/docs/source/features/sparse-attention.md b/docs/source/features/sparse-attention.md
index 6682f16b65..4458a2fe70 100644
--- a/docs/source/features/sparse-attention.md
+++ b/docs/source/features/sparse-attention.md
@@ -88,13 +88,13 @@ enable_chunked_prefill: false
 Run the command with the config file:
 
 ```bash
-trtllm-bench/trtllm-serve --model --extra_llm_api_options extra_config.yaml ...
+trtllm-bench/trtllm-serve --model --config extra_config.yaml ...
 ```
 
 For example, users can evaluate a model with trtllm-eval on LongBenchV2 task like this:
 
 ```bash
-trtllm-eval --model --extra_llm_api_options extra_config.yaml longbench_v2 --max_output_length 1024 ...
+trtllm-eval --model --config extra_config.yaml longbench_v2 --max_output_length 1024 ...
 ```
 
 ## Developer Guide
diff --git a/examples/models/core/nemotron/README_nemotron_nano_v3.md b/examples/models/core/nemotron/README_nemotron_nano_v3.md
index dac512f47e..d60a76ec5e 100644
--- a/examples/models/core/nemotron/README_nemotron_nano_v3.md
+++ b/examples/models/core/nemotron/README_nemotron_nano_v3.md
@@ -83,7 +83,7 @@ TRTLLM_ENABLE_PDL=1 trtllm-serve \
 --port 8000 \
 --backend _autodeploy \
 --trust_remote_code \
---extra_llm_api_options nano_v3.yaml
+--config nano_v3.yaml
 
 # OR you can launch trtllm-server to support reasoning content parsing.
 TRTLLM_ENABLE_PDL=1 trtllm-serve \
@@ -92,7 +92,7 @@ TRTLLM_ENABLE_PDL=1 trtllm-serve \
 --backend _autodeploy \
 --trust_remote_code \
 --reasoning_parser nano-v3 \
---extra_llm_api_options nano_v3.yaml
+--config nano_v3.yaml
 
 # OR you can launch trtllm-server to support tool-calling.
 TRTLLM_ENABLE_PDL=1 trtllm-serve \
@@ -102,7 +102,7 @@ TRTLLM_ENABLE_PDL=1 trtllm-serve \
 --trust_remote_code \
 --reasoning_parser nano-v3 \
 --tool_parser qwen3_coder \
---extra_llm_api_options nano_v3.yaml
+--config nano_v3.yaml
 ```
 
 For the client:
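
For reference, a minimal sketch of the kind of YAML file that `--config` (the preferred alias of `--extra_llm_api_options`) points to. The `enable_chunked_prefill` key is taken from the sparse-attention hunk above; the `kv_cache_config` entry is an assumed illustration of an additional LLM API option and may differ across TensorRT-LLM releases:

```yaml
# Hypothetical extra_config.yaml, passed via --config.
# enable_chunked_prefill comes from the sparse-attention doc in this patch;
# kv_cache_config below is an assumption for illustration only.
enable_chunked_prefill: false
kv_cache_config:
  free_gpu_memory_fraction: 0.9  # assumed knob: share of free GPU memory reserved for the KV cache
```

With such a file in place, the commands in this patch read, e.g., `trtllm-serve <model> --config extra_config.yaml`, where `<model>` is an illustrative placeholder for the model name or path.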