mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
[TRTLLM-9086][doc] Clean up TODOs in documentation (#9292)
Signed-off-by: junq <22017000+QiJune@users.noreply.github.com> Signed-off-by: Mike Iovine <6158008+mikeiovine@users.noreply.github.com> Signed-off-by: Mike Iovine <miovine@nvidia.com>
This commit is contained in:
parent
c6dc68a28e
commit
0915c4e3a1
@ -65,7 +65,6 @@ public:
|
||||
|
||||
//!
|
||||
//! \brief Returns the tensor n-th dimension. If n is negative, returns the (nbDims + n)th dimension, i.e. counts from the last dimension.
|
||||
//! TODO: replace with constexpr parameter when moving to C++20.
|
||||
//!
|
||||
template <SizeType32 n>
|
||||
[[nodiscard]] DimType64 getDimension() const
|
||||
|
||||
@ -212,8 +212,6 @@ curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/jso
|
||||
|
||||
Here is an example response, showing that the TensorRT LLM server reasons and answers the questions.
|
||||
|
||||
TODO: Use the Chat Completions API / Responses API as the example after the PR is merged.
|
||||
|
||||
```json
|
||||
{"id":"chatcmpl-c5bf51b5cab94e10ba5da5266d12ee59","object":"chat.completion","created":1755815898,"model":"openai/gpt-oss-120b","choices":[{"index":0,"message":{"role":"assistant","content":"analysisThe user asks: \"Where is New York?\" Likely they want location info. Provide answer: New York State in northeastern US, New York City on the east coast, coordinates, etc. Provide context.assistantfinal**New York** can refer to two related places in the United States:\n\n| What it is | Where it is | Approx. coordinates | How to picture it |\n|------------|------------|--------------------|-------------------|\n| **New York State** | The northeastern corner of the United States, bordered by **Vermont, Massachusetts, Connecticut, New Jersey, Pennsylvania, and the Canadian provinces of Ontario and Quebec**. | 42.7° N, 75.5° W (roughly the state’s geographic centre) | A roughly rectangular state that stretches from the Atlantic Ocean in the southeast to the Adirondack Mountains and the Great Lakes region in the north. |\n| **New York City (NYC)** | The largest city in the state, located on the **southern tip of the state** where the **Hudson River meets the Atlantic Ocean**. It occupies five boroughs: Manhattan, Brooklyn, Queens, The Bronx, and Staten Island. | 40.7128° N, 74.0060° W | A dense, world‑famous metropolis that sits on a series of islands (Manhattan, Staten Island, parts of the Bronx) and the mainland (Brooklyn and Queens). |\n\n### Quick geographic context\n- **On a map of the United States:** New York State is in the **Northeast** region, just east of the Great Lakes and north of Pennsylvania. \n- **From Washington, D.C.:** Travel roughly **225 mi (360 km) northeast**. \n- **From Boston, MA:** Travel about **215 mi (350 km) southwest**. \n- **From Toronto, Canada:** Travel about **500 mi (800 km) southeast**.\n\n### Travel tips\n- **By air:** Major airports include **John F. 
Kennedy International (JFK)**, **LaGuardia (LGA)**, and **Newark Liberty International (EWR)** (the latter is actually in New Jersey but serves the NYC metro area). \n- **By train:** Amtrak’s **Northeast Corridor** runs from **Boston → New York City → Washington, D.C.** \n- **By car:** Interstates **I‑87** (north‑south) and **I‑90** (east‑west) are the primary highways crossing the state.\n\n### Fun fact\n- The name “**New York**” was given by the English in 1664, honoring the Duke of York (later King James II). The city’s original Dutch name was **“New Amsterdam.”**\n\nIf you need more specific directions (e.g., how to get to a particular neighborhood, landmark, or the state capital **Albany**), just let me know!","reasoning_content":null,"tool_calls":[]},"logprobs":null,"finish_reason":"stop","stop_reason":null,"mm_embedding_handle":null,"disaggregated_params":null,"avg_decoded_tokens_per_iter":1.0}],"usage":{"prompt_tokens":72,"total_tokens":705,"completion_tokens":633},"prompt_token_ids":null}
|
||||
```
|
||||
|
||||
@ -137,6 +137,24 @@ def demonstrate_multiple_sequences(prompt: str):
|
||||
print(f"Sequence {i+1}: {output.text}")
|
||||
|
||||
|
||||
def demonstrate_beam_search(prompt: str):
|
||||
"""Demonstrates beam search."""
|
||||
print("\n🎯 === BEAM SEARCH ===")
|
||||
beam_width = 2
|
||||
llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
||||
max_beam_width=beam_width)
|
||||
|
||||
sampling_params = SamplingParams(
|
||||
max_tokens=50,
|
||||
use_beam_search=True,
|
||||
n=beam_width,
|
||||
)
|
||||
|
||||
response = llm.generate(prompt, sampling_params)
|
||||
print(f"Prompt: {prompt}")
|
||||
print(f"Response: {response.outputs[0].text}")
|
||||
|
||||
|
||||
def demonstrate_with_logprobs(prompt: str):
|
||||
"""Demonstrates generation with log probabilities."""
|
||||
print("\n📊 === GENERATION WITH LOG PROBABILITIES ===")
|
||||
@ -173,9 +191,8 @@ def run_all_demonstrations(model_path: Optional[str] = None):
|
||||
demonstrate_top_k_sampling(demo_prompt)
|
||||
demonstrate_top_p_sampling(demo_prompt)
|
||||
demonstrate_combined_sampling(demo_prompt)
|
||||
# TODO[Superjomn]: enable them once the PyTorch backend supports them
|
||||
# demonstrate_multiple_sequences(llm, demo_prompt)
|
||||
# demonstrate_beam_search(demo_prompt)
|
||||
demonstrate_multiple_sequences(demo_prompt)
|
||||
demonstrate_beam_search(demo_prompt)
|
||||
demonstrate_with_logprobs(demo_prompt)
|
||||
|
||||
print("\n🎉 All sampling demonstrations completed!")
|
||||
@ -219,6 +236,8 @@ def main(model: Optional[str], demo: str, prompt: Optional[str]):
|
||||
demonstrate_combined_sampling(demo_prompt)
|
||||
elif demo == "multiple":
|
||||
demonstrate_multiple_sequences(demo_prompt)
|
||||
elif demo == "beam":
|
||||
demonstrate_beam_search(demo_prompt)
|
||||
elif demo == "logprobs":
|
||||
demonstrate_with_logprobs(demo_prompt)
|
||||
elif demo == "all":
|
||||
|
||||
@ -3279,8 +3279,6 @@ def identity(input: Tensor) -> Tensor:
|
||||
'''
|
||||
Add an identity operation.
|
||||
|
||||
TODO: Document why it can be done using a plugin!!!
|
||||
|
||||
Parameters:
|
||||
input : Tensor
|
||||
The input tensor.
|
||||
|
||||
Loading…
Reference in New Issue
Block a user