### Control generated text using logits post processor
import typing as tp

import torch

from tensorrt_llm import LLM, SamplingParams


def logits_post_processor(req_id: int, logits: torch.Tensor,
                          ids: tp.List[tp.List[int]], stream_ptr: int,
                          client_id: tp.Optional[int]):
    """Force one fixed token at every step by overwriting ``logits`` in place.

    Every entry of ``logits`` is set to ``-inf`` and then the entry for token
    id 42 (indexed along the last dimension) is set to 0, so that token is the
    only candidate left irrespective of the prompt.

    Refer to ../bindings/executor/example_logits_processor.py for a more
    sophisticated callback that generates JSON structured output.

    Args:
        req_id: Not used by this callback.
        logits: Tensor that is modified in place; nothing is returned.
        ids: Not used by this callback.
        stream_ptr: Integer stream handle, wrapped with
            ``torch.cuda.ExternalStream`` so the writes are issued on it.
        client_id: Not used by this callback.
    """
    forced_token = 42
    # Wrap the handle received from the caller and make it the current stream
    # for the two in-place writes below.
    external_stream = torch.cuda.ExternalStream(stream_ptr)
    with torch.cuda.stream(external_stream):
        logits[:] = float("-inf")
        logits[..., forced_token] = 0


# Several callbacks can be registered, each under its own name, when the LLM
# is initialized.
llm = LLM(
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    logits_post_processor_map={"my_logits_pp": logits_post_processor},
)

# Sample prompts
prompts = [
    "Hello, my name is",
    "The president of the United States is",
]

# Generate text
for prompt_id, prompt in enumerate(prompts):
    # Only odd-numbered prompts use the logits post processor callback. Each
    # prompt can select one callback, by name, from the choices that were
    # provided to LLM.
    callback_kwargs = ({"logits_post_processor_name": 'my_logits_pp'}
                       if prompt_id % 2 else {})
    sampling_params = SamplingParams(temperature=0.8,
                                     top_p=0.95,
                                     **callback_kwargs)

    for output in llm.generate([prompt], sampling_params):
        print(
            f"Prompt: {output.prompt!r}, Generated text: {output.outputs[0].text!r}"
        )

# Got output like
# Prompt: 'Hello, my name is', Generated text: '\n\nJane Smith. I am a student pursuing my degree in Computer Science at [university]. I enjoy learning new things, especially technology and programming'
# Prompt: 'The president of the United States is', Generated text: "''''''''''''''''''''''''''''''''"