mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
* Update TensorRT-LLM --------- Co-authored-by: Sherlock Xu <65327072+Sherlock113@users.noreply.github.com>
68 lines
2.4 KiB
Python
68 lines
2.4 KiB
Python
import argparse
import pathlib as pl

import numpy as np

import tensorrt_llm.bindings.executor as trtllm

# This example shows how to use the python bindings to create an executor,
# enqueue a request, and get the generated tokens.

# First, follow the steps in README.md to generate the engines.

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Executor Bindings Example")
    parser.add_argument("--model_path",
                        type=str,
                        required=True,
                        help="Directory containing model engine")
    parser.add_argument("--dump_tensors",
                        action="store_true",
                        help="Dump debug tensors to files")
    args = parser.parse_args()

    # Number of tokens to generate for the single example request.
    max_tokens = 2

    # Select which tensors should be kept or dumped.
    # NOTE(review): debug_tensors_max_iterations=0 appears to mean "dump to
    # files" (read back below from /tmp/tllm_debug), while a positive value
    # keeps that many iterations in memory — confirm against the
    # trtllm.DebugConfig documentation.
    debug_config = trtllm.DebugConfig(
        debug_tensor_names=["sequence_length"],
        debug_tensors_max_iterations=0 if args.dump_tensors else max_tokens)

    # Create the executor.
    executor = trtllm.Executor(
        args.model_path, trtllm.ModelType.DECODER_ONLY,
        trtllm.ExecutorConfig(1, debug_config=debug_config))

    if executor.can_enqueue_requests():
        # Create the request.
        request = trtllm.Request(input_token_ids=[1, 2, 3, 4],
                                 max_tokens=max_tokens)

        # Enqueue the request.
        request_id = executor.enqueue_request(request)

        # Wait for the new tokens.
        responses = executor.await_responses(request_id)
        output_tokens = responses[0].result.output_token_ids

        # Print tokens.
        print(output_tokens)

        if args.dump_tensors:
            # Debug tensors were written to disk; read them back with numpy.
            print("debug tensors from files:")
            debug_dir = pl.Path("/tmp/tllm_debug/PP_1/TP_1")
            if debug_dir.is_dir():
                # One subdirectory per generation iteration.
                for iter_dir in [x for x in debug_dir.iterdir() if x.is_dir()]:
                    print(iter_dir.name)
                    for file in [x for x in iter_dir.iterdir() if x.is_file()]:
                        print(file.name, np.load(file))
            else:
                print("debug dir not found")
        else:
            # Debug tensors were kept in memory; fetch them from the executor.
            print("debug tensors from queue:")
            debug_tensors = executor.get_latest_debug_tensors()
            for debug_iter in debug_tensors:
                print(f"iteration {debug_iter.iter}")
                for name, tensor in debug_iter.debug_tensors.items():
                    print(name, tensor)