TensorRT-LLMs/examples/bindings/executor/example_debug.py
2024-09-03 12:14:23 +02:00

53 lines
1.8 KiB
Python

import argparse
import pathlib as pl
import numpy as np
import tensorrt_llm.bindings.executor as trtllm
# This example shows how to use the python bindings to create an executor with
# a debug configuration, enqueue a request, get the generated tokens, and print
# the debug tensors that were dumped to disk.
# First, follow the steps in README.md to generate the engines.
def print_debug_tensors(debug_dir):
    """Print every tensor file stored one directory level below ``debug_dir``.

    For each sub-directory of ``debug_dir`` the directory name is printed,
    followed by one line per regular file in it: the file name and the array
    returned by ``np.load``. Entries are visited in sorted name order so the
    output is reproducible between runs.

    Args:
        debug_dir: Path (``str`` or ``pathlib.Path``) of the dump directory.

    Returns:
        None. If ``debug_dir`` is not an existing directory a short notice is
        printed instead of raising ``FileNotFoundError``.
    """
    debug_dir = pl.Path(debug_dir)
    if not debug_dir.is_dir():
        print(f"no debug tensors found: {debug_dir} is not a directory")
        return
    for iter_dir in sorted(p for p in debug_dir.iterdir() if p.is_dir()):
        print(iter_dir.name)
        for tensor_file in sorted(p for p in iter_dir.iterdir()
                                  if p.is_file()):
            print(tensor_file.name, np.load(tensor_file))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Executor Bindings Example")
    parser.add_argument("--model_path",
                        type=str,
                        required=True,
                        help="Directory containing model engine")
    args = parser.parse_args()

    # Alternative configuration, kept for reference:
    # debug_config = trtllm.DebugConfig(dump_input_tensors=True,
    #                                   dump_output_tensors=True,
    #                                   debug_tensor_names=["test"])

    # Select which tensors should be dumped.
    debug_config = trtllm.DebugConfig(debug_tensor_names=["host_request_types"])

    # Create the executor.
    executor = trtllm.Executor(
        args.model_path, trtllm.ModelType.DECODER_ONLY,
        trtllm.ExecutorConfig(1, debug_config=debug_config))

    if executor.can_enqueue_requests():
        # Create the request.
        request = trtllm.Request(input_token_ids=[1, 2, 3, 4], max_tokens=2)

        # Enqueue the request.
        request_id = executor.enqueue_request(request)

        # Wait for the new tokens.
        responses = executor.await_responses(request_id)
        response = responses[0]

        # Report a failed request instead of reading the result of an
        # errored response.
        if response.has_error():
            print(f"request {request_id} failed: {response.error_msg}")
        else:
            # Print tokens.
            print(response.result.output_token_ids)

    print("debug tensors:")
    print_debug_tensors("/tmp/tllm_debug/PP_1/TP_1")