TensorRT-LLMs/searchindex.js
2024-02-29 20:56:26 +08:00


Search.setIndex({"docnames": ["2023-05-17-how-to-add-a-new-model", "2023-05-19-how-to-debug", "_cpp_gen/runtime", "architecture", "batch_manager", "blogs/Falcon180B-H200", "blogs/H100vsA100", "blogs/H200launch", "blogs/XQA-kernel", "blogs/quantization-in-TRT-LLM", "build_from_source", "gpt_attention", "gpt_runtime", "graph-rewriting", "index", "inference_request", "lora", "memory", "new_workflow", "perf_best_practices", "performance", "performance_analysis", "precision", "python-api/tensorrt_llm.functional", "python-api/tensorrt_llm.layers", "python-api/tensorrt_llm.models", "python-api/tensorrt_llm.plugin", "python-api/tensorrt_llm.quantization", "python-api/tensorrt_llm.runtime"], "filenames": ["2023-05-17-how-to-add-a-new-model.md", "2023-05-19-how-to-debug.md", "_cpp_gen/runtime.rst", "architecture.md", "batch_manager.md", "blogs/Falcon180B-H200.md", "blogs/H100vsA100.md", "blogs/H200launch.md", "blogs/XQA-kernel.md", "blogs/quantization-in-TRT-LLM.md", "build_from_source.md", "gpt_attention.md", "gpt_runtime.md", "graph-rewriting.md", "index.rst", "inference_request.md", "lora.md", "memory.md", "new_workflow.md", "perf_best_practices.md", "performance.md", "performance_analysis.md", "precision.md", "python-api/tensorrt_llm.functional.rst", "python-api/tensorrt_llm.layers.rst", "python-api/tensorrt_llm.models.rst", "python-api/tensorrt_llm.plugin.rst", "python-api/tensorrt_llm.quantization.rst", "python-api/tensorrt_llm.runtime.rst"], "titles": ["How to add a new model", "How to debug", "Runtime", "TensorRT-LLM Architecture", "The Batch Manager in TensorRT-LLM", "Falcon-180B on a single H200 GPU with INT4 AWQ, and 6.7x faster Llama-70B over A100", "H100 has 4.6x A100 Performance in TensorRT-LLM, achieving 10,000 tok/s at 100ms to first token", "H200 achieves nearly 12,000 tokens/sec on Llama2-13B with TensorRT-LLM", "New XQA-kernel provides 2.4x more Llama-70B throughput within the same latency budget", "Speed up inference with SOTA quantization techniques in TRT-LLM", "Build from Source", "Multi-head, Multi-query and Group-query Attention", "C++ GPT Runtime", "Graph Rewriting Module", "Welcome to TensorRT-LLM\u2019s documentation!", "Inference Request", "Run gpt-2b + LoRA using GptManager / cpp runtime", "Memory Usage of TensorRT-LLM", "New Workflow", "Best Practices for Tuning the Performance of TensorRT-LLM", "Performance of TensorRT-LLM", "Performance Analysis of TensorRT-LLM", "Numerical Precision", "Functionals", "Layers", "Models", "Plugin", "Quantization", "Runtime"], "terms": {"thi": [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 28], "document": [0, 1, 3, 6, 7, 9, 10, 11, 12, 15, 17, 18, 19, 20, 21, 22, 23], "describ": [0, 1, 3, 4, 8, 10, 11, 12, 15, 16, 20, 21, 22, 23], "tensorrt": [0, 1, 2, 5, 8, 11, 12, 13, 22, 23, 28], "llm": [0, 1, 5, 8, 11, 12, 13, 22, 23], "what": [0, 21], "provid": [0, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 17, 18, 19, 20, 21, 23, 28], "low": [0, 9, 11, 19], "level": [0, 2, 3, 4, 17, 18, 21], "function": [0, 1, 2, 3, 4, 11, 12, 14, 17, 19, 20, 21, 22, 26, 28], "concat": [0, 19, 23], "sum": [0, 12, 13, 23], "etc": [0, 2, 17], "basic": 0, "layer": [0, 1, 2, 3, 11, 12, 13, 14, 16, 17, 18, 19, 22, 23], "linear": [0, 3, 17, 18, 22, 23], "layernorm": [0, 23, 24, 25], "high": [0, 3, 5, 9, 17, 19, 20], "mlp": [0, 1, 3, 16, 23, 25], "attent": [0, 3, 4, 5, 12, 14, 16, 17, 20, 23], "develop": [0, 3, 10, 12, 18, 20, 23], "need": [0, 3, 4, 10, 11, 12, 13, 17, 18, 19, 20, 21, 23, 25, 26, 28], "implement": [0, 3, 4, 5, 11, 12, 18, 
19, 20, 22, 23], "creat": [0, 2, 3, 4, 12, 13, 15, 17, 20, 21, 23, 25, 28], "directori": [0, 3, 4, 10, 18, 20, 25, 28], "tensorrt_llm": [0, 1, 2, 3, 4, 10, 11, 12, 13, 14, 16, 20, 23, 24, 25, 26, 27, 28], "e": [0, 2, 11, 12, 13, 16, 20, 21, 22, 23, 28], "g": [0, 16, 28], "bloom": [0, 12, 22], "write": 0, "py": [0, 1, 3, 10, 11, 13, 16, 18, 20, 23, 26, 28], "It": [0, 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 16, 19, 20, 22, 23], "": [0, 3, 4, 5, 7, 8, 10, 12, 13, 17, 18, 20, 22, 23, 24, 25, 28], "option": [0, 2, 6, 12, 13, 15, 17, 20, 23], "us": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 14, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28], "usual": [1, 3, 19, 23], "we": [1, 4, 8, 9, 10, 12, 13, 18, 20, 21, 23], "want": [1, 11, 18, 21, 23], "print": [1, 2, 11, 17, 20, 21], "intermedi": [1, 3, 11], "tensor": [1, 2, 3, 4, 5, 6, 7, 8, 12, 15, 18, 20, 22, 23, 24, 25, 28], "valu": [1, 2, 3, 4, 5, 6, 11, 12, 15, 16, 17, 18, 19, 22, 23, 25, 27, 28], "when": [1, 3, 4, 8, 9, 10, 11, 12, 15, 17, 19, 20, 22, 23, 24, 25, 28], "obei": 1, "defin": [1, 3, 4, 7, 11, 12, 13, 18, 22, 23, 24], "run": [1, 2, 3, 4, 5, 9, 10, 11, 12, 14, 17, 18, 19, 21, 22, 23, 28], "paradigm": 1, "should": [1, 2, 10, 13, 16, 17, 19, 20, 23, 24, 28], "mark": [1, 2, 4, 12, 13, 23], "interest": 1, "network": [1, 3, 11, 13, 17, 20, 22, 23], "output": [1, 2, 4, 5, 6, 7, 8, 9, 11, 13, 15, 19, 20, 21, 23, 24, 28], "Then": [1, 23], "runtim": [1, 4, 11, 14, 20, 21, 23, 24], "regist": [1, 4], "register_network_output": 1, "api": [1, 3, 10, 12, 17, 18, 19, 21, 23], "class": [1, 2, 3, 4, 9, 10, 11, 12, 13, 15, 23, 24, 25, 26, 27, 28], "modul": [1, 3, 10, 11, 12, 14, 18, 24, 25, 28], "def": [1, 3, 13], "__init__": [1, 3, 13], "self": [1, 3, 11, 13, 23, 25, 28], "hidden_s": [1, 13, 18, 20, 23, 24, 25, 28], "ffn_hidden_s": [1, 24, 25], "bia": [1, 3, 12, 15, 18, 20, 23, 24, 25], "true": [1, 2, 4, 12, 13, 15, 18, 19, 20, 23, 24, 25, 26, 28], "tp_group": [1, 23, 24], "none": [1, 13, 23, 24, 25, 26, 28], "tp_size": [1, 18, 20, 23, 24, 25], "1": [1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 28], "super": [1, 13], "fc": [1, 3, 18], "columnlinear": [1, 24], "gather_output": [1, 24], "fals": [1, 2, 4, 11, 12, 13, 15, 18, 20, 23, 24, 25, 26, 28], "proj": [1, 18], "rowlinear": [1, 24], "forward": [1, 2, 13, 19, 24, 25], "hidden_st": [1, 23, 24, 25, 28], "inter": 1, "relu": [1, 3, 18, 23, 25], "here": [1, 3, 10, 13, 15, 16, 17, 18, 21, 22, 23], "after": [1, 2, 3, 4, 10, 11, 12, 13, 16, 17, 19, 20, 21, 23, 24, 26], "return": [1, 2, 3, 4, 13, 15, 17, 23, 24, 25, 28], "k": [1, 3, 11, 12, 16, 22, 23], "v": [1, 2, 5, 6, 9, 11, 12, 16, 20, 22, 23], "gm": 1, "named_network_output": 1, "net": 1, "_mark_output": 1, "dtype": [1, 2, 3, 13, 16, 18, 20, 23, 24, 25, 28], "kei": [1, 3, 5, 9, 20, 25, 28], "i": [1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 26, 28], "full": [1, 4, 6, 7, 11, 12, 15, 17, 21], "exampl": [1, 3, 4, 5, 7, 9, 10, 11, 12, 13, 17, 19, 20, 22, 23, 28], "an": [1, 2, 3, 4, 5, 7, 9, 10, 11, 12, 13, 16, 17, 18, 19, 20, 21, 22, 23, 27, 28], "gpt": [1, 2, 3, 4, 6, 9, 11, 14, 17, 22, 23], "In": [1, 6, 9, 10, 11, 13, 17, 20, 22, 23], "residu": 1, "attention_output": 1, "data": [1, 2, 3, 5, 6, 7, 8, 9, 11, 15, 19, 20, 23, 25], "post_layernorm": [1, 18, 23], "mlp_output": 1, "build": [1, 3, 4, 11, 12, 13, 14, 16, 17, 26], "net_guard": [1, 13], "set_named_paramet": 1, "tensorrt_llm_gpt": [1, 3], "named_paramet": 1, "input": [1, 2, 3, 4, 5, 6, 7, 8, 9, 13, 15, 17, 20, 23, 24, 25, 28], "prepare_input": [1, 17, 25], 
"arg": [1, 13, 25], "max_batch_s": [1, 4, 11, 16, 17, 18, 19, 20, 23, 25, 28], "max_input_len": [1, 4, 16, 17, 18, 19, 20, 25, 28], "max_output_len": [1, 4, 16, 18, 20], "max_beam_width": [1, 4, 11, 17, 19, 23, 25, 28], "trt": [1, 3, 4, 6, 13, 17, 23, 25, 28], "str_dtype_to_trt": 1, "engin": [1, 3, 4, 8, 11, 12, 13, 17, 19, 23, 28], "rm": [1, 10, 20, 23], "rf": 1, "gpt2": [1, 12], "git": [1, 10, 16, 20], "clone": [1, 10, 16, 20], "http": [1, 10, 11, 16, 20, 22, 23], "huggingfac": [1, 3, 16, 18], "co": [1, 16, 23], "medium": [1, 9], "pushd": 1, "pytorch_model": 1, "bin": [1, 18, 21], "safetensor": [1, 18], "wget": 1, "q": [1, 5, 11, 12, 16, 23], "resolv": 1, "main": [1, 5, 8, 12, 15, 21, 23], "popd": 1, "python3": [1, 10, 16, 18, 20], "hf_gpt_convert": 1, "o": [1, 8, 13, 16, 17], "c": [1, 2, 3, 4, 11, 13, 19, 20, 21, 23], "parallel": [1, 3, 4, 5, 7, 8, 12, 17, 18, 20, 23, 24, 25], "storag": [1, 16, 20], "type": [1, 2, 3, 4, 6, 9, 11, 12, 13, 15, 16, 18, 22, 23, 25, 28], "float16": [1, 13, 15, 16, 18, 20, 25, 26], "model_dir": [1, 16, 18], "gpu": [1, 2, 6, 7, 8, 9, 10, 11, 12, 16, 18, 23, 25], "use_gpt_attention_plugin": [1, 28], "open": [1, 5, 12], "mode": [1, 3, 4, 11, 13, 17, 22, 23, 24, 28], "decod": [1, 2, 11, 12, 17, 21, 28], "generationsess": [1, 11, 17, 28], "model_config": [1, 20, 28], "engine_buff": [1, 28], "runtime_map": 1, "debug_mod": [1, 28], "gener": [1, 2, 3, 4, 5, 6, 8, 15, 17, 19, 20, 21, 23, 28], "info": [1, 21], "step": [1, 2, 3, 4, 5, 11, 12, 13, 18, 19, 20, 23, 28], "0": [1, 2, 3, 4, 6, 7, 9, 11, 12, 13, 16, 18, 19, 20, 21, 23, 24, 25, 28], "ctx_shape": 1, "ctx_buffer": 1, "_get_context_shape_buff": 1, "input_id": [1, 15, 25, 28], "max_input_length": [1, 19, 23, 24, 25], "input_length": [1, 23, 24, 25, 28], "position_id": [1, 25], "last_token_id": [1, 23, 25], "attention_mask": [1, 24, 25, 28], "this_src_cache_indirect": 1, "_set_shap": 1, "context": [1, 4, 9, 12, 15, 17, 21, 23, 28], "_set_buff": 1, "debug_buff": 1, "stream": [1, 2, 3, 4, 12, 15, 17, 19, 20, 28], "torch": [1, 10, 11, 23, 28], "cuda": [1, 2, 3, 10, 11, 12, 17, 19, 21, 28], "current_stream": 1, "cuda_stream": 1, "ok": 1, "_run": 1, "rais": 1, "runtimeerror": 1, "fail": [1, 17, 28], "synchron": [1, 2, 3], "6": [1, 9, 12, 16, 20, 23], "max_new_token": [1, 17, 25, 28], "next_step_shap": 1, "next_step_buff": [1, 28], "_get_next_step_shape_buff": 1, "batch_siz": [1, 5, 8, 11, 13, 17, 18, 19, 20, 23, 24, 28], "scfg": [1, 28], "num_beam": 1, "next_src_cache_indirect": 1, "next_context": 1, "see": [1, 2, 3, 4, 5, 8, 9, 10, 11, 12, 15, 16, 17, 19, 20, 21, 22, 23, 24, 25], "python": [1, 3, 12, 13, 16, 19, 22], "8": [1, 2, 5, 7, 8, 9, 11, 16, 17, 18, 20, 21, 22, 23], "dict_kei": 1, "logit": [1, 2, 4, 12, 15, 23, 28], "cache_indirect": [1, 11, 23, 24, 28], "past_key_0": 1, "past_value_0": 1, "present_key_0": 1, "present_value_0": 1, "past_key_1": 1, "past_value_1": 1, "present_key_1": 1, "present_value_1": 1, "past_key_2": 1, "past_value_2": 1, "present_key_2": 1, "present_value_2": 1, "past_key_3": 1, "past_value_3": 1, "present_key_3": 1, "present_value_3": 1, "past_key_4": 1, "past_value_4": 1, "present_key_4": 1, "present_value_4": 1, "past_key_5": 1, "past_value_5": 1, "present_key_5": 1, "present_value_5": 1, "past_key_6": 1, "past_value_6": 1, "present_key_6": 1, "present_value_6": 1, "past_key_7": 1, "past_value_7": 1, "present_key_7": 1, "present_value_7": 1, "past_key_8": 1, "past_value_8": 1, "present_key_8": 1, "present_value_8": 1, "past_key_9": 1, "past_value_9": 1, "present_key_9": 1, 
"present_value_9": 1, "past_key_10": 1, "past_value_10": 1, "present_key_10": 1, "present_value_10": 1, "past_key_11": 1, "past_value_11": 1, "present_key_11": 1, "present_value_11": 1, "past_key_12": 1, "past_value_12": 1, "present_key_12": 1, "present_value_12": 1, "past_key_13": 1, "past_value_13": 1, "present_key_13": 1, "present_value_13": 1, "past_key_14": 1, "past_value_14": 1, "present_key_14": 1, "present_value_14": 1, "past_key_15": 1, "past_value_15": 1, "present_key_15": 1, "present_value_15": 1, "past_key_16": 1, "past_value_16": 1, "present_key_16": 1, "present_value_16": 1, "past_key_17": 1, "past_value_17": 1, "present_key_17": 1, "present_value_17": 1, "past_key_18": 1, "past_value_18": 1, "present_key_18": 1, "present_value_18": 1, "past_key_19": 1, "past_value_19": 1, "present_key_19": 1, "present_value_19": 1, "past_key_20": 1, "past_value_20": 1, "present_key_20": 1, "present_value_20": 1, "past_key_21": 1, "past_value_21": 1, "present_key_21": 1, "present_value_21": 1, "past_key_22": 1, "past_value_22": 1, "present_key_22": 1, "present_value_22": 1, "past_key_23": 1, "past_value_23": 1, "present_key_23": 1, "present_value_23": 1, "sequence_length": [1, 23, 24, 28], "past_key_value_length": [1, 24], "2": [1, 2, 4, 5, 6, 7, 9, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 28], "3": [1, 5, 6, 7, 9, 11, 12, 13, 15, 16, 17, 18, 20, 23, 28], "4": [1, 7, 9, 12, 13, 16, 17, 20, 22, 23], "5": [1, 5, 6, 7, 9, 12, 16, 18, 19, 20, 23], "7": [1, 5, 6, 9, 12, 16, 20, 23], "9": [1, 4, 6, 12, 20, 23], "10": [1, 9, 23], "11": [1, 7, 9, 10, 23], "12": [1, 6, 18, 20, 21, 23], "13": [1, 8, 21, 23], "14": [1, 9, 18, 20, 21], "15": [1, 20], "16": [1, 6, 9, 17, 20, 22, 25], "17": [1, 20], "18": 1, "19": 1, "20": [1, 19, 20], "21": [1, 9], "22": [1, 23], "23": [1, 20], "0295": 1, "0256": 1, "0780": 1, "0562": 1, "0241": 1, "0273": 1, "0089": 1, "5882": 1, "1989": 1, "0464": 1, "6305": 1, "5967": 1, "8793": 1, "1056": 1, "7083": 1, "0889": 1, "0714": 1, "2931": 1, "1209": 1, "0886": 1, "5927": 1, "1048": 1, "3437": 1, "1085": 1, "0752": 1, "0739": 1, "6156": 1, "3454": 1, "3014": 1, "2653": 1, "7126": 1, "9685": 1, "1145": 1, "0084": 1, "9521": 1, "1425": 1, "devic": [1, 2, 4, 20, 21, 23, 28], "2129": 1, "5879": 1, "8172": 1, "7892": 1, "6887": 1, "6063": 1, "4184": 1, "0066": 1, "3895": 1, "9023": 1, "0686": 1, "2831": 1, "7935": 1, "5085": 1, "1696": 1, "5839": 1, "1375": 1, "0078": 1, "0810": 1, "1262": 1, "6260": 1, "1065": 1, "0529": 1, "7143": 1, "3322": 1, "8835": 1, "3427": 1, "8159": 1, "0622": 1, "2327": 1, "2217": 1, "2057": 1, "1475": 1, "3545": 1, "1673": 1, "1131": 1, "1268": 1, "1570": 1, "3972": 1, "8213": 1, "3282": 1, "8672": 1, "born": 1, "north": 1, "east": 1, "franc": 1, "soyer": 1, "train": [1, 3, 6, 9, 18], "chef": 1, "befor": [1, 2, 3, 4, 10, 11, 13, 17, 18, 20, 23, 28], "move": 1, "london": 1, "earli": 1, "If": [1, 3, 4, 9, 10, 11, 12, 13, 17, 19, 20, 23, 25, 28], "you": [1, 3, 4, 9, 10, 11, 12, 13, 17, 18, 20, 23, 28], "plugin": [1, 10, 11, 12, 13, 14, 17, 18, 20, 22, 23, 25], "can": [1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 15, 17, 19, 20, 22, 23, 25, 28], "set": [1, 2, 4, 11, 12, 13, 15, 17, 19, 20, 21, 23, 24, 25, 26, 28], "environ": [1, 4, 10, 12, 20], "variabl": [1, 4, 7, 12, 23], "cuda_launch_block": 1, "so": [1, 2, 10, 11, 13, 17, 18, 19, 20, 23, 24], "kernel": [1, 2, 3, 5, 11, 12, 17, 19, 20, 21, 23], "ar": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 28], "launch": [1, 2, 3, 4, 12], "statu": 1, "check": [1, 
17, 23], "immedi": [1, 11], "memori": [1, 2, 3, 4, 5, 6, 8, 9, 11, 12, 14, 20, 21, 28], "make": [1, 3, 9, 11, 13, 20, 23], "sure": [1, 20, 23], "respect": [1, 12, 17, 19, 20, 22, 23, 28], "time": [1, 2, 3, 4, 7, 9, 10, 19, 20, 23, 28], "shape": [1, 2, 3, 4, 11, 12, 13, 15, 16, 17, 18, 22, 23, 25, 28], "thei": [1, 3, 10, 11, 12, 15, 16, 19, 20, 22, 23, 25], "resid": 1, "correct": [1, 11, 16, 19], "cpu": [1, 2, 3, 4, 19, 21, 23], "namespac": [2, 4, 12], "includ": [2, 3, 4, 5, 6, 8, 10, 11, 12, 15, 16, 18, 19, 20, 22], "A": [2, 3, 4, 9, 11, 12, 16, 18, 23, 28], "helper": [2, 23], "manag": [2, 3, 11, 12, 14, 17, 19, 28], "host": [2, 4, 10, 19, 23], "public": [2, 9, 12], "ibufferptr": 2, "uniqueptr": 2, "itensorptr": 2, "cudastreamptr": 2, "std": [2, 4, 12], "shared_ptr": [2, 4], "explicit": [2, 12, 21, 23], "construct": [2, 3], "paramet": [2, 3, 11, 16, 17, 18, 19, 23, 24, 25, 28], "The": [2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 28], "all": [2, 3, 4, 7, 10, 11, 12, 13, 16, 17, 19, 20, 22, 23, 24, 26, 28], "oper": [2, 3, 4, 11, 12, 13, 17, 18, 19, 20, 23], "alloc": [2, 4, 11, 12, 17, 19, 23, 28], "de": 2, "copi": [2, 4, 12, 17, 19, 20, 23], "size_t": [2, 12], "size": [2, 4, 6, 7, 9, 11, 12, 15, 16, 20, 23, 24, 25], "nvinfer1": 2, "datatyp": [2, 3, 12, 23, 28], "kbyte_typ": 2, "const": [2, 4, 12], "given": [2, 4, 7, 12, 17, 21, 22, 23, 24, 25, 28], "dim": [2, 23, 24], "dimens": [2, 11, 12, 16, 17, 19, 23, 24, 25], "uvm": 2, "memorytyp": 2, "inlin": 2, "emptybuff": 2, "empti": [2, 4, 23], "mai": [2, 3, 4, 10, 11, 12, 17, 18, 19, 20, 21, 23, 26], "resiz": 2, "later": [2, 3, 16, 17], "emptytensor": 2, "reshap": 2, "void": [2, 3, 4, 12], "setzero": 2, "buffer": [2, 4, 12, 17, 23], "content": [2, 16, 17, 23], "zero": [2, 12, 22, 23, 24], "src": [2, 3, 23], "dst": 2, "srctype": 2, "dsttype": 2, "copyfrom": 2, "new": [2, 4, 6, 7, 11, 12, 13, 14, 17, 23, 28], "potenti": [2, 4, 15, 21], "differ": [2, 3, 9, 11, 12, 17, 18, 19, 20, 22, 23, 25], "templat": [2, 3], "typenam": [2, 3], "t": [2, 3, 11, 17, 21, 23], "vector": [2, 12, 23], "getstream": 2, "get": [2, 8, 10, 11, 12, 13, 21, 23, 28], "underli": [2, 4, 17], "memorypoolreserv": [2, 17], "current": [2, 4, 9, 11, 12, 16, 17, 19, 20, 23, 28], "reserv": [2, 4, 12, 17, 19], "pool": [2, 4, 11, 28], "memorypoolus": 2, "memorypoolfre": [2, 17], "free": [2, 3, 4, 12, 17, 20, 21], "memorypooltrimto": 2, "try": [2, 4, 17, 19, 20], "trim": 2, "byte": [2, 12, 28], "implicitli": 2, "static": [2, 21, 23, 24, 28], "pin": [2, 21], "pinnedpool": 2, "default": [2, 4, 10, 11, 12, 15, 17, 18, 19, 20, 23, 25, 28], "attribut": [2, 13, 28], "constexpr": 2, "auto": [2, 3, 4, 11, 12, 23], "kuint8": 2, "privat": [2, 12], "member": [2, 3, 4, 12, 13, 23], "mstream": 2, "initmemorypool": [2, 17], "int": [2, 3, 12, 18, 23, 24, 25, 26, 28], "typedef": 2, "sizetyp": 2, "int32_t": [2, 4, 15, 23], "tokenidtyp": 2, "stringptrmap": 2, "unordered_map": 2, "string": [2, 4, 12, 18, 23, 28], "pointer": [2, 4, 12, 23, 28], "cudaevent_t": 2, "unsign": [2, 4], "flag": [2, 8, 11, 12, 17, 19, 23], "cudaeventdisabletim": 2, "event": 2, "destroi": [2, 17], "destructor": [2, 4], "creation": [2, 23], "By": [2, 10, 12, 23], "disabl": [2, 4, 11, 12, 17, 19, 20, 23, 26, 28], "bool": [2, 4, 12, 13, 15, 18, 23, 24, 25, 26, 28], "ownsev": 2, "pass": [2, 4, 11, 12, 13, 16, 17, 21, 23, 24, 28], "exist": [2, 12, 28], "object": [2, 3, 12, 15, 17, 23, 24, 25, 26, 28], "whether": [2, 4, 11, 12, 23, 24, 28], "own": [2, 3, 4, 10, 12, 18], "associ": [2, 4, 10, 12, 16, 23], 
"element_typ": 2, "remove_pointer_t": 2, "eventptr": 2, "unique_ptr": 2, "delet": [2, 26], "mevent": 2, "mownsev": 2, "cudastreamnonblock": 2, "prioriti": 2, "cudastreamcreatewithflag": 2, "list": [2, 3, 4, 10, 12, 13, 15, 18, 20, 23, 24, 25, 28], "valid": [2, 15, 20, 23], "lower": [2, 8, 9, 12, 13, 17, 19, 23], "number": [2, 3, 4, 8, 11, 12, 15, 17, 20, 22, 23, 24], "repres": [2, 5, 9, 20, 23, 28], "higher": [2, 3, 4, 5, 6, 8, 12, 17, 19, 20], "cudadevicegetstreampriorityrang": 2, "more": [2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 15, 17, 18, 19, 20, 23], "inform": [2, 5, 8, 11, 12, 18, 21], "about": [2, 5, 8, 9, 17, 20], "meaning": 2, "cudastream_t": 2, "ownsstream": 2, "which": [2, 4, 5, 9, 10, 11, 12, 13, 16, 17, 18, 19, 20, 21, 23, 25, 26, 28], "wa": [2, 11, 12, 18, 20, 22, 24], "getdevic": 2, "record": [2, 13], "wait": [2, 4], "streamptr": 2, "mdevic": 2, "mownsstream": 2, "tensorptr": 2, "maxlength": 2, "maxattentionwindow": [2, 4], "sinktokenlength": 2, "maxbatchs": [2, 12], "endid": [2, 12], "finish": [2, 12, 28], "sequencelimitlength": 2, "embeddingbia": 2, "length": [2, 4, 5, 6, 7, 8, 9, 11, 12, 17, 19, 20, 23], "badwordslist": [2, 12], "stopwordslist": [2, 12], "norepeatngrams": 2, "batchslot": 2, "cacheindirect": 2, "sharedptr": 2, "id": [2, 4, 12, 15, 21, 23, 24, 28], "newtokensstep": 2, "newtoken": 2, "newtokensvec": 2, "finishedsum": 2, "logprob": [2, 4, 12], "cumlogprob": [2, 4], "parentid": 2, "beamhypothes": 2, "float": [2, 3, 6, 12, 15, 18, 19, 22, 23, 24, 25, 28], "knegativeinfin": 2, "1e20f": 2, "batchsiz": [2, 6, 12], "beamwidth": [2, 4, 12], "maxsequencelength": [2, 12, 17], "releas": [2, 5, 8, 9, 11, 12, 17, 20, 22, 23], "init": [2, 10, 20], "slice": [2, 23], "batchindex": 2, "outputidstgt": 2, "sequencelengthstgt": 2, "normedscor": 2, "minnormedscor": 2, "numbeam": 2, "isdon": 2, "ttensor": 2, "genericgenerationinput": 2, "padid": [2, 12], "pack": [2, 4, 12, 17, 19, 23], "maxnewtoken": [2, 12], "base": [2, 3, 5, 6, 9, 12, 17, 19, 21, 23, 24, 25, 26, 27, 28], "genericgenerationoutput": 2, "callback": [2, 12], "contextlogit": [2, 12], "generationlogit": [2, 12], "ontokengener": [2, 12], "igptdecod": 2, "subclass": 2, "virtual": [2, 24], "setup": [2, 11, 17, 28], "forwardasync": 2, "gathertre": 2, "finaloutputid": 2, "getsamplingconfig": 2, "acceptdrafttokensbyid": 2, "targettokenid": 2, "drafttokenid": 2, "contextlength": 2, "numdrafttoken": 2, "sequencelength": 2, "finishedvec": 2, "finishedfin": 2, "acceptdrafttokensbylogit": 2, "draftlogit": 2, "targetlogit": 2, "draftprob": 2, "targetprob": 2, "vocabs": [2, 12], "vocabsizepad": [2, 4, 12], "userandomacceptthreshold": 2, "randomacceptthreshold": 2, "curandstate_t": 2, "curandst": 2, "overrid": [2, 28], "mmanag": 2, "dynamicdecodelay": 2, "mdynamicdecodelay": 2, "mlogprobstil": 2, "msamplingconfig": 2, "support": [2, 4, 5, 6, 7, 8, 9, 11, 16, 18, 19, 23], "flight": [2, 11, 17], "batch": [2, 6, 7, 9, 14, 15, 16, 17, 20, 21, 23, 28], "maxbeamwidth": [2, 4, 12], "maxtokensperstep": 2, "call": [2, 3, 4, 11, 12, 13, 17, 21, 23, 25, 28], "newrequest": 2, "batchidx": 2, "decoder_batch": 2, "request": [2, 3, 6, 8, 11, 12, 14, 17, 19, 21, 23], "initi": [2, 17], "newbatch": 2, "tokenptr": 2, "one": [2, 4, 5, 10, 11, 12, 13, 15, 17, 18, 19, 20, 23, 28], "without": [2, 3, 4, 9, 11, 17, 23], "block": [2, 3, 4, 11, 12, 17, 21, 23, 28], "process": [2, 3, 4, 11, 12, 18, 19, 20, 21, 23], "token": [2, 4, 5, 8, 9, 11, 12, 15, 17, 21, 22, 23, 24, 28], "forwardsync": 2, "complet": [2, 4, 12, 15], "thread": [2, 4, 11, 19], "last": [2, 4, 
11, 12, 16, 19, 23], "getfinish": 2, "indic": [2, 4, 11, 12, 17, 18, 19, 20, 23, 24], "getoutputid": 2, "index": [2, 12, 14, 21, 23], "maxinputlength": [2, 12], "contain": [2, 3, 4, 11, 12, 13, 16, 18, 22, 23, 25, 28], "pad": [2, 4, 12, 13, 15, 16, 17, 23, 24], "final": [2, 4, 12, 16, 23], "gather": [2, 23], "beam": [2, 3, 4, 8, 12, 15, 17, 23], "search": [2, 3, 8, 12, 14, 23], "result": [2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 19, 21, 23, 24], "onli": [2, 3, 4, 9, 10, 11, 12, 13, 15, 16, 17, 19, 20, 23, 24, 28], "avail": [2, 3, 4, 5, 7, 10, 13, 17, 19, 20, 22], "getparentid": 2, "parent": 2, "collect": [2, 3, 4, 13, 20, 23], "dure": [2, 3, 4, 8, 11, 12, 13, 17, 19, 21, 23, 28], "getcumlogprob": 2, "cumul": [2, 23], "log": [2, 4, 11, 12, 15, 17], "probabl": [2, 12], "per": [2, 4, 5, 7, 8, 11, 12, 17, 21, 22, 23], "getlogprob": 2, "getallnewtoken": 2, "getnewtoken": 2, "iter": [2, 3, 4, 15, 19, 28], "within": [2, 3, 11, 20, 23], "getnbstep": 2, "execut": [2, 3, 10, 12, 17, 19, 23, 28], "each": [2, 3, 4, 11, 12, 13, 16, 17, 18, 19, 20, 21, 22, 23, 24, 28], "getnbfinish": 2, "sequenc": [2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 17, 20, 23, 24, 28], "gptdecoderptr": 2, "decodinginputptr": 2, "decodingoutputptr": 2, "postprocessrequest": 2, "mvocabs": 2, "mvocabsizepad": 2, "mbuffermanag": 2, "mforwardtoken": 2, "mforwardev": 2, "mdecod": 2, "mdecodinginput": 2, "mdecodingoutput": 2, "mjointdecodinginput": 2, "mjointdecodingoutput": 2, "mdrafttokenid": 2, "mdraftlogit": 2, "macceptbylogit": 2, "mnumdrafttoken": 2, "mcurandst": 2, "mnbstep": 2, "mfinish": 2, "mfinishedsum": 2, "mmaxnewtoken": 2, "mbeamwidth": 2, "mgeneratedtokensperstep": 2, "mfinishedstep": 2, "mdraftprob": 2, "mtargetprob": 2, "mmaxsequencelength": 2, "mmaxattentionwindow": 2, "msinktokenlength": 2, "mactualbatchs": 2, "mmaxtokensperstep": 2, "name": [2, 4, 12, 13, 15, 16, 18, 21, 23, 28], "version": [2, 10, 11, 12, 18, 20, 23], "precis": [2, 5, 9, 12, 14, 17], "tensorparallel": [2, 12], "pipelineparallel": [2, 12], "modelconfig": [2, 12, 28], "getmodelconfig": 2, "getnam": 2, "getvers": 2, "getprecis": 2, "gettensorparallel": 2, "getpipelineparallel": 2, "getworlds": 2, "enginefilenam": 2, "model": [2, 4, 5, 6, 7, 8, 9, 11, 14, 15, 16, 17, 18, 22, 23], "pars": 2, "json": [2, 4, 18, 20], "istream": 2, "filesystem": 2, "path": [2, 4, 10, 11, 18, 20, 23, 28], "mname": 2, "mversion": 2, "mprecis": 2, "mtensorparallel": 2, "mpipelineparallel": 2, "mgptmodelconfig": 2, "enum": 2, "modelvari": 2, "enumer": [2, 23, 27], "kgpt": 2, "kglm": 2, "nblayer": 2, "nbhead": 2, "hiddens": [2, 12], "getvocabs": 2, "noexcept": 2, "getvocabsizepad": 2, "worldsiz": [2, 12], "getnblay": 2, "getnbhead": 2, "getnbkvhead": 2, "setnbkvhead": 2, "nbkvhead": 2, "gethiddens": 2, "getsizeperhead": 2, "setsizeperhead": 2, "sizeperhead": 2, "getdatatyp": 2, "usegptattentionplugin": [2, 12], "usepackedinput": 2, "inputpack": [2, 12], "usepagedkvcach": 2, "pagedkvcach": [2, 12], "gettokensperblock": 2, "settokensperblock": 2, "tokensperblock": [2, 12], "quantmod": [2, 11, 12, 23, 24, 25, 27, 28], "getquantmod": 2, "setquantmod": 2, "supportsinflightbatch": 2, "getmaxbatchs": 2, "setmaxbatchs": 2, "getmaxbeamwidth": 2, "setmaxbeamwidth": 2, "getmaxinputlen": 2, "setmaxinputlen": 2, "maxinputlen": [2, 12], "getmaxsequencelen": 2, "setmaxsequencelen": 2, "maxsequencelen": [2, 12], "getmaxnumtoken": 2, "setmaxnumtoken": 2, "maxnumtoken": 2, "useprompttun": 2, "getmaxpromptembeddingtables": 2, "setmaxpromptembeddingtables": 2, "maxpromptembeddingtables": 2, "computecontextlogit": 
2, "computegenerationlogit": 2, "getmodelvari": 2, "setmodelvari": 2, "usecustomallreduc": 2, "customallreduc": 2, "setmaxdraftlen": 2, "maxdraftlen": 2, "getmaxdraftlen": 2, "getmaxtokensperstep": 2, "setusecontextfmhaforgener": 2, "usecontextfmhaforgener": 2, "getcontextfmhaforgener": 2, "setpagedcontextfmha": 2, "pagedcontextfmha": 2, "getpagedcontextfmha": 2, "useloraplugin": 2, "loramodul": [2, 16], "getloramodul": 2, "setloramodul": 2, "getmlphiddens": 2, "setmlphiddens": 2, "mlphiddens": 2, "getmaxlorarank": 2, "setmaxlorarank": 2, "maxlorarank": 2, "mnblayer": 2, "mnbhead": 2, "mnbkvhead": 2, "mhiddens": 2, "msizeperhead": 2, "mdatatyp": 2, "musegptattentionplugin": 2, "minputpack": 2, "mpagedkvcach": 2, "mtokensperblock": 2, "mquantmod": 2, "mmaxbatchs": 2, "mmaxbeamwidth": 2, "mmaxinputlen": 2, "mmaxsequencelen": 2, "mmaxnumtoken": 2, "mcomputecontextlogit": 2, "mcomputegenerationlogit": 2, "mmodelvari": 2, "musecustomallreduc": 2, "mmaxpromptembeddingtables": 2, "mmaxdraftlen": 2, "musecontextfmhaforgener": 2, "mpagedcontextfmha": 2, "museloraplugin": 2, "mloramodul": 2, "mmlphiddens": 2, "mmaxlorarank": 2, "batch_manag": [2, 4], "kv_cache_manag": 2, "loggerptr": 2, "ilogg": 2, "config": [2, 5, 11, 12, 15, 17, 20, 25, 28], "sessionconfig": [2, 12], "enginebuff": [2, 12], "engines": [2, 12], "logger": [2, 12], "nullptr": 2, "uint8_t": [2, 12], "enginefil": 2, "getlogg": 2, "getbuffermanag": 2, "getworldconfig": 2, "getlogitdatatyp": 2, "kvcachemanag": [2, 11, 28], "kvcacheconfig": [2, 4, 12, 17], "tokengeneratedcallback": 2, "usecudagraph": 2, "generatebatch": 2, "microbatchesoutput": 2, "microbatchesinput": 2, "createcontext": 2, "createbuff": 2, "nummicrobatch": 2, "createdecod": 2, "logitstyp": 2, "decoderperrequest": [2, 12], "createkvcachemanag": 2, "createcustomallreduceworkspac": 2, "executecontextstep": 2, "generationbatchesinput": 2, "generationbatchesoffset": 2, "executegenerationstep": 2, "microbatchoffset": 2, "microbatchesfinish": 2, "decoderstepasync": 2, "decoderstep": 2, "microbatchid": 2, "pp": [2, 5, 8, 12, 16, 23], "rank": [2, 4, 12, 16, 17, 23, 25, 28], "receiv": [2, 4, 23], "other": [2, 3, 5, 10, 11, 12, 17, 19, 20, 23], "shouldstopsync": 2, "shouldstop": 2, "prob": [2, 4, 12, 15], "send": [2, 3, 23], "them": [2, 4, 13, 17, 19, 20, 21, 23, 28], "first": [2, 3, 7, 9, 10, 11, 12, 13, 16, 18, 19, 21, 23], "asynchron": 2, "requir": [2, 3, 4, 5, 9, 10, 11, 12, 17, 19, 20, 23, 24], "access": [2, 4, 12, 20], "kvcacheaddsequ": 2, "firstbatchidx": 2, "initdecod": 2, "outputid": [2, 4], "popul": [2, 3, 4, 11, 12, 23], "refer": [2, 3, 4, 11, 12, 13, 16, 20, 23], "createontokengeneratedcallback": 2, "mmodelconfig": 2, "mworldconfig": 2, "ncclcommun": 2, "mpipelinecomm": 2, "mcommstream": 2, "mcommev": 2, "mcommptr": 2, "ipcmemori": 2, "mipcmemoryhandl": 2, "mdecodermaxsequencelength": 2, "mdecodermaxattentionwindow": 2, "mdecodersinktokenlength": 2, "mlogger": 2, "tllmruntim": [2, 12], "mruntim": 2, "mkvcachemanag": 2, "microbatchconfig": 2, "mmicrobatchconfig": 2, "runtimebuff": 2, "mbuffer": 2, "mreceivedev": 2, "mcudagraphmod": 2, "cudagraphexecutor": 2, "mcudagraphinst": 2, "friend": 2, "trtgptmodelv1": 2, "configur": [2, 3, 7, 11, 15, 16, 17, 19, 20, 28], "session": [2, 28], "width": [2, 4, 11, 12, 15, 17], "smaller": [2, 12, 17, 19, 20, 21, 23], "than": [2, 5, 6, 7, 9, 11, 12, 13, 17, 19, 23], "divid": [2, 23], "micro": [2, 4, 12, 17, 19], "cudagraphmod": [2, 12], "ctxmicrobatchs": [2, 12], "nullopt": 2, "genmicrobatchs": [2, 12], "hasinst": 2, "clear": [2, 28], 
"preparenextgraph": 2, "nextcontextid": 2, "cudagraph_t": 2, "graph": [2, 3, 12, 14, 17, 23], "updat": [2, 3, 7, 10, 19, 20], "uploadtostream": 2, "cudagraphexec_t": 2, "minstanc": 2, "numctxpergen": 2, "getgengraphid": 2, "flipflopid": 2, "generationbatchid": 2, "flip": [2, 23], "flop": 2, "between": [2, 3, 4, 11, 12, 17, 19, 21, 23], "instanc": [2, 3, 4, 12, 13, 17, 28], "numctxbatch": 2, "numgenbatch": 2, "ctxbatchsiz": 2, "genbatchs": 2, "util": [2, 3, 4, 5, 11, 12, 17, 19, 21], "loadengin": 2, "enginepath": 2, "struct": 2, "memorytypestr": 2, "kgpu": 2, "kcpu": 2, "kpin": 2, "kuvm": 2, "datatypetrait": 2, "kfloat": [2, 3], "char": 2, "sizeof": 2, "khalf": 2, "half": [2, 3, 23], "kint8": 2, "int8_t": 2, "int8": [2, 9, 17, 18, 23], "kint32": 2, "int32": [2, 11, 23], "kint64": 2, "int64_t": 2, "int64": [2, 23], "uint32_t": [2, 23], "uint32": 2, "uint64_t": [2, 4, 15], "uint64": 2, "kunsign": 2, "kbool": 2, "uint8": 2, "trtdatatyp": 2, "bufferdatatyp": 2, "pointerelementtyp": 2, "remove_reference_t": 2, "remove_const_t": 2, "constpointercast": 2, "ptr": 2, "d": [2, 4, 15, 16, 20, 23], "buffercast": 2, "ostream": 2, "kdatatyp": 2, "kisunsign": 2, "kispoint": 2, "For": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 17, 18, 19, 20, 23, 28], "convert": [2, 3, 4, 16, 18], "wrapper": [2, 13], "around": 2, "_unsign": 2, "ispoint": 2, "isunsign": 2, "getsiz": 2, "ktrtpointertyp": 2, "munsign": 2, "mpointer": 2, "kunderlyingtyp": 2, "uniqueconstptr": 2, "sharedconstptr": 2, "arrai": [2, 23, 28], "element": [2, 11, 12, 16, 22, 23], "getsizeinbyt": 2, "getcapac": 2, "capac": [2, 5, 7, 9], "getdatatypenam": 2, "getmemorytyp": 2, "getmemorytypenam": 2, "newsiz": 2, "op": [2, 13, 23], "equal": [2, 12, 17, 19, 23, 24], "reset": [2, 12, 28], "Not": [2, 9], "allow": [2, 4, 5, 8, 11, 12, 18, 19, 20, 21, 23], "offset": [2, 22, 23, 28], "view": [2, 23], "have": [2, 4, 5, 9, 11, 12, 16, 17, 18, 19, 20, 21, 23], "same": [2, 3, 4, 6, 11, 12, 13, 17, 19, 20, 23, 24, 28], "tconstptr": 2, "enable_if_t": 2, "is_const_v": 2, "independ": [2, 23], "wrap": [2, 3, 4, 23, 28], "cannot": [2, 3, 12, 19, 20, 23], "beyond": [2, 6, 19], "determin": [2, 11, 19, 22, 23, 25], "protect": 2, "tobyt": 2, "bufferrang": 2, "value_typ": 2, "size_typ": 2, "const_refer": 2, "const_point": 2, "const_iter": 2, "begin": 2, "end": [2, 3, 4, 11, 12, 15, 19, 20, 21, 23], "cbegin": 2, "cend": 2, "mdata": 2, "msize": 2, "actual": [2, 4, 9, 13, 19, 23], "maxseqlen": 2, "consttensorptr": 2, "bufferptr": 2, "inputlen": 2, "generatedtokensperstep": 2, "drafttoken": 2, "computecumlogprob": 2, "computelogprob": 2, "tensorconstptr": 2, "activ": [2, 3, 4, 5, 6, 9, 11, 12, 13, 19, 21, 22, 23], "also": [2, 3, 4, 7, 8, 9, 10, 11, 12, 13, 17, 18, 19, 21, 22, 23], "reshapebuff": 2, "its": [2, 3, 4, 5, 7, 11, 12, 13, 17, 18, 19, 23], "dimtyp": 2, "decltyp": 2, "getshap": 2, "volum": [2, 10], "squeez": 2, "remov": [2, 3, 10, 11, 12, 13, 17, 20, 23], "unit": [2, 10], "from": [2, 3, 4, 7, 9, 11, 12, 13, 14, 17, 18, 19, 20, 23, 24, 25, 28], "unsqueez": [2, 23], "add": [2, 3, 10, 11, 12, 13, 14, 18, 23, 28], "specifi": [2, 4, 10, 12, 13, 15, 16, 17, 19, 20, 21, 23, 25], "posit": [2, 23], "shapeequ": 2, "initializer_list": 2, "count": [2, 4, 21], "nbdim": 2, "volumenonneg": 2, "throw": 2, "where": [2, 3, 4, 5, 9, 11, 12, 15, 17, 18, 19, 20, 22, 23, 28], "ad": [2, 10, 11, 12, 13, 21, 23, 28], "w": [2, 8, 22], "r": [2, 15, 16, 20], "makeshap": 2, "conveni": 2, "tostr": 2, "lh": 2, "rh": 2, "compar": [2, 6, 7, 9, 12, 19, 23], "castsiz": 2, "setpeeraccess": 2, "enabl": [2, 
3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 16, 17, 19, 20, 22, 23, 24], "buffers": 2, "getcommptrstensor": 2, "flags_siz": 2, "max_all_reduce_block": 2, "allocateipcmemori": 2, "destroyipcmemori": 2, "mbuffers": 2, "mbufferptr": 2, "difftyp": 2, "ptrdiff_t": 2, "getgpu": 2, "getcpu": 2, "getpin": 2, "getuvm": 2, "getgpudiff": 2, "getcpudiff": 2, "getpinneddiff": 2, "getuvmdiff": 2, "dealloc": 2, "getinst": 2, "bytestostr": 2, "mgpu": 2, "mcpu": 2, "mpin": 2, "muvm": 2, "mgpudiff": 2, "mcpudiff": 2, "mpinneddiff": 2, "muvmdiff": 2, "genericprompttuningparam": 2, "embeddingt": 2, "task": [2, 18, 20, 22, 24, 28], "prompttuningen": 2, "filltaskstensor": 2, "taskshost": 2, "numcontextrequest": 2, "reqbeamwidth": 2, "reqpromptlength": 2, "packedinput": 2, "optvec": 2, "floattyp": 2, "temperatur": [2, 12, 15], "minlength": [2, 12, 15], "repetitionpenalti": [2, 12, 15], "presencepenalti": [2, 12, 15], "frequencypenalti": [2, 12, 15], "topk": [2, 12, 15], "topp": [2, 12, 15], "randomse": [2, 12, 15], "toppdecai": [2, 12], "toppmin": [2, 12], "toppresetid": [2, 12], "beamsearchdiversityr": [2, 12], "lengthpenalti": [2, 12, 15], "draftacceptancethreshold": 2, "normalizelogprob": 2, "sever": [2, 4, 11, 13, 17, 18, 23, 28], "asciichar": 2, "msg": 2, "getlevel": 2, "setlevel": 2, "gpuspernod": [2, 12], "kdefaultgpuspernod": 2, "deviceid": 2, "istensorparallel": 2, "ispipelineparallel": 2, "getrank": [2, 12], "getgpuspernod": 2, "getgpuspergroup": 2, "getpipelineparallelrank": 2, "gettensorparallelrank": 2, "isfirstpipelineparallelrank": 2, "islastpipelineparallelrank": 2, "my": 2, "pipelin": [2, 3, 4, 5, 8, 12, 17], "getlastrank": 2, "getpipelineparallelgroup": 2, "validconfig": 2, "mpi": [2, 3, 4, 12, 21, 23], "mrank": 2, "mgpuspernod": 2, "mdeviceid": 2, "toolkit": [3, 9], "assembl": 3, "optim": [3, 4, 5, 7, 8, 9, 12, 17, 20, 23], "solut": 3, "perform": [3, 4, 5, 7, 8, 10, 11, 12, 13, 14, 23], "larg": [3, 5, 9, 11, 17, 19, 20, 21, 23], "languag": [3, 5, 12, 21, 23], "infer": [3, 5, 6, 7, 12, 14, 16, 19, 20, 22, 23, 28], "offer": [3, 9], "effici": [3, 11, 12, 17, 19, 20], "nvidia": [3, 5, 6, 7, 9, 10, 17, 20, 23], "compon": [3, 4, 9, 11], "those": [3, 11, 12, 18, 19, 20, 21, 22, 23, 24], "well": [3, 4, 6, 11, 12, 19, 21, 22], "backend": [3, 4, 19], "triton": [3, 16], "server": [3, 6], "easili": [3, 23], "web": 3, "servic": 3, "through": [3, 4, 10, 11, 12, 13, 24], "As": [3, 11, 13, 17, 19, 22, 23], "user": [3, 4, 8, 9, 10, 11, 12, 13, 17, 19, 20, 21, 22, 23, 25], "veri": [3, 9, 11, 12, 18, 19], "either": [3, 4, 17, 23], "your": [3, 9, 10, 12, 20, 21], "select": [3, 4, 9, 17, 23], "pre": [3, 11, 17, 20, 23], "onc": [3, 4, 10, 11, 12, 13, 17, 20, 21, 23], "must": [3, 4, 10, 11, 12, 16, 19, 22, 23, 24, 28], "framework": [3, 18, 23], "outsid": 3, "scope": 3, "checkpoint": [3, 22], "download": 3, "variou": [3, 11, 20], "To": [3, 4, 7, 10, 11, 16, 17, 20, 21, 22, 23], "illustr": [3, 13], "point": [3, 4, 6, 9, 11, 12, 19, 20, 22, 23], "lot": [3, 11, 19], "obtain": [3, 4, 12, 19, 23], "hub": 3, "nemo": [3, 18, 22], "pytorch": [3, 13, 18, 23], "equip": 3, "recreat": 3, "wai": [3, 11, 13, 17], "eas": 3, "alreadi": [3, 11, 12, 13, 19], "hand": 3, "standard": [3, 5, 23], "togeth": [3, 5, 11, 12, 16, 19, 22], "along": [3, 11, 23], "extens": [3, 18], "sampl": [3, 11, 15, 21, 28], "top": [3, 11, 12], "p": [3, 12, 15, 23], "exhaust": 3, "found": [3, 6, 10, 11, 12, 13, 22], "recommend": [3, 4, 6, 9, 10, 11, 12, 17, 19, 20], "onlin": [3, 7, 19, 20], "serv": [3, 4, 8, 9, 11, 19], "explain": [3, 12, 17, 19, 22, 23], "mention": [3, 
12], "abov": [3, 4, 9, 10, 12, 16, 17, 20, 21], "ha": [3, 4, 5, 9, 10, 11, 12, 15, 16, 17, 18, 19, 22, 23], "built": [3, 4, 10, 12, 17, 19, 20, 23], "power": [3, 7, 9], "represent": [3, 12, 13], "deep": [3, 6, 7, 21, 23], "neural": [3, 13], "becom": [3, 9, 11, 13], "familiar": [3, 12], "core": [3, 5, 6, 8, 13, 18], "concept": 3, "section": [3, 19, 20, 23], "proceed": 3, "further": [3, 5, 9], "builder": 3, "That": [3, 4, 11, 12, 23], "create_network": 3, "method": [3, 5, 11, 12, 18, 20, 22, 23, 28], "inetworkdefinit": [3, 13], "simpl": [3, 10, 12, 13], "insert": [3, 13, 23], "iactivationlay": 3, "act_typ": [3, 23], "activationtyp": [3, 23], "default_trtnet": 3, "add_activ": 3, "trt_tensor": [3, 23], "_create_tensor": 3, "get_output": [3, 13], "even": [3, 4, 9, 11, 12, 17, 23], "easier": 3, "few": [3, 9, 20], "most": [3, 5, 6, 7, 9, 12, 17, 19, 21, 23], "deriv": [3, 17, 19], "partial": 3, "sigmoid": [3, 23], "special": [3, 5, 11], "advanc": [3, 10], "silu": [3, 23], "travers": 3, "transform": [3, 11, 17, 18], "expos": [3, 4, 10, 12, 19], "ilay": [3, 13], "next": [3, 6, 17, 28], "done": [3, 20, 23, 25, 26], "build_engin": 3, "build_serialized_network": 3, "everyth": 3, "work": [3, 4, 10, 11, 12, 13, 19, 20, 22, 23], "expect": [3, 4, 7, 11, 12, 19, 20, 23], "produc": [3, 12, 13, 19, 23], "ihostmemori": [3, 28], "store": [3, 4, 6, 11, 12, 17, 19, 22, 23], "binari": [3, 21, 23], "file": [3, 4, 11, 13, 18, 20, 21, 28], "emb": [3, 24], "known": [3, 4, 11, 23], "reason": [3, 11, 12, 23], "bound": [3, 7, 12, 17, 23, 28], "lead": [3, 13, 19], "code": [3, 4, 9, 10, 11, 12, 13, 20, 22, 23], "like": [3, 4, 9, 11, 12, 13, 17, 18, 19, 20, 22, 23], "two": [3, 4, 6, 10, 11, 12, 13, 18, 19, 20, 23, 24, 28], "out_featur": [3, 18, 24], "in_featur": [3, 18, 24], "fromfil": 3, "note": [3, 4, 9, 10, 12, 13, 16, 17, 18, 19, 22, 23, 28], "refit": 3, "featur": [3, 4, 9, 10, 11, 12, 13, 16, 18, 19, 20, 23, 26], "refit_engin": 3, "One": [3, 18, 23], "techniqu": [3, 4, 5, 11, 19, 22], "improv": [3, 4, 5, 6, 7, 8, 9, 11, 12, 19, 20], "help": [3, 4, 10, 11, 13, 19, 20], "reduc": [3, 4, 5, 8, 10, 11, 17, 19, 23], "amount": [3, 17, 19, 28], "transfer": 3, "dram": 3, "comput": [3, 4, 5, 6, 7, 9, 11, 12, 17, 19, 20, 21, 23], "locat": [3, 10, 12, 13, 23], "multiprocessor": [3, 19], "overhead": [3, 4, 19], "small": [3, 4, 9, 11, 17, 19], "addit": [3, 4, 7, 10, 11, 21, 22, 23, 24], "cost": [3, 17], "classic": 3, "matrix": [3, 8, 11, 12, 23], "multipl": [3, 4, 11, 12, 13, 15, 19, 20, 23, 24], "matmul": [3, 11, 19, 22, 23], "preced": 3, "written": 3, "b": [3, 5, 6, 7, 8, 13, 16, 20, 23, 28], "global": [3, 4, 11], "read": [3, 4, 11, 12], "again": 3, "suboptim": 3, "why": [3, 17, 23], "identifi": [3, 4, 12, 16, 23], "automat": [3, 4, 12, 13, 17, 22, 23], "appli": [3, 11, 13, 18, 22, 23, 28], "With": [3, 11, 12], "instead": [3, 5, 10, 12, 13, 17, 19, 23], "twice": 3, "fuse": [3, 11], "algorithm": [3, 9, 11, 12, 18, 19, 23, 25], "possibl": [3, 4, 10, 11, 12, 17, 19, 23], "almost": [3, 17], "infinit": 3, "some": [3, 4, 11, 12, 13, 17, 18, 19, 20], "involv": [3, 9, 24], "modif": [3, 13], "flash": [3, 11], "multihead": [3, 5, 11, 23], "mani": [3, 4, 11, 19, 23], "arithmet": 3, "bmm": 3, "softmax": [3, 11, 23], "stand": [3, 19, 20], "product": [3, 7, 11, 12, 23], "interleav": [3, 11], "loop": [3, 4, 12, 19, 20], "non": [3, 4, 9, 11, 20, 23], "trivial": 3, "necessarili": [3, 17], "someth": 3, "discov": 3, "might": [3, 4, 9, 10, 17], "polyhedr": 3, "alwai": [3, 11, 12, 18], "risk": [3, 4, 19], "uncommon": 3, "overcom": [3, 11], "inevit": 
3, "limit": [3, 9, 11, 12, 13, 17, 19, 20, 23], "mechan": [3, 4], "map": [3, 4, 11, 13, 15, 18, 20, 23, 25, 28], "cpp": [3, 4, 10, 11, 12, 14, 20, 23], "follow": [3, 4, 9, 10, 12, 13, 18, 19, 20, 22, 23], "interfac": [3, 28], "extend": [3, 23], "custom": [3, 5, 12, 20, 23, 28], "guid": [3, 9, 21, 23], "trigger": [3, 11, 12, 13, 19, 21], "encapsul": [3, 4, 11, 12, 23], "fairli": 3, "quantizetensorplugin": 3, "enqueu": [3, 17, 28], "inputdesc": 3, "invokequant": 3, "els": [3, 21, 23], "quantiz": [3, 5, 6, 11, 12, 14, 20, 23, 24, 25, 28], "cu": 3, "quantizedkernel": 3, "grid": 3, "detail": [3, 4, 11, 15, 17, 19, 20, 23, 25], "how": [3, 4, 10, 12, 14, 17, 20, 21, 22, 23], "head": [3, 5, 12, 14, 17, 23], "queri": [3, 5, 12, 14, 17, 20, 23], "group": [3, 5, 12, 14, 20, 22, 23, 24], "role": 3, "load": [3, 4, 8, 18, 19, 20, 25], "drive": 3, "typic": [3, 9, 12, 13, 17], "regress": [3, 4, 11, 12], "charg": [3, 12], "both": [3, 4, 6, 9, 11, 12, 13, 17, 19, 20, 22, 23, 24], "bodi": 3, "design": [3, 9, 19], "singl": [3, 4, 7, 8, 11, 12, 17, 19, 20, 22, 23, 25], "system": [3, 4, 6, 12, 19, 20], "commun": [3, 4, 12, 23], "primit": 3, "nccl": [3, 19, 23], "librari": [3, 10, 20], "presenc": [3, 12], "connect": 3, "nvswitch": 3, "dgx": [3, 12], "ncclplugin": 3, "allreduc": [3, 23], "allgath": [3, 23], "gather_dim": [3, 23], "tgt": [3, 23], "recv": [3, 23], "former": [3, 9], "split": [3, 11, 12, 17, 23, 28], "across": [3, 4, 7, 11, 12, 13, 19, 23], "entir": [3, 5, 17, 23], "sibl": 3, "distribut": [3, 11, 12, 17, 23], "subset": [3, 4, 12, 23], "happen": [3, 12], "boundari": [3, 12, 17], "balanc": [3, 12, 19], "bandwidth": [3, 5, 6, 7, 9, 12], "incur": 3, "issu": [3, 11], "less": [3, 6, 11, 12, 23], "term": [3, 19, 23], "continu": [3, 4, 7, 9, 11, 12], "throughput": [3, 4, 5, 6, 7, 11, 19], "reli": [4, 11, 13, 21, 22], "aim": [4, 18], "queue": 4, "elimin": 4, "inclus": [4, 12, 23], "newli": [4, 12], "arriv": 4, "via": [4, 10, 23], "hook": 4, "softwar": [4, 11, 12], "client": 4, "text": [4, 11, 12], "interact": 4, "Their": 4, "signatur": [4, 13, 23], "h": [4, 11, 12, 23], "These": [4, 5, 7, 8, 20], "invok": [4, 12, 13], "regular": [4, 11, 23], "interv": 4, "varieti": 4, "below": [4, 7, 8, 9, 11, 12, 13, 15, 16, 20], "entri": [4, 16], "getinferencerequestscallback": 4, "inferencerequest": [4, 15, 16], "take": [4, 11, 12, 13, 18, 19, 20, 23, 24], "maximum": [4, 7, 11, 12, 17, 20, 23], "accept": [4, 10, 12, 23], "neg": [4, 19, 23], "unbound": 4, "64": [4, 6, 7, 11, 12, 18, 20, 25, 28], "bit": [4, 6, 11, 12, 22, 23], "uniqu": [4, 11, 12, 18], "deliv": [4, 5, 8, 20], "sendresponsecallback": 4, "conform": 4, "boolean": [4, 12, 23], "error": [4, 12, 17], "messag": [4, 17, 23], "been": [4, 11], "encount": 4, "case": [4, 6, 9, 11, 12, 17, 20, 22, 23], "properli": [4, 19], "handl": [4, 5, 19, 23, 24], "Its": [4, 11, 12, 23], "reject": 4, "ani": [4, 13, 25, 28], "sent": 4, "correspond": [4, 11, 13, 16, 21, 22, 23, 24, 28], "being": [4, 11, 12, 20], "reus": [4, 17], "appear": [4, 11, 12, 23], "third": [4, 12], "argument": [4, 10, 12, 17, 19, 23], "stop": [4, 12, 13, 15, 21, 28], "pollstopsignalcallback": 4, "unordered_set": 4, "ensur": [4, 13], "report": [4, 17, 21], "returnbatchmanagerstatscallback": 4, "packag": [4, 10], "field": [4, 12, 18, 20, 22], "timestamp": [4, 21], "put_tim": 4, "tm": 4, "m": [4, 6, 20, 22, 23], "y": [4, 8, 10, 22], "counter": [4, 21], "increas": [4, 6, 7, 11, 12, 19, 21, 23], "monoton": 4, "over": [4, 6, 7, 9, 21, 23], "max": [4, 5, 6, 7, 15, 17, 21, 23], "page": [4, 7, 12, 14, 17, 23], "kv": [4, 
5, 9, 12, 21, 23], "cach": [4, 9, 12, 21, 22, 23], "schedul": [4, 17, 21], "total": [4, 11, 12, 17, 18, 19, 21], "phase": [4, 5, 8, 12, 13, 15, 17, 19, 21, 23], "microbatch": [4, 21], "v1": [4, 19], "slot": [4, 12], "trtenginepath": 4, "modeltyp": 4, "scheme": [4, 20], "inflightbatch": 4, "inflightfusedbatch": 4, "tradit": 4, "lockstep": 4, "until": [4, 12, 19], "up": [4, 6, 7, 11, 16, 20], "dynam": [4, 23, 25, 28], "incorpor": 4, "under": [4, 9, 10], "soon": [4, 5, 6, 7, 8, 9, 20], "condit": [4, 13, 19, 23], "met": [4, 19], "leverag": [4, 5, 15], "fusion": [4, 13, 17, 22], "opportun": 4, "strictli": 4, "superior": 4, "schedulerpolici": [4, 19], "polici": [4, 17], "max_util": [4, 19], "while": [4, 5, 6, 8, 9, 12, 13, 17, 22, 23], "maxim": [4, 5, 7, 19], "paus": [4, 19], "restart": 4, "depend": [4, 7, 11, 12, 13, 17, 18, 19, 20, 23], "peak": [4, 5, 6], "guaranteed_no_evict": [4, 19], "conserv": [4, 19], "guarante": [4, 12, 19], "start": [4, 13, 19, 20, 21, 23, 25], "evict": 4, "trtgptmodeloptionalparam": [4, 19], "maxtoken": [4, 12, 17], "unspecifi": [4, 23], "consid": [4, 9, 12, 19, 20, 23], "freegpumemoryfract": [4, 12, 17], "attend": [4, 19], "slide": [4, 19, 23], "window": [4, 23], "streamingllm": [4, 23], "previou": [4, 6, 12], "mha": [4, 5, 11, 19, 23], "mqa": [4, 5, 8, 11, 19, 23], "fraction": [4, 12, 23], "minimum": [4, 11, 17, 23, 28], "enableblockreus": 4, "previous": [4, 5, 20], "enabletrtoverlap": 4, "partit": [4, 19], "concurr": [4, 6, 19], "hide": [4, 19], "howev": [4, 5, 11, 17, 19, 20], "give": [4, 19], "benefit": [4, 7, 9, 13, 19, 20], "big": [4, 19], "enough": [4, 11, 17, 19], "overlap": 4, "too": [4, 11, 19], "enablechunkedcontext": 4, "chunk": [4, 17, 23], "turn": [4, 10, 11, 17, 19, 28], "specif": [4, 6, 9, 10, 12, 13, 18, 20, 21, 23], "maxseqlength": [4, 12], "inputid": 4, "promptlength": 4, "gather_context_logit": [4, 12, 25, 28], "gather_all_token_logit": [4, 12], "otherwis": [4, 11, 12, 20, 23], "dummi": 4, "outputlength": 4, "gather_generation_logit": [4, 12, 25, 28], "gptmanagerbenchmark": [4, 21], "pleas": [4, 5, 8, 9, 11, 12, 13], "rememb": 4, "redund": 4, "much": [4, 17], "close": [4, 11, 17], "integr": 4, "item": [4, 28], "assum": [4, 23], "style": [4, 11], "autoregress": 4, "architectur": [4, 6, 10, 12, 14, 18, 20, 25], "spawn": 4, "worker": [4, 20], "constructor": [4, 12], "persist": [4, 9, 20], "intend": [4, 17], "back": 4, "safe": [4, 13], "retir": 4, "notifi": 4, "final_respons": 4, "intern": [4, 11, 17], "state": [4, 11, 12, 13, 23], "relat": [4, 10, 17, 21, 23], "freed": 4, "batchmanag": 4, "pathtotrtengin": 4, "trtgptmodeltyp": 4, "getinferencerequestscb": 4, "sendresponsecb": 4, "pollstopsignalcb": 4, "signal": 4, "returnbatchmanagerstatscb": 4, "stat": 4, "adjust": [4, 17, 19], "aggress": [4, 18, 19], "short": [4, 11, 28], "resum": 4, "visibl": 4, "effect": [4, 19], "latenc": [4, 6, 7, 9, 11, 12, 19, 23], "adopt": [4, 13, 18], "approach": [4, 13, 19], "know": [4, 19, 21], "suffici": [4, 19], "worst": 4, "consumpt": [4, 6, 11, 19], "termin": 4, "node": [4, 12, 19, 22, 23], "control": [4, 11, 12, 13, 19, 22, 23, 28], "cuda_visible_devic": 4, "care": 4, "taken": [4, 5, 6, 23], "broadcast": [4, 23], "seen": [4, 19], "hold": [4, 13, 16, 17, 24], "ident": [4, 23], "good": 4, "pair": [5, 23], "largest": [5, 6, 7, 12], "accur": 5, "sourc": [5, 8, 14, 18, 20, 23, 24, 25, 26, 27, 28], "141gb": 5, "fp8": [5, 7, 8, 9, 17, 23], "fit": [5, 6, 11], "eight": 5, "were": [5, 9, 12, 18, 20], "800": 5, "tok": [5, 7, 8, 20], "retain": [5, 7], "accuraci": [5, 19, 22, 23], 
"footprint": [5, 11, 17, 20], "great": 5, "preliminari": [5, 7, 8], "measur": [5, 7, 8, 9, 20], "subject": [5, 7, 8, 9, 23], "chang": [5, 7, 8, 10, 11, 17, 19, 23, 25, 28], "tp1": [5, 6, 7], "doe": [5, 11, 12, 17, 23, 28], "v0": [5, 6, 7, 8, 16], "7a": 5, "1xh200": 5, "order": [5, 11, 17, 19, 20, 23], "256": [5, 8, 12, 20, 21, 25], "128": [5, 6, 7, 8, 9, 11, 16, 20, 21, 26], "often": [5, 9, 12, 23], "advers": 5, "impact": [5, 9, 12, 19, 20], "decreas": [5, 6, 19], "4x": [5, 6, 7], "maintain": [5, 6, 9, 12, 22], "awar": 5, "weight": [5, 6, 9, 15, 16, 23, 24, 25, 28], "lin": 5, "et": 5, "al": 5, "2023": [5, 21], "compress": 5, "down": [5, 16, 23], "4bit": 5, "rel": [5, 23], "import": [5, 9, 12], "fp16": [5, 6, 9, 11, 16, 18, 23], "usag": [5, 8, 11, 13, 14, 19, 23], "capabl": [5, 10, 21], "performantli": 5, "hopper": [5, 6, 9, 10, 20], "similar": [5, 7, 11, 12, 13, 21, 23], "gqa": [5, 8, 11, 19, 20, 23], "achiev": [5, 9, 19], "out": [5, 6, 7, 8, 12, 15, 16, 17, 20, 21, 23], "803": 5, "2048": [5, 7, 8, 18, 20, 28], "941": [5, 8], "163": 5, "4096": [5, 20, 28], "946": 5, "263": 5, "llama2": [5, 11, 16], "8xh200": 5, "tp": [5, 6, 7, 8, 9, 12, 16, 20, 23], "dp": [5, 8], "960": 5, "192": 5, "560": 5, "96": [5, 20], "640": 5, "now": [5, 12, 18, 19], "6a": 5, "ainsli": 5, "variant": [5, 11, 23], "fewer": [5, 11, 19], "multi": [5, 10, 12, 14, 16, 18, 23, 25], "publish": [5, 8], "branch": [5, 8], "announc": [5, 8], "blog": [5, 8, 9], "calcul": [5, 6, 8, 19, 23], "second": [5, 7, 8, 12, 23], "out_tp": [5, 8], "output_seqlen": [5, 8], "total_lat": [5, 8], "glossari": [5, 8], "isl": [5, 6, 7, 8, 20], "osl": [5, 6, 7, 8, 20], "oom": [5, 8, 17], "evalu": [6, 7, 19], "amper": [6, 10, 20], "show": [6, 17], "faster": [6, 7, 11], "1st": [6, 17, 20, 23], "abl": [6, 11], "min": [6, 23], "applic": [6, 9, 15, 21], "10m": 6, "sxm": 6, "80gb": [6, 9, 20], "32": [6, 7, 17, 20, 22, 23], "sweep": 6, "success": 6, "j": [6, 9, 11, 12, 22, 23], "6b": [6, 23], "907": 6, "102": [6, 20], "185": [6, 20], "679": [6, 20], "481": 6, "111": 6, "speedup": [6, 8, 9], "0x": 6, "7x": 6, "behind": 6, "chart": 6, "tabl": [6, 9, 15, 19, 20, 23, 24, 28], "larger": [6, 7, 9, 11, 17, 20, 23], "stai": [6, 9], "tune": [6, 9, 14, 15, 17, 24, 28], "highlight": [6, 9], "llama": [6, 7, 9, 12, 16, 22], "come": [6, 12, 17], "recent": [6, 11], "demonstr": 6, "5x": [6, 9], "2x": [6, 7], "switch": [6, 9, 10, 17], "yet": 6, "anoth": [6, 11, 13, 16, 23], "speed": 6, "highest": [6, 7, 13], "center": [6, 7], "acceler": [6, 7, 8, 9, 11], "ai": 6, "hpc": 6, "analyt": 6, "cloud": 6, "edg": 6, "workstat": 6, "nativ": [6, 19], "doubl": [6, 17], "halv": [6, 23], "introduc": [6, 10, 22], "paper": [6, 16, 22], "format": [6, 9, 10, 17, 18, 28], "learn": [6, 7, 9, 23], "post": [6, 9, 18, 21], "consist": [6, 13, 22, 23], "encod": [6, 11, 12, 22, 23], "e4m3": 6, "expon": 6, "mantissa": 6, "e5m2": 6, "gradient": 6, "practic": [6, 7, 11, 14, 17], "perceiv": [6, 20], "w8a8": [6, 9], "mean": [6, 7, 11, 12, 17, 18, 19, 20, 21, 23, 26, 28], "8bit": 6, "819": 7, "9x": [7, 8], "hbm3e": 7, "llama_13b": 7, "1024": [7, 9, 19, 20, 23, 24], "750": 7, "349": 7, "llama_70b": 7, "512": [7, 8, 16, 20], "014": 7, "654": [7, 20], "341": 7, "303": 7, "v9": 7, "due": [7, 12, 19, 20], "offlin": [7, 20], "summar": [7, 9, 11, 17, 18, 19, 20], "scenario": [7, 9, 11, 18, 19], "70b": [7, 9, 11], "tp8": 7, "chat": 7, "agent": 7, "80": [7, 10, 20], "200": [7, 20], "gpt3": 7, "175b": 7, "hgx": 7, "6x": 7, "vari": 7, "shown": [7, 10, 12, 15, 23], "swept": 7, "newest": 7, "portfolio": 7, "8tb": 7, 
"expand": [7, 9, 23], "141": 7, "gigabyt": 7, "gb": [7, 10], "combin": [7, 13, 19, 20, 24], "convers": [8, 9], "greater": [8, 9, 11, 12, 23], "experi": [8, 9, 21], "doc": [8, 15, 16, 23], "gpt_attent": [8, 11, 13, 23], "look": [8, 10, 12, 21], "curv": 8, "equat": [8, 23], "tpot": 8, "axi": [8, 23], "flatten": [8, 16], "better": [8, 11, 19], "8xh100": 8, "8a": 8, "227": 8, "232": [8, 20], "25": 8, "300": 8, "deploy": 9, "imped": 9, "emerg": 9, "vital": [9, 13], "strategi": [9, 17, 23, 25], "address": [9, 17, 20], "bottleneck": 9, "overview": [9, 21], "share": [9, 10, 11, 13, 20, 23, 24], "appropri": [9, 19, 20], "tailor": 9, "unifi": [9, 18], "significantli": [9, 17], "dl": 9, "genai": 9, "hardwar": 9, "easi": 9, "mind": [9, 19], "line": [9, 17, 20], "focu": [9, 13, 21], "ptq": 9, "futur": [9, 10, 11, 17, 19, 20, 22, 23], "popular": [9, 11, 18], "impos": 9, "constraint": [9, 11, 23], "v2": [9, 22], "7b": [9, 16], "constrain": 9, "500m": 9, "notabl": 9, "3x": 9, "h100": 9, "sq": [9, 22], "40x": 9, "44x": 9, "30x": 9, "51x": 9, "47x": 9, "32x": 9, "mmlu": 9, "baselin": [9, 20], "loss": 9, "falcon": [9, 18, 22], "180b": 9, "70": [9, 20], "68": 9, "56": [9, 20], "int4": 9, "awq": 9, "69": 9, "85": [9, 17], "40b": 9, "55": [9, 21], "89": [9, 10], "54": 9, "50": [9, 12, 16], "07": 9, "87": 9, "67": 9, "75": 9, "01": 9, "mpt": [9, 22], "30b": 9, "47": 9, "46": 9, "compris": 9, "three": [9, 12, 22, 23], "primari": 9, "calibr": 9, "consider": 9, "prefer": [9, 10], "choic": [9, 17, 23, 28], "densiti": 9, "crucial": 9, "factor": [9, 12, 17, 22, 23, 25], "consequ": 9, "opt": [9, 18, 22, 23], "speicfic": 9, "suggest": [9, 11], "priorit": 9, "do": [9, 11, 13, 20, 21, 23], "meet": 9, "smoothquant": [9, 13], "gptq": 9, "toler": 9, "tradeoff": 9, "our": [9, 20, 21], "minut": 9, "w8a16": [9, 18], "w4a16": [9, 18], "ten": 9, "w4a8": 9, "ll": [9, 20], "normal": [9, 17, 23], "becaus": [9, 17, 19, 20, 23], "occupi": [9, 17], "neglig": 9, "long": [9, 11, 17, 19, 21], "re": 9, "ada": [9, 10, 20], "latter": [9, 19], "test": [9, 10, 11, 12, 19, 20], "machin": 9, "bring": 9, "upcom": 9, "instal": [10, 12, 20], "wheel": [10, 20], "instruct": 10, "linux": 10, "directli": [10, 12, 13, 19], "necessari": [10, 23], "who": 10, "best": [10, 11, 14, 20, 21], "debug": [10, 14, 17, 28], "abi": 10, "docker": [10, 20], "platform": 10, "lf": [10, 16, 20], "apt": 10, "github": [10, 20, 23], "com": [10, 20, 23], "cd": [10, 18, 20], "submodul": [10, 20], "recurs": [10, 20], "pull": [10, 20], "There": [10, 11, 12, 13, 17, 18, 19, 20, 22, 23], "imag": 10, "approxim": 10, "disk": 10, "space": [10, 17, 20], "63": [10, 21], "command": [10, 12, 17, 18, 20, 26], "release_build": 10, "cuda_arch": 10, "cmake": 10, "restrict": [10, 12, 23], "compil": [10, 12, 23], "real": [10, 13, 20], "90": [10, 19, 21], "release_run": 10, "local_us": [10, 20], "local": [10, 18, 20], "account": 10, "root": [10, 18, 20, 23], "insid": [10, 17, 23], "app": 10, "flexibl": [10, 12], "tag": 10, "devel": 10, "latest": 10, "shell": [10, 20], "target": [10, 19], "dockerfil": 10, "ipc": 10, "ulimit": 10, "memlock": 10, "stack": [10, 23], "67108864": 10, "pwd": 10, "workdir": 10, "script": [10, 16, 20, 22, 26], "build_wheel": [10, 20], "trt_root": [10, 20], "usr": [10, 18, 20], "deploi": [10, 18], "pip": [10, 20], "whl": [10, 20], "increment": 10, "clean": [10, 21], "purpos": [10, 11], "semicolon": 10, "separ": [10, 12, 19, 20, 23, 28], "cuda_architectur": 10, "86": 10, "cmakelist": 10, "txt": [10, 20], "particular": 10, "gptsession": [10, 12, 17], "interpret": 10, 
"relev": [10, 12], "consult": [10, 21], "understand": [10, 21], "cpp_onli": 10, "particularli": 10, "avoid": [10, 17], "problem": [10, 11], "dual": 10, "gcc": 10, "overridden": 10, "build_dir": 10, "libtensorrt_llm": 10, "against": 10, "libnvinfer_plugin_tensorrt_llm": 10, "project": [10, 11, 12, 16], "part": [10, 13, 19, 23], "sinc": [10, 11, 13, 17], "gptsessiontest": [10, 12], "multiqueri": 11, "quick": [11, 20], "remind": 11, "articl": 11, "arxiv": [11, 16, 22, 23], "org": [11, 16, 22, 23], "ab": [11, 22, 23], "1911": 11, "02150": 11, "2307": 11, "09288": 11, "discuss": 11, "remove_input_pad": [11, 12, 16, 19, 20, 23, 24, 26, 28], "shorter": [11, 12, 19], "max_sequence_length": [11, 28], "excess": 11, "unneed": [11, 19], "surround": 11, "1d": [11, 23], "context_fmha_typ": [11, 17], "slowest": 11, "signific": 11, "quadrat": [11, 17], "enabled_with_fp32_acc": 11, "accumul": [11, 23], "forc": [11, 20], "fp32": [11, 23], "vanilla": 11, "2205": 11, "14135": 11, "08691": 11, "extra": [11, 18], "plan": 11, "overal": 11, "mask": [11, 23, 24], "fly": [11, 20, 22, 23], "dequant": [11, 23], "ia3": 11, "occup": [11, 17], "multi_block_mod": [11, 19, 26], "exact": [11, 17], "definit": [11, 23], "hard": 11, "predict": 11, "rule": 11, "thumb": 11, "worth": 11, "num_head": [11, 19, 23, 25, 28], "processor": [11, 28], "evolv": [11, 22], "research": [11, 22], "conduct": 11, "heurist": [11, 23], "proport": 11, "warn": [11, 12, 17, 20, 23], "still": [11, 17, 20, 23], "experiment": [11, 12, 20, 22], "bf16": 11, "disable_xqa": 11, "decid": [11, 18, 21], "trtllm_force_xqa": 11, "shouldus": 11, "decoderxqarunn": 11, "decodermaskedmultiheadattent": 11, "go": [11, 19], "s0": 11, "s1": 11, "s2": 11, "relax": 11, "ineffici": 11, "resourc": 11, "origin": [11, 13], "common": [11, 12, 17, 23], "behavior": [11, 12, 17, 23], "fmha": [11, 17, 23], "except": [11, 12, 23], "integ": [11, 12, 19, 22, 23], "past": 11, "monolith": 11, "max_seqlen": [11, 23], "hidden_dim_per_head": [11, 23], "reach": [11, 12, 18, 19], "decompos": 11, "keep": [11, 12, 19, 21, 23], "track": 11, "recycl": 11, "simplifi": [11, 12, 23], "rest": 11, "bfloat16": [11, 20, 22], "kv_cache_quant_mod": [11, 23], "int8_kv_cach": [11, 22], "fp8_kv_cach": [11, 22], "kv_cache_scaling_factor": [11, 18], "invers": 11, "multipli": [11, 23], "fp_valu": 11, "quantized_valu": 11, "treat": [11, 23], "circular": 11, "n": [11, 12, 17, 18, 19, 20, 21, 22, 23], "max_attention_window_s": [11, 19, 23, 28], "overwrit": 11, "least": 11, "surpass": 11, "window_s": 11, "deal": [11, 13], "_note": 11, "doesn": 11, "simpli": [11, 20], "num_lay": [11, 24, 25, 28], "stabl": [11, 23], "sink": 11, "kept": [11, 23], "sink_token_length": [11, 28], "distanc": [11, 23], "rather": [11, 13], "enable_pos_shift": [11, 23, 24], "dens": [11, 16, 18, 23], "offici": 11, "save": [11, 17, 19, 20], "dense_context_fmha": [11, 23, 24], "reconstruct": [11, 23], "beam_width": [11, 15, 19, 23, 28], "si": 11, "bi": 11, "ti": 11, "stage": [11, 13, 17], "concaten": [11, 16, 23], "hidden": [11, 12, 16, 17, 23, 24], "3d": [11, 23], "batch_beam_s": [11, 23], "hidden_dim": [11, 23], "num_token": [11, 23], "word": [11, 12, 15, 23, 28], "pseudo": [11, 12, 22, 23], "seq": [11, 17], "context_phas": 11, "generation_phas": 11, "rotary_embedding_dim": [11, 23], "neox": [11, 12, 22], "form": [11, 23], "position_embedding_typ": [11, 18, 20, 23, 24, 25], "positionembeddingtyp": [11, 23, 24, 25], "rope_gpt_neox": [11, 20, 23], "rope_gptj": [11, 20, 23, 24], "slope": [11, 23], "constant": [11, 17, 23], "norm_factor": 11, 
"f": [11, 12, 20, 23], "q_scale": [11, 23, 24, 25], "sqrt": [11, 23], "head_siz": [11, 23, 25, 28], "On": [11, 19, 23], "broader": 11, "aspect": [11, 19], "kind": [11, 13], "accord": [11, 23, 24], "lightweight": 11, "t5": [11, 12, 22], "famili": 11, "ahead": 11, "ii": [11, 23], "implicit": [11, 23], "suit": 11, "max_dist": [11, 23, 24, 25], "compos": 12, "declar": [12, 13], "enc_dec": 12, "folder": [12, 20, 22], "gptmodelconfig": 12, "worldconfig": 12, "famou": 12, "mpi_comm_world": 12, "descript": [12, 15, 16, 23], "overload": 12, "longest": [12, 23], "addition": [12, 20], "enter": [12, 13], "getter": 12, "setter": 12, "vocabulari": [12, 24], "numlay": 12, "numhead": 12, "numkvhead": 12, "numer": [12, 14], "lmm": 12, "thing": 12, "cluster": 12, "collabor": [12, 23], "nvlink": [12, 19], "consecut": 12, "harder": 12, "absenc": 12, "advantag": 12, "interconnect": 12, "a100": 12, "mpiutil": 12, "comm_sess": 12, "simplic": 12, "mpirun": [12, 18, 20, 21], "talk": 12, "administr": 12, "program": [12, 19], "present": [12, 22], "allfinish": 12, "computelogit": 12, "generatetokensfromlogit": 12, "generationinput": 12, "generationoutput": 12, "mandatori": [12, 15, 18], "aka": [12, 15, 23], "eo": 12, "257": 12, "fill": [12, 23], "numtoken": 12, "match": [12, 13, 20, 23, 28], "made": 12, "embeddingbiasopt": 12, "ban": 12, "badwordslength": 12, "stopwordslength": 12, "let": [12, 13, 18, 23], "row": [12, 16, 22, 23], "prefix": [12, 18, 23], "diagram": 12, "inner": [12, 23], "packeds": 12, "acquisit": 12, "gptsessionbenchmark": [12, 20, 21], "lm": 12, "just": [12, 17, 21], "maxoutputlen": 12, "caller": 12, "samplingconfig": [12, 28], "0f": 12, "penal": 12, "irrespect": 12, "mutual": [12, 22], "exclus": [12, 22], "random": 12, "seed": 12, "decai": 12, "exponenti": 12, "factual": 12, "enhanc": [12, 17, 19], "0e": 12, "influenc": 12, "remain": [12, 13, 17, 19, 23], "greedi": [12, 15], "upper": [12, 17, 23], "divers": [12, 21], "longer": 12, "renam": 12, "beamsearchlengthpenalti": 12, "scalar": [12, 23], "gptdecod": 12, "satisfi": [12, 19], "biggest": 12, "individu": 12, "revisit": 12, "structur": [12, 13, 15, 17], "could": [12, 13, 17, 18, 19], "rebuild": 12, "gw": 13, "manipul": 13, "modifi": [13, 19], "facilit": 13, "gemm": [13, 17, 19], "finer": 13, "grain": 13, "alter": 13, "ideal": 13, "would": [13, 20], "nest": 13, "flow": 13, "scatter": 13, "get_par": [13, 23], "get_us": [13, 23], "consum": [13, 23], "replace_all_uses_with": [13, 23], "replac": [13, 17, 20, 23], "miss": [13, 20], "especi": 13, "opaqu": 13, "world": [13, 23], "wise": 13, "singleton": [13, 23], "flayerinfomemo": 13, "replace_input_with": 13, "replace_output_uses_with": 13, "redirect": 13, "patternrewrit": 13, "match_and_rewrit": 13, "complex": 13, "patternanalyz": 13, "analysi": [13, 14, 17], "analyz": [13, 21], "rewritepatternmanag": 13, "label": [13, 23], "privileg": [13, 20], "analysispatternmanag": 13, "certain": [13, 18], "manner": 13, "routin": 13, "subtract": 13, "test_graph_rewrit": 13, "naivepatternrewriter_replaceaddwithsub": 13, "replace_add_with_sub": 13, "root_lay": 13, "layertyp": 13, "elementwis": [13, 23], "separate_match_rewrit": 13, "as_lay": 13, "elementwiseoper": [13, 23], "elementwise_sum": 13, "subgraph": 13, "get_input": 13, "old": 13, "elementwise_sub": 13, "dangl": 13, "prune": [13, 23], "explicitli": 13, "skip": [13, 20], "mark_as_remov": 13, "unnecessari": 13, "four": [13, 18, 24], "nearli": 13, "never": [13, 19], "depriv": 13, "commonli": 13, "gptattentionpluginremovepaddingrewritepass": 13, 
"gpt_attention_plugin_remove_pad": 13, "plugin_v2": 13, "plugin_namespac": 13, "plugin_typ": 13, "gptattent": 13, "flayer": 13, "assert": [13, 23], "although": 13, "black": 13, "box": 13, "tensor_input": 13, "qkv": [13, 16, 18, 23, 24], "extern": [13, 17, 20, 28], "in_len": 13, "new_input": 13, "clone_input": 13, "arglist": 13, "new_out": 13, "replace_outputs_uses_with": 13, "quit": [13, 20], "u": 13, "fuseattentionwithbiaspass": 13, "graph_rewrit": 13, "rewrit": [14, 23], "workflow": [14, 23], "2b": 14, "lora": [14, 15, 23, 28], "gptmanag": [14, 15], "requestid": 15, "param": [15, 23, 25, 28], "omit": [15, 23], "request_output_len": 15, "num_input_token": 15, "suppli": 15, "runtime_top_k": 15, "runtime_top_p": 15, "len_penalti": 15, "repetition_penalti": 15, "min_length": 15, "presence_penalti": 15, "frequency_penalti": 15, "random_se": 15, "end_id": 15, "pad_id": 15, "embedding_bia": 15, "embed": [15, 23], "bad_words_list": [15, 28], "num_bad_word": 15, "bad": 15, "stop_words_list": [15, 28], "num_stop_word": 15, "prompt_embedding_t": [15, 24, 25, 28], "prompt": [15, 24, 28], "prompt_vocab_s": [15, 25, 28], "vocab": [15, 23, 28], "lora_weight": [15, 16], "num_lora_modules_lay": [15, 16], "x": [15, 16, 22, 23, 24, 25], "hi": [15, 16], "ho": [15, 16], "adapt": [15, 16], "lora_config": [15, 16], "module_id": [15, 16], "layer_idx": [15, 16], "adapter_s": [15, 16], "return_log_prob": 15, "return_context_logit": 15, "return_generation_logit": 15, "draft_input_id": 15, "num_draft_token": 15, "draft": 15, "inflight": [15, 16, 23], "qychen": 16, "luotuo": 16, "kunish": 16, "japanes": 16, "alpaca": 16, "base_model": 16, "hf": [16, 28], "convert_checkpoint": [16, 18], "output_dir": [16, 18, 20], "tmp": [16, 20], "llama_7b_with_lora_qkv": 16, "trt_ckpt": [16, 18], "hf_lora_dir": 16, "max_lora_rank": [16, 24, 25], "lora_target_modul": [16, 25, 28], "attn_q": 16, "attn_k": 16, "attn_v": 16, "trtllm": [16, 17, 18, 19, 20], "checkpoint_dir": [16, 18], "trt_engin": [16, 18], "gpt_attention_plugin": [16, 19, 20, 24, 26, 28], "context_fmha": [16, 19, 20, 26], "paged_kv_cach": [16, 19, 20, 26, 28], "gemm_plugin": [16, 18, 20, 24, 26], "lora_plugin": [16, 23, 26, 28], "numpi": [16, 23, 24], "hf_lora_convert": 16, "tensorrtllm_backend": 16, "loraweight": 16, "loraconfig": 16, "attn_qkv": 16, "num_lora_module_lay": 16, "ie": 16, "layer1": 16, "hidden_size_in": 16, "hidden_size_out": 16, "moduleid": 16, "layerid": 16, "pdf": 16, "2106": 16, "09685": 16, "moduletyp": 16, "compbin": 16, "attn_dens": 16, "mlp_h_to_4h": 16, "gate": [16, 23], "rmsnorm": [16, 23, 24], "mlp_4h_to_h": 16, "mlp_gate": 16, "question": 17, "At": [17, 24], "major": 17, "contributor": 17, "io": 17, "fix": 17, "chosen": 17, "portion": [17, 23], "live": 17, "profil": [17, 23], "affect": 17, "icudaengin": [17, 28], "device_memory_s": 17, "off": [17, 21], "explan": 17, "relationship": 17, "gptlmheadmodel": [17, 25], "linearli": 17, "max_num_token": [17, 19, 25], "fold": 17, "rang": [17, 21, 22, 23, 25], "workspac": [17, 23], "thu": [17, 23], "max_context_length": [17, 23, 24, 28], "bind": 17, "behav": 17, "neither": [17, 23], "nor": 17, "And": [17, 23, 24], "firstli": [17, 19], "No": 17, "left": [17, 19, 23], "whole": [17, 23], "buffermanag": 17, "driver": 17, "smi": 17, "concern": 17, "inspect": 17, "layout": 17, "theoret": 17, "though": 17, "succe": 17, "check_gpt_mem_usag": 17, "exceed": 17, "physic": [17, 23], "verbos": 17, "ye": [17, 23], "sequenti": 17, "shall": 17, "16x": 17, "timelin": 18, "emphasi": 18, "put": 18, "rich": 18, "team": 
18, "effort": 18, "summaris": 18, "runner": 18, "ammo": 18, "modelrunn": [18, 28], "jax": 18, "deepspe": 18, "hyper": 18, "dictionari": [18, 24], "logits_dtyp": [18, 25], "float32": [18, 23, 24, 25], "vocab_s": [18, 20, 24, 25, 28], "max_position_embed": [18, 20, 24, 25], "null": [18, 20], "num_hidden_lay": [18, 20, 25], "num_attention_head": [18, 20, 24, 25], "num_key_value_head": [18, 20, 25], "hidden_act": [18, 20, 24, 25], "intermediate_s": [18, 20, 25], "norm_epsilon": [18, 20, 25], "1e": [18, 20, 23, 24, 25], "learned_absolut": [18, 23, 24, 25], "use_prompt_tun": [18, 25], "world_siz": [18, 20, 23, 25], "pp_size": [18, 20, 25], "quant_aglo": 18, "str": [18, 23, 24, 25, 26, 28], "kv_cache_quant_aglo": 18, "group_siz": [18, 23], "has_zero_point": 18, "pre_quant_scal": 18, "exclude_modul": 18, "sub": [18, 23], "optforcausallm": [18, 25], "w4a16_awq": 18, "w4a8_awq": 18, "w4a16_gptq": 18, "w8a8_sq_per_channel": 18, "do_layer_norm_befor": 18, "new_decoder_architectur": [18, 20], "parallel_attent": [18, 20], "hierarch": 18, "whose": [18, 24], "bias": [18, 23], "th": [18, 23], "input_layernorm": 18, "activation_scaling_factor": 18, "weights_scaling_factor": 18, "prequant_scaling_factor": 18, "wherea": 18, "out_fatur": 18, "transpos": [18, 23], "sai": [18, 21], "125m": 18, "rank0": 18, "rank1": 18, "768": [18, 20], "50272": 18, "use_weight_onli": 18, "weight_only_precis": 18, "use_parallel_embed": [18, 19, 20, 25], "embedding_sharding_dim": [18, 19, 20, 25], "share_embedding_t": [18, 20, 25], "export": [18, 20, 28], "924": 18, "100": 18, "engine_dir": [18, 20, 28], "test_trt_llm": 18, "hf_model_dir": [18, 25], "data_typ": 18, "check_accuraci": 18, "tensorrt_llm_rouge1_threshold": 18, "benchmark": [19, 21], "tool": 19, "strongli": 19, "reproduc": 19, "place": [19, 23], "roughli": 19, "estim": 19, "alpha": [19, 23], "rough": 19, "invoc": 19, "05": [19, 20, 23, 24, 25], "secondli": 19, "realist": 19, "benefici": 19, "plateau": 19, "satur": 19, "hurt": 19, "input_seq_len": 19, "empir": 19, "sequence_count": 19, "multiprocessor_count": 19, "fulli": 19, "henc": 19, "use_custom_all_reduc": [19, 26, 28], "pcie": 19, "shard": [19, 23, 24], "look_up": 19, "lm_head": 19, "aforement": 19, "lookup": [19, 23, 24], "correctli": 19, "use_embedding_shar": 19, "use_lookup_plugin": 19, "use_gemm_plugin": 19, "swiglu": [19, 20, 23], "use_fused_mlp": [19, 20], "workload": [19, 21], "bert_attention_plugin": [19, 26], "knob": 19, "tweak": [19, 20], "inflight_batch": 19, "inflight_fused_batch": 19, "max_tokens_in_paged_kv_cach": 19, "kv_cache_free_gpu_mem_fract": 19, "tend": 19, "translat": 19, "unset": 19, "unless": 19, "clearli": 19, "leav": 19, "95": 19, "goal": 19, "max_output_length": 19, "exce": [19, 23], "drop": 19, "reduct": [19, 23], "expens": 19, "enable_chunked_context": 19, "chanc": 19, "therebi": 19, "tokens_per_block": [19, 26, 28], "observ": 20, "29": 20, "168": 20, "120": 20, "472": 20, "961": 20, "149": 20, "mistral": [20, 22], "896": 20, "569": 20, "968": 20, "84": 20, "450": 20, "868": 20, "548": 20, "343": 20, "429": 20, "530": 20, "844": 20, "008": 20, "421": 20, "461": 20, "116": 20, "990": 20, "118": 20, "269": 20, "27": 20, "357": 20, "831": 20, "661": 20, "409": 20, "517": 20, "619": 20, "438": 20, "733": 20, "241": 20, "922": 20, "170": 20, "816": 20, "718": 20, "347": 20, "020": 20, "048": 20, "836": 20, "114": 20, "250": 20, "992": 20, "874": 20, "693": 20, "401": 20, "979": 20, "721": 20, "954": 20, "579": 20, "542": 20, "561": 20, "471": 20, "49": 20, "177": 20, "152": 20, "39": 20, 
"810": 20, "658": 20, "631": 20, "692": 20, "812": 20, "734": 20, "607": 20, "353": 20, "518": 20, "547": 20, "613": 20, "565": 20, "595": 20, "66": 20, "193": 20, "203": 20, "reflect": 20, "infight": 20, "31": 20, "30": 20, "78": 20, "37": 20, "61": 20, "79": 20, "708": 20, "93": 20, "769": 20, "35": 20, "235": 20, "76": 20, "463": 20, "right": [20, 23], "elev": 20, "uid": [20, 28], "gid": 20, "boot": 20, "slurm": 20, "pyxi": 20, "caus": 20, "makefil": 20, "nv_gpu": 20, "gpu_opt": 20, "mount": 20, "destin": 20, "docker_run_arg": 20, "fine": [20, 24], "repeatedli": 20, "ran": 20, "transit": 20, "hbm3": 20, "newer": 20, "quant_algo": 20, "kv_cache_quant_algo": 20, "find": 20, "trtllm_enable_xqa": 20, "displai": 20, "prepar": [20, 22, 23, 25], "gptj": 20, "ckpt_config": 20, "gptjforcausallm": [20, 25], "28": 20, "50400": 20, "gelu": [20, 23], "rotary_dim": 20, "strongly_typ": 20, "in_out_s": 20, "in_out": 20, "echo": 20, "awk": 20, "in_out_dim": 20, "warm_up": 20, "durat": 20, "num_run": 20, "input_output_len": 20, "llamaforcausallm": [20, 25], "11008": 20, "32000": 20, "rotary_bas": [20, 25], "10000": [20, 23, 24, 25], "rotary_sc": [20, 25], "8192": 20, "28672": 20, "oversubscrib": 20, "falconforcausallm": [20, 25], "14848": 20, "65024": 20, "engine_path": 20, "_": 20, "highli": 21, "metric": 21, "middl": 21, "ground": 21, "dive": 21, "outlin": 21, "toggl": 21, "region": 21, "extract": [21, 23], "log_iteration_data": 21, "stdout": 21, "metadata": 21, "249": 21, "231": 21, "2448": 21, "28784": 21, "540173600": 21, "239": 21, "6904": 21, "tllm_gptm_profile_start_stop": 21, "csv": 21, "tllm_gpts_profile_start_stop": 21, "tell": 21, "cudaprofilerapi": 21, "captur": 21, "repeat": [21, 23], "127": [21, 23], "resolut": 21, "bash": 21, "pmi_rank": 21, "mpich": 21, "slurm_procid": 21, "srun": 21, "ompi_comm_world_local_rank": 21, "eq": [21, 23], "nsy": 21, "nsys_mpi_store_teams_per_rank": 21, "nvtx": 21, "frequenc": 21, "100000": 21, "fi": 21, "profile_rank_0": 21, "env": 21, "ieee": 22, "scale": [22, 23], "satfinit": 22, "fp": 22, "static_cast": 22, "2d": [22, 23], "column": [22, 23], "channel": 22, "mi": 22, "ni": 22, "2211": [22, 23], "10438": 22, "downstream": 22, "preprocess": 22, "2210": 22, "17323": 22, "2306": 22, "00978": 22, "weightonlygroupwisequantmatmulplugin": 22, "weight_only_groupwise_quant_matmul": 22, "baichuan": 22, "bert": [22, 23], "blip": 22, "chatglm": [22, 23], "v3": 22, "flan": 22, "internlm": 22, "phi": 22, "replit": 22, "santacod": 22, "skywork": 22, "starcod": 22, "int4_weight": 22, "w4a": 22, "int8_weight": 22, "w8a": 22, "a8": 22, "per_channel": 22, "per_token": 22, "per_group": 22, "fp8_qdq": 22, "allreducestrategi": 23, "intenum": 23, "customallreducekernel": 23, "sync": [23, 28], "oneshot": 23, "ring": 23, "twoshot": 23, "attentionmasktyp": [23, 24], "bidirect": 23, "bidirectionalglm": 23, "causal": 23, "dimrang": 23, "tupl": [23, 24, 28], "ctor": 23, "layernormpositiontyp": [23, 25], "pre_layernorm": [23, 25], "layernormtyp": [23, 25], "groupnorm": [23, 24], "mlptype": [23, 25], "fusedgatedmlp": [23, 24], "gatedmlp": [23, 24], "alibi": 23, "alibi_with_scal": 23, "from_str": 23, "is_alibi": 23, "is_rop": 23, "rotaryscalingtyp": 23, "dim_rang": 23, "is_network_input": 23, "tensorloc": 23, "cast": 23, "properti": [23, 28], "is_dynam": 23, "exclud": 23, "is_trt_wrapp": 23, "itensor": 23, "differenti": 23, "inherit": 23, "hierarchi": 23, "mark_output": 23, "respons": 23, "keepdim": 23, "ndim": 23, "permut": 23, "new_tensor": 23, "undefin": 23, "split_size_or_sect": 23, 
"dim0": 23, "dim1": 23, "zero_is_placehold": 23, "unaryoper": 23, "closur": 23, "round": 23, "exp": 23, "sin": 23, "iunarylay": 23, "unari": 23, "tanh": 23, "mul": 23, "prod": 23, "div": 23, "gt": 23, "lt": 23, "op_and": 23, "AND": 23, "op_or": 23, "OR": 23, "pow": 23, "ielementwiselay": 23, "union": 23, "amongst": 23, "particip": 23, "pattern": 23, "therefor": 23, "section_s": 23, "contribut": 23, "deeplearn": 23, "html": 23, "replic": 23, "deleg": 23, "choos": 23, "arang": 23, "ifilllay": 23, "filloper": 23, "linspac": 23, "_str_to_trt_dtype_dict": 23, "_util": 23, "argmax": 23, "onnx": 23, "blob": 23, "md": 23, "avg_pool2d": 23, "kernel_s": [23, 24], "stride": [23, 24], "ceil_mod": [23, 24], "count_include_pad": [23, 24], "bert_attent": 23, "relative_attent": [23, 24, 25], "relative_attention_bia": 23, "1706": 23, "03762": 23, "sum_of_token": 23, "bertattentionplugin": 23, "max_seq_len": [23, 25, 28], "num_bucket": [23, 24, 25], "broadcast_help": 23, "split_siz": 23, "clip": 23, "beta": 23, "inp": 23, "jj": 23, "len": [23, 28], "ndarrai": 23, "iconstantlay": 23, "serial": [23, 28], "constant_to_tensor_": 23, "conv1d": [23, 24], "dilat": [23, 24], "conv2d": [23, 24], "conv_transpose2d": 23, "output_pad": [23, 24], "cumsum": 23, "ilooplay": 23, "einsum": 23, "einsum_eq": 23, "ieinsumlay": 23, "summat": 23, "einstein": 23, "convent": 23, "ascii": 23, "letter": 23, "comma": [23, 28], "subscript": 23, "diagon": 23, "ax": 23, "express": 23, "alphabet": 23, "arrow": 23, "ij": 23, "jk": 23, "ik": 23, "equival": 23, "ellipsi": 23, "syntax": 23, "rubric": 23, "ji": 23, "kj": 23, "dot": 23, "ijk": 23, "ikl": 23, "ijl": 23, "elementwise_binari": 23, "sharding_dim": [23, 24], "tp_rank": [23, 24], "among": 23, "transposit": 23, "default_net": 23, "plugin_config": [23, 25], "lookup_plugin": [23, 26], "igatherlay": 23, "tg_group": 23, "expand_shap": 23, "expans": 23, "islicelay": 23, "verifi": 23, "shrunk": 23, "behaviour": 23, "expand_dim": 23, "ishufflelay": 23, "new_shap": 23, "append": 23, "shuffl": 23, "expand_dims_lik": 23, "expand_mask": 23, "tgt_len": [23, 24], "src_seq_len": 23, "tgt_seq_len": 23, "3rd": 23, "2nd": 23, "revers": 23, "gatherel": 23, "gather_last_token_logit": 23, "last_tokens_id": 23, "geglu": 23, "generate_alibi_bias": 23, "key_length": 23, "05100": 23, "generate_alibi_slop": 23, "alibi_scal": 23, "past_key_valu": [23, 24], "host_past_key_value_length": [23, 24], "host_max_attention_window_s": [23, 24], "host_sink_token_length": [23, 24], "context_length": [23, 24, 28], "host_request_typ": [23, 24, 25], "num_kv_head": [23, 24, 25, 28], "hidden_size_per_head": 23, "rotary_embedding_bas": [23, 24], "rotary_embedding_scale_typ": 23, "rotary_embedding_scal": [23, 24], "rotary_embedding_max_posit": 23, "kv_orig_quant_scal": 23, "kv_quant_orig_scal": 23, "mask_typ": 23, "alibi_slop": 23, "kv_cache_block_point": [23, 24, 28], "host_kv_cache_block_point": [23, 24, 28], "do_cross_attent": [23, 24], "cross_qkv": 23, "cross_qkv_length": 23, "encoder_input_length": [23, 24, 28], "host_context_length": [23, 24, 28], "qkv_bia": 23, "use_cach": [23, 24, 25], "medusa_position_offset": [23, 24, 25, 28], "medusa_packed_mask": [23, 24, 25], "progress": 23, "hint": 23, "regard": 23, "merg": 23, "qkv_dim": 23, "contigu": 23, "max_block": 23, "num_tokens_per_block": 23, "cache_indir_t": 23, "cyclic": 23, "rope": 23, "theta": [23, 24], "ignor": 23, "rotari": 23, "glm": 23, "10b": 23, "max_blocks_per_sequ": 23, "cross": 23, "shift": 23, "medusa": [23, 28], "num_medusa_token": [23, 28], "divup": 23, 
"group_norm": 23, "num_group": [23, 24], "ep": [23, 24], "todo": 23, "index_select": 23, "5th": 23, "interpol": 23, "scale_factor": 23, "nearest": 23, "align_corn": 23, "recompute_scale_factor": 23, "antialia": 23, "is_gated_activ": 23, "layer_norm": 23, "normalized_shap": [23, 24], "use_diff_of_squar": 23, "norm": 23, "simplest": 23, "gamma": 23, "formula": 23, "varianc": 23, "squar": 23, "var": 23, "epsilon": 23, "in_hidden_s": 23, "out_hidden_s": 23, "transa": 23, "transb": 23, "max_low_rank": 23, "lora_rank": 23, "lora_weights_point": 23, "lora_id": 23, "low_rank": 23, "in_point": 23, "out_point": 23, "masked_select": 23, "don": 23, "nonzero": 23, "gatherv2": 23, "mat2": 23, "use_fp32_acc": 23, "imatrixmultiplylay": 23, "decis": 23, "ireducelay": 23, "non_gated_vers": 23, "outer": 23, "vec2": 23, "p2p": 23, "ncclrecv": 23, "repeat_interleav": 23, "repetit": 23, "rms_norm": 23, "06": [23, 24, 25], "weig": 23, "selective_scan": 23, "delta": 23, "delta_bia": 23, "z": 23, "dstate": 23, "is_variable_b": 23, "is_variable_c": 23, "delta_softplu": 23, "seq_len": [23, 24], "ssm": 23, "softplu": 23, "ncclsend": 23, "samplemod": 23, "emul": 23, "strict_bound": 23, "coordin": 23, "isoftmaxlay": 23, "threshold": 23, "nn": 23, "revert": 23, "ith": 23, "squared_relu": 23, "contact": 23, "untouch": 23, "enforc": 23, "iselectlay": 23, "mish": 24, "apply_query_key_layer_sc": [24, 25], "attention_head_s": 24, "attention_mask_typ": 24, "rotary_embedding_sc": 24, "rotary_embedding_percentag": [24, 25], "use_auto_parallel": [24, 25], "quant_mod": [24, 25, 28], "cross_attent": [24, 28], "dense_bia": 24, "clip_qkv": 24, "kv_cache_param": [24, 25], "attention_param": [24, 25], "encoder_output": [24, 25, 28], "position_embed": 24, "norm_before_bmm1": 24, "lora_layer_param": 24, "attentionparam": [24, 25], "encoder_max_input_length": [24, 28], "is_valid": 24, "is_valid_cross_attn": 24, "bertattent": 24, "keyvaluecacheparam": [24, 25], "fill_none_tensor_list": 24, "list_siz": 24, "get_first_host_kv_cache_block_point": 24, "get_first_kv_cache_block_point": 24, "get_first_past_key_valu": 24, "ropeembeddingutil": 24, "apply_rotary_pos_emb": 24, "pos_emb_typ": 24, "apply_rotary_pos_emb_chatglm": 24, "create_sinusoidal_posit": 24, "num_po": 24, "rotate_every_two": 24, "rotate_half": 24, "make_causal_mask": 24, "bsz": 24, "past_key_values_length": 24, "output_dtyp": 24, "in_channel": 24, "out_channel": 24, "padding_mod": 24, "convtranspose2d": 24, "output_s": 24, "num_embed": 24, "embedding_dim": 24, "prompttuningembed": 24, "supplementari": 24, "assign": 24, "adequ": 24, "task_vocab_s": 24, "logic": 24, "num_task": 24, "num_tokens_per_task": 24, "alia": 24, "use_fp8": 24, "share_weight": 24, "strict_dtyp": 24, "lora_runtime_param": 24, "loraruntimeparam": 24, "multiply_gath": 24, "multiply_reduc": 24, "num_channel": 24, "affin": 24, "elementwise_affin": 24, "avgpool2d": 24, "baichuanforcausallm": 25, "kwarg": [25, 28], "decodermodelforcausallm": 25, "bertforquestionansw": 25, "type_vocab_s": 25, "pad_token_id": 25, "is_roberta": 25, "num_label": 25, "token_type_id": 25, "bertforsequenceclassif": 25, "bertmodel": 25, "bloomforcausallm": 25, "bloommodel": 25, "pretrainedconfig": 25, "prompt_task": [25, 28], "chatglmforcausallm": 25, "check_config": 25, "pretrainedmodel": 25, "chatglmmodel": 25, "decodermodel": 25, "encoder_num_head": 25, "encoder_hidden_s": 25, "encoder_head_s": 25, "encoder_num_kv_head": 25, "has_position_embed": [25, 28], "has_embedding_layernorm": 25, "has_embedding_scal": 25, 
"has_attention_qkvo_bia": 25, "has_mlp_bia": 25, "has_model_final_layernorm": 25, "layernorm_ep": 25, "layernorm_posit": 25, "layernorm_typ": 25, "mlp_type": 25, "rescale_before_lm_head": 25, "has_lm_head_bia": 25, "residual_sc": 25, "generationmixin": 25, "decoder_input_id": 25, "cross_attention_mask": [25, 28], "max_decoder_input_len": 25, "max_encoder_input_len": 25, "brief": [25, 28], "fed": 25, "encodermodel": 25, "prompt_embedding_table_s": 25, "falconmodel": 25, "gptjmodel": 25, "inter_s": 25, "moe_config": 25, "moeconfig": 25, "num_expert": 25, "top_k": 25, "tp_mode": 25, "parallelismmod": 25, "tensor_parallel": 25, "normalization_mod": 25, "expertscalenormalizationmod": 25, "renorm": 25, "gptmodel": 25, "lora_param": 25, "max_draft_len": 25, "gptneoxforcausallm": 25, "gptneoxmodel": 25, "topmodelmixin": 25, "default_plugin_config": 25, "to_trt": 25, "classmethod": [25, 28], "from_hugging_fac": 25, "hug": 25, "face": 25, "val": 25, "scaling_typ": 25, "llamamodel": 25, "mptforcausallm": 25, "mptmodel": 25, "mambalmheadmodel": 25, "conv_stat": 25, "ssm_state": 25, "medusaforcausallm": 25, "optmodel": 25, "phiforcausallm": 25, "phimodel": 25, "quant_kwarg": 25, "dict": [25, 28], "from_dict": 25, "from_json_fil": 25, "config_fil": 25, "set_if_not_exist": 25, "set_rank": 25, "to_dict": 25, "from_checkpoint": 25, "ckpt_dir": 25, "from_config": 25, "position_encoding_2d": 25, "qwenforcausallm": 25, "seq_length": 25, "mlp_hidden_s": 25, "neox_rotary_styl": 25, "rms_norm_ep": 25, "qwenmodel": 25, "skyworkforcausallm": 25, "whisperencod": 25, "n_mel": 25, "n_ctx": 25, "n_state": 25, "n_head": 25, "n_layer": 25, "quantize_model": 25, "pluginconfig": 26, "smooth_quant_gemm_plugin": 26, "identity_plugin": 26, "layernorm_quantization_plugin": 26, "rmsnorm_quantization_plugin": 26, "nccl_plugin": 26, "weight_only_groupwise_quant_matmul_plugin": 26, "weight_only_quant_matmul_plugin": 26, "quantize_per_token_plugin": 26, "quantize_tensor_plugin": 26, "context_fmha_fp32_acc": 26, "enable_xqa": 26, "attention_qk_half_accumul": 26, "use_paged_context_fmha": 26, "use_context_fmha_for_gener": [26, 28], "to_legacy_set": 26, "legaci": 26, "migrat": 26, "central": 26, "intflag": 27, "chatglmgenerationsess": 28, "debug_tensors_to_sav": 28, "cuda_graph_mod": 28, "generationsequ": 28, "seq_idx": 28, "batch_idx": 28, "get_batch_idx": 28, "idx": 28, "get_seq_idx": 28, "buffer_alloc": 28, "cuda_stream_guard": 28, "exit": 28, "sampling_config": 28, "no_repeat_ngram_s": 28, "output_sequence_length": 28, "return_dict": 28, "stopping_criteria": 28, "stoppingcriteria": 28, "logits_processor": 28, "logitsprocessor": 28, "decode_batch": 28, "decode_regular": 28, "ite": 28, "sequence_limit_length": 28, "decode_stream": 28, "dump_debug_buff": 28, "early_stop_criteria": 28, "should_stop": 28, "filter_medusa_logit": 28, "best_path": 28, "best_path_length": 28, "medusa_logit": 28, "nmh": 28, "nmt": 28, "finalize_decod": 28, "in_progress": 28, "find_best_medusa_path": 28, "next_logit": 28, "temp": 28, "first_lay": 28, "get_next_medusa_token": 28, "next_medusa_logit": 28, "handle_per_step": 28, "next_step_tensor": 28, "runtimetensor": 28, "has_token_type_embed": 28, "is_medusa_mod": 28, "last_lay": 28, "max_medusa_token": 28, "max_prompt_embedding_table_s": 28, "medusa_path": 28, "medusa_temperatur": 28, "medusa_topk": 28, "medusa_tree_id": 28, "next_medusa_input_id": 28, "num_heads_kv": 28, "num_medusa_head": 28, "pp_communicate_final_output_id": 28, "final_output_id": 28, "pp_communicate_new_token": 28, "cache_indir": 28, 
"process_logits_for_medusa_mod": 28, "context_has_medusa_token": 28, "_runtim": 28, "lora_manag": 28, "loramanag": 28, "lora_uid": 28, "medusa_choic": 28, "update_kv_cache_draft_token_loc": 28, "best_path_len": 28, "update_output_ids_by_offset": 28, "new_generated_id": 28, "use_lora_plugin": 28, "memory_pool": 28, "max_blocks_per_seq": 28, "sink_token_len": 28, "use_one_more_block": 28, "add_sequ": 28, "context_len": 28, "get_pointer_arrai": 28, "logitsprocessorlist": 28, "mambalmheadmodelgenerationsess": 28, "mamba_d_conv": 28, "mamba_d_st": 28, "mamba_expand": 28, "model_nam": 28, "factori": 28, "hf_modules_to_trtllm_modul": 28, "trtllm_modules_to_hf_modul": 28, "modelrunnermixin": 28, "from_dir": 28, "lora_dir": 28, "lora_ckpt_sourc": 28, "batch_input_id": 28, "prompt_table_path": 28, "parametr": 28, "npy": 28, "nemo_prompt_convert": 28, "criteria": 28, "hoc": 28, "output_id": 28, "context_logit": 28, "generation_logit": 28, "serialize_engin": 28, "vocab_size_pad": 28, "qwenforcausallmgenerationsess": 28, "global_max_input_length": 28, "global_max_output_length": 28, "runtime_rank": 28, "iexecutioncontext": 28, "create_execution_context": 28, "from_engin": 28, "from_serialized_engin": 28, "infer_shap": 28, "tensorinfo": 28, "everi": 28, "Or": 28, "set_input_shap": 28, "manual": 28, "succeed": 28, "async": 28, "set_shap": 28, "tensor_dict": 28, "stoppingcriterialist": 28, "to_word_list_format": 28, "word_dict": 28, "add_special_token": 28, "sentenc": 28, "am": 28, "happi": 28, "sad": 28}, "objects": {"": [[2, 0, 1, "_CPPv48nvinfer1", "nvinfer1"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv4N12tensorrt_llm13batch_managerE", "tensorrt_llm::batch_manager"], [2, 0, 1, "_CPPv4N12tensorrt_llm13batch_manager16kv_cache_managerE", "tensorrt_llm::batch_manager::kv_cache_manager"], [2, 0, 1, "_CPPv4N12tensorrt_llm6layersE", "tensorrt_llm::layers"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", 
"tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataTypeE", "tensorrt_llm::runtime::BufferDataType"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb", "tensorrt_llm::runtime::BufferDataType::BufferDataType"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb", "tensorrt_llm::runtime::BufferDataType::BufferDataType::_unsigned"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb", "tensorrt_llm::runtime::BufferDataType::BufferDataType::dataType"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb", "tensorrt_llm::runtime::BufferDataType::BufferDataType::pointer"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType11getDataTypeEv", "tensorrt_llm::runtime::BufferDataType::getDataType"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType7getSizeEv", "tensorrt_llm::runtime::BufferDataType::getSize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType9isPointerEv", "tensorrt_llm::runtime::BufferDataType::isPointer"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType10isUnsignedEv", "tensorrt_llm::runtime::BufferDataType::isUnsigned"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType15kTrtPointerTypeE", "tensorrt_llm::runtime::BufferDataType::kTrtPointerType"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType9mDataTypeE", "tensorrt_llm::runtime::BufferDataType::mDataType"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType8mPointerE", "tensorrt_llm::runtime::BufferDataType::mPointer"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType9mUnsignedE", "tensorrt_llm::runtime::BufferDataType::mUnsigned"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataTypecvN8nvinfer18DataTypeEEv", "tensorrt_llm::runtime::BufferDataType::operator nvinfer1::DataType"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManagerE", "tensorrt_llm::runtime::BufferManager"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager13BufferManagerE13CudaStreamPtr", "tensorrt_llm::runtime::BufferManager::BufferManager"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager13BufferManagerE13CudaStreamPtr", "tensorrt_llm::runtime::BufferManager::BufferManager::stream"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::BufferManager::CudaStreamPtr"], [2, 0, 1, 
"_CPPv4N12tensorrt_llm7runtime13BufferManager10IBufferPtrE", "tensorrt_llm::runtime::BufferManager::IBufferPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10ITensorPtrE", "tensorrt_llm::runtime::BufferManager::ITensorPtr"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::dims"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::memoryType"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::memoryType"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::size"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::type"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::type"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copy"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv", "tensorrt_llm::runtime::BufferManager::copy"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType", "tensorrt_llm::runtime::BufferManager::copy"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy::dst"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::dst"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv", "tensorrt_llm::runtime::BufferManager::copy::dst"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::dst"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy::dst"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::dstType"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy::src"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::src"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv", "tensorrt_llm::runtime::BufferManager::copy::src"], [2, 3, 1, 
"_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::src"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy::src"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::srcType"], [2, 2, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [2, 2, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [2, 2, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [2, 5, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::T"], [2, 5, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::T"], [2, 5, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::T"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::dims"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::dims"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::src"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::src"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::src"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType", 
"tensorrt_llm::runtime::BufferManager::copyFrom::src"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::src"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu::dims"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu::type"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu::type"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyBuffer"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyBuffer::memoryType"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyBuffer::type"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyTensor"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyTensor::memoryType"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyTensor::type"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager9getStreamEv", "tensorrt_llm::runtime::BufferManager::getStream"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu::dims"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu::size"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu::type"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu::type"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14initMemoryPoolEi", "tensorrt_llm::runtime::BufferManager::initMemoryPool"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14initMemoryPoolEi", "tensorrt_llm::runtime::BufferManager::initMemoryPool::device"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10kBYTE_TYPEE", "tensorrt_llm::runtime::BufferManager::kBYTE_TYPE"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7mStreamE", 
"tensorrt_llm::runtime::BufferManager::mStream"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7managedEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::managed"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7managedENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::managed"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7managedEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::managed::dims"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7managedENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::managed::size"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7managedEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::managed::type"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7managedENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::managed::type"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEi", "tensorrt_llm::runtime::BufferManager::memoryPoolFree"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEv", "tensorrt_llm::runtime::BufferManager::memoryPoolFree"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEi", "tensorrt_llm::runtime::BufferManager::memoryPoolFree::device"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEi", "tensorrt_llm::runtime::BufferManager::memoryPoolReserved"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEv", "tensorrt_llm::runtime::BufferManager::memoryPoolReserved"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEi", "tensorrt_llm::runtime::BufferManager::memoryPoolReserved::device"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToENSt6size_tE", "tensorrt_llm::runtime::BufferManager::memoryPoolTrimTo"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToEiNSt6size_tE", "tensorrt_llm::runtime::BufferManager::memoryPoolTrimTo"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToEiNSt6size_tE", "tensorrt_llm::runtime::BufferManager::memoryPoolTrimTo::device"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToENSt6size_tE", "tensorrt_llm::runtime::BufferManager::memoryPoolTrimTo::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToEiNSt6size_tE", "tensorrt_llm::runtime::BufferManager::memoryPoolTrimTo::size"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEi", "tensorrt_llm::runtime::BufferManager::memoryPoolUsed"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEv", "tensorrt_llm::runtime::BufferManager::memoryPoolUsed"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEi", "tensorrt_llm::runtime::BufferManager::memoryPoolUsed::device"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned::dims"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE", 
"tensorrt_llm::runtime::BufferManager::pinned::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned::type"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned::type"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinnedPool"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinnedPool"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinnedPool::dims"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinnedPool::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinnedPool::type"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinnedPool::type"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7setZeroER7IBuffer", "tensorrt_llm::runtime::BufferManager::setZero"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7setZeroER7IBuffer", "tensorrt_llm::runtime::BufferManager::setZero::buffer"], [2, 1, 1, "_CPPv4I0EN12tensorrt_llm7runtime11BufferRangeE", "tensorrt_llm::runtime::BufferRange"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeEP1T9size_type", "tensorrt_llm::runtime::BufferRange::BufferRange"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeER7IBuffer", "tensorrt_llm::runtime::BufferRange::BufferRange"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeER7IBuffer", "tensorrt_llm::runtime::BufferRange::BufferRange::buffer"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeEP1T9size_type", "tensorrt_llm::runtime::BufferRange::BufferRange::data"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeEP1T9size_type", "tensorrt_llm::runtime::BufferRange::BufferRange::size"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime11BufferRangeE", "tensorrt_llm::runtime::BufferRange::T"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange5beginEv", "tensorrt_llm::runtime::BufferRange::begin"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRange5beginEv", "tensorrt_llm::runtime::BufferRange::begin"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange6cbeginEv", "tensorrt_llm::runtime::BufferRange::cbegin"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRange6cbeginEv", "tensorrt_llm::runtime::BufferRange::cbegin"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange4cendEv", "tensorrt_llm::runtime::BufferRange::cend"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRange4cendEv", "tensorrt_llm::runtime::BufferRange::cend"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange14const_iteratorE", "tensorrt_llm::runtime::BufferRange::const_iterator"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange13const_pointerE", "tensorrt_llm::runtime::BufferRange::const_pointer"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange15const_referenceE", "tensorrt_llm::runtime::BufferRange::const_reference"], [2, 2, 1, 
"_CPPv4N12tensorrt_llm7runtime11BufferRange3endEv", "tensorrt_llm::runtime::BufferRange::end"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRange3endEv", "tensorrt_llm::runtime::BufferRange::end"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange8iteratorE", "tensorrt_llm::runtime::BufferRange::iterator"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange5mDataE", "tensorrt_llm::runtime::BufferRange::mData"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange5mSizeE", "tensorrt_llm::runtime::BufferRange::mSize"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRangeixE9size_type", "tensorrt_llm::runtime::BufferRange::operator[]"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRangeixE9size_type", "tensorrt_llm::runtime::BufferRange::operator[]"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRangeixE9size_type", "tensorrt_llm::runtime::BufferRange::operator[]::index"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRangeixE9size_type", "tensorrt_llm::runtime::BufferRange::operator[]::index"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange7pointerE", "tensorrt_llm::runtime::BufferRange::pointer"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange9referenceE", "tensorrt_llm::runtime::BufferRange::reference"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRange4sizeEv", "tensorrt_llm::runtime::BufferRange::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange9size_typeE", "tensorrt_llm::runtime::BufferRange::size_type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange10value_typeE", "tensorrt_llm::runtime::BufferRange::value_type"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEventE", "tensorrt_llm::runtime::CudaEvent"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb", "tensorrt_llm::runtime::CudaEvent::CudaEvent"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventEj", "tensorrt_llm::runtime::CudaEvent::CudaEvent"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb", "tensorrt_llm::runtime::CudaEvent::CudaEvent::event"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventEj", "tensorrt_llm::runtime::CudaEvent::CudaEvent::flags"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb", "tensorrt_llm::runtime::CudaEvent::CudaEvent::ownsEvent"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7DeleterE", "tensorrt_llm::runtime::CudaEvent::Deleter"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEb", "tensorrt_llm::runtime::CudaEvent::Deleter::Deleter"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEv", "tensorrt_llm::runtime::CudaEvent::Deleter::Deleter"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEb", "tensorrt_llm::runtime::CudaEvent::Deleter::Deleter::ownsEvent"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter10mOwnsEventE", "tensorrt_llm::runtime::CudaEvent::Deleter::mOwnsEvent"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent7DeleterclE7pointer", "tensorrt_llm::runtime::CudaEvent::Deleter::operator()"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent7DeleterclE7pointer", "tensorrt_llm::runtime::CudaEvent::Deleter::operator()::event"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent8EventPtrE", "tensorrt_llm::runtime::CudaEvent::EventPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent12element_typeE", "tensorrt_llm::runtime::CudaEvent::element_type"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent3getEv", "tensorrt_llm::runtime::CudaEvent::get"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime9CudaEvent6mEventE", "tensorrt_llm::runtime::CudaEvent::mEvent"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaEvent::pointer"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent11synchronizeEv", "tensorrt_llm::runtime::CudaEvent::synchronize"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStreamE", "tensorrt_llm::runtime::CudaStream"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib", "tensorrt_llm::runtime::CudaStream::CudaStream"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji", "tensorrt_llm::runtime::CudaStream::CudaStream"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib", "tensorrt_llm::runtime::CudaStream::CudaStream::device"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji", "tensorrt_llm::runtime::CudaStream::CudaStream::flags"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib", "tensorrt_llm::runtime::CudaStream::CudaStream::ownsStream"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji", "tensorrt_llm::runtime::CudaStream::CudaStream::priority"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib", "tensorrt_llm::runtime::CudaStream::CudaStream::stream"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7DeleterE", "tensorrt_llm::runtime::CudaStream::Deleter"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEb", "tensorrt_llm::runtime::CudaStream::Deleter::Deleter"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEv", "tensorrt_llm::runtime::CudaStream::Deleter::Deleter"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEb", "tensorrt_llm::runtime::CudaStream::Deleter::Deleter::ownsStream"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter11mOwnsStreamE", "tensorrt_llm::runtime::CudaStream::Deleter::mOwnsStream"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream7DeleterclE12cudaStream_t", "tensorrt_llm::runtime::CudaStream::Deleter::operator()"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream7DeleterclE12cudaStream_t", "tensorrt_llm::runtime::CudaStream::Deleter::operator()::stream"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream9StreamPtrE", "tensorrt_llm::runtime::CudaStream::StreamPtr"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream3getEv", "tensorrt_llm::runtime::CudaStream::get"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream9getDeviceEv", "tensorrt_llm::runtime::CudaStream::getDevice"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7mDeviceE", "tensorrt_llm::runtime::CudaStream::mDevice"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7mStreamE", "tensorrt_llm::runtime::CudaStream::mStream"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordEN9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaStream::record"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordERK9CudaEvent", "tensorrt_llm::runtime::CudaStream::record"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordEN9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaStream::record::event"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordERK9CudaEvent", "tensorrt_llm::runtime::CudaStream::record::event"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream11synchronizeEv", "tensorrt_llm::runtime::CudaStream::synchronize"], [2, 2, 1, 
"_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitEN9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaStream::wait"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitERK9CudaEvent", "tensorrt_llm::runtime::CudaStream::wait"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitEN9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaStream::wait::event"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitERK9CudaEvent", "tensorrt_llm::runtime::CudaStream::wait::event"], [2, 1, 1, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime14DataTypeTraitsE", "tensorrt_llm::runtime::DataTypeTraits"], [2, 5, 1, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime14DataTypeTraitsE", "tensorrt_llm::runtime::DataTypeTraits::kDataType"], [2, 5, 1, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime14DataTypeTraitsE", "tensorrt_llm::runtime::DataTypeTraits::kIsPointer"], [2, 5, 1, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime14DataTypeTraitsE", "tensorrt_llm::runtime::DataTypeTraits::kIsUnsigned"], [2, 1, 1, "_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEEE", "tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt;"], [2, 5, 1, "_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEEE", "tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt;::kDataType"], [2, 5, 1, "_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEEE", "tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt;::kUnsigned"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt;::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt;::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt;::type"], [2, 1, 1, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kBOOL, kUnsigned&gt;"], [2, 5, 1, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kBOOL, kUnsigned&gt;::kUnsigned"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kBOOL, kUnsigned&gt;::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kBOOL, kUnsigned&gt;::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kBOOL, kUnsigned&gt;::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kFLOAT&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kFLOAT&gt;::name"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kFLOAT&gt;::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kFLOAT&gt;::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kHALF&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kHALF&gt;::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kHALF&gt;::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kHALF&gt;::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32, true&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32, true&gt;::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32, true&gt;::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32, true&gt;::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32&gt;::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32&gt;::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32&gt;::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64, true&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64, true&gt;::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64, true&gt;::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64, true&gt;::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64&gt;::name"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64&gt;::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64&gt;::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT8&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT8&gt;::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT8&gt;::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT8&gt;::type"], [2, 1, 1, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kUINT8, kUnsigned&gt;"], [2, 5, 1, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kUINT8, kUnsigned&gt;::kUnsigned"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kUINT8, kUnsigned&gt;::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kUINT8, kUnsigned&gt;::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kUINT8, kUnsigned&gt;::type"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInputE", "tensorrt_llm::runtime::DecodingInput"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::endIds"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::logits"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::maxAttentionWindow"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::maxBatchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::maxLength"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::sinkTokenLength"], [2, 0, 1, 
"_CPPv4N12tensorrt_llm7runtime13DecodingInput9TensorPtrE", "tensorrt_llm::runtime::DecodingInput::TensorPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsListE", "tensorrt_llm::runtime::DecodingInput::badWordsList"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput10batchSlotsE", "tensorrt_llm::runtime::DecodingInput::batchSlots"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput16cacheIndirectionE", "tensorrt_llm::runtime::DecodingInput::cacheIndirection"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13embeddingBiasE", "tensorrt_llm::runtime::DecodingInput::embeddingBias"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput6endIdsE", "tensorrt_llm::runtime::DecodingInput::endIds"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput8finishedE", "tensorrt_llm::runtime::DecodingInput::finished"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput7lengthsE", "tensorrt_llm::runtime::DecodingInput::lengths"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput6logitsE", "tensorrt_llm::runtime::DecodingInput::logits"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput18maxAttentionWindowE", "tensorrt_llm::runtime::DecodingInput::maxAttentionWindow"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12maxBatchSizeE", "tensorrt_llm::runtime::DecodingInput::maxBatchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9maxLengthE", "tensorrt_llm::runtime::DecodingInput::maxLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput17noRepeatNgramSizeE", "tensorrt_llm::runtime::DecodingInput::noRepeatNgramSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput19sequenceLimitLengthE", "tensorrt_llm::runtime::DecodingInput::sequenceLimitLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput15sinkTokenLengthE", "tensorrt_llm::runtime::DecodingInput::sinkTokenLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput4stepE", "tensorrt_llm::runtime::DecodingInput::step"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsListE", "tensorrt_llm::runtime::DecodingInput::stopWordsList"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutputE", "tensorrt_llm::runtime::DecodingOutput"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypothesesE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses11cumLogProbsE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::cumLogProbs"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5emptyER13BufferManager", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::empty"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5emptyER13BufferManager", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::empty::manager"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initER13BufferManager11TokenIdType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::init"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initER13BufferManager11TokenIdType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::init::endId"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initER13BufferManager11TokenIdType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::init::manager"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses6isDoneE", 
"tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::isDone"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses8logProbsE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::logProbs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses15minNormedScoresE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::minNormedScores"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12normedScoresE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::normedScores"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses8numBeamsE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::numBeams"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12outputIdsTgtE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::outputIdsTgt"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7releaseEv", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::release"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape::batchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape::beamWidth"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape::maxSequenceLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses18sequenceLengthsTgtE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::sequenceLengthsTgt"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::slice"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::slice::batchIndex"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::slice::size"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14DecodingOutputE9TensorPtr", "tensorrt_llm::runtime::DecodingOutput::DecodingOutput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14DecodingOutputE9TensorPtr", "tensorrt_llm::runtime::DecodingOutput::DecodingOutput::ids"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9TensorPtrE", "tensorrt_llm::runtime::DecodingOutput::TensorPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14beamHypothesesE", "tensorrt_llm::runtime::DecodingOutput::beamHypotheses"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput16cacheIndirectionE", "tensorrt_llm::runtime::DecodingOutput::cacheIndirection"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput11cumLogProbsE", "tensorrt_llm::runtime::DecodingOutput::cumLogProbs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput8finishedE", "tensorrt_llm::runtime::DecodingOutput::finished"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput11finishedSumE", "tensorrt_llm::runtime::DecodingOutput::finishedSum"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime14DecodingOutput3idsE", "tensorrt_llm::runtime::DecodingOutput::ids"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput17kNegativeInfinityE", "tensorrt_llm::runtime::DecodingOutput::kNegativeInfinity"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput7lengthsE", "tensorrt_llm::runtime::DecodingOutput::lengths"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput8logProbsE", "tensorrt_llm::runtime::DecodingOutput::logProbs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9newTokensE", "tensorrt_llm::runtime::DecodingOutput::newTokens"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14newTokensStepsE", "tensorrt_llm::runtime::DecodingOutput::newTokensSteps"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput12newTokensVecE", "tensorrt_llm::runtime::DecodingOutput::newTokensVec"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9parentIdsE", "tensorrt_llm::runtime::DecodingOutput::parentIds"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInputE", "tensorrt_llm::runtime::GenerationInput"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput4BaseE", "tensorrt_llm::runtime::GenerationInput::Base"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::endId"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::ids"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::lengths"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::packed"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::padId"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput9TensorPtrE", "tensorrt_llm::runtime::GenerationInput::TensorPtr"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutputE", "tensorrt_llm::runtime::GenerationOutput"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput4BaseE", "tensorrt_llm::runtime::GenerationOutput::Base"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput16GenerationOutputE9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenerationOutput::GenerationOutput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput16GenerationOutputE9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenerationOutput::GenerationOutput::ids"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput16GenerationOutputE9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenerationOutput::GenerationOutput::lengths"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput9TensorPtrE", "tensorrt_llm::runtime::GenerationOutput::TensorPtr"], [2, 1, 1, "_CPPv4I00EN12tensorrt_llm7runtime22GenericGenerationInputE", "tensorrt_llm::runtime::GenericGenerationInput"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", 
"tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput::endId"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput::ids"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput::lengths"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput::packed"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput::padId"], [2, 5, 1, "_CPPv4I00EN12tensorrt_llm7runtime22GenericGenerationInputE", "tensorrt_llm::runtime::GenericGenerationInput::PromptTuningParams"], [2, 5, 1, "_CPPv4I00EN12tensorrt_llm7runtime22GenericGenerationInputE", "tensorrt_llm::runtime::GenericGenerationInput::TTensor"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput9TensorPtrE", "tensorrt_llm::runtime::GenericGenerationInput::TensorPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput12badWordsListE", "tensorrt_llm::runtime::GenericGenerationInput::badWordsList"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput13embeddingBiasE", "tensorrt_llm::runtime::GenericGenerationInput::embeddingBias"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput5endIdE", "tensorrt_llm::runtime::GenericGenerationInput::endId"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput3idsE", "tensorrt_llm::runtime::GenericGenerationInput::ids"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput7lengthsE", "tensorrt_llm::runtime::GenericGenerationInput::lengths"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput12maxNewTokensE", "tensorrt_llm::runtime::GenericGenerationInput::maxNewTokens"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput6packedE", "tensorrt_llm::runtime::GenericGenerationInput::packed"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput5padIdE", "tensorrt_llm::runtime::GenericGenerationInput::padId"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput18promptTuningParamsE", "tensorrt_llm::runtime::GenericGenerationInput::promptTuningParams"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput13stopWordsListE", "tensorrt_llm::runtime::GenericGenerationInput::stopWordsList"], [2, 1, 1, "_CPPv4I0EN12tensorrt_llm7runtime23GenericGenerationOutputE", "tensorrt_llm::runtime::GenericGenerationOutput"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput8CallbackE", "tensorrt_llm::runtime::GenericGenerationOutput::Callback"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput23GenericGenerationOutputE9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericGenerationOutput::GenericGenerationOutput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput23GenericGenerationOutputE9TensorPtr9TensorPtr", 
"tensorrt_llm::runtime::GenericGenerationOutput::GenericGenerationOutput::ids"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput23GenericGenerationOutputE9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericGenerationOutput::GenericGenerationOutput::lengths"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime23GenericGenerationOutputE", "tensorrt_llm::runtime::GenericGenerationOutput::TTensor"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput9TensorPtrE", "tensorrt_llm::runtime::GenericGenerationOutput::TensorPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput13contextLogitsE", "tensorrt_llm::runtime::GenericGenerationOutput::contextLogits"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput11cumLogProbsE", "tensorrt_llm::runtime::GenericGenerationOutput::cumLogProbs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput16generationLogitsE", "tensorrt_llm::runtime::GenericGenerationOutput::generationLogits"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput3idsE", "tensorrt_llm::runtime::GenericGenerationOutput::ids"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput7lengthsE", "tensorrt_llm::runtime::GenericGenerationOutput::lengths"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput8logProbsE", "tensorrt_llm::runtime::GenericGenerationOutput::logProbs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput16onTokenGeneratedE", "tensorrt_llm::runtime::GenericGenerationOutput::onTokenGenerated"], [2, 1, 1, "_CPPv4I0EN12tensorrt_llm7runtime25GenericPromptTuningParamsE", "tensorrt_llm::runtime::GenericPromptTuningParams"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams25GenericPromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericPromptTuningParams::GenericPromptTuningParams"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams25GenericPromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericPromptTuningParams::GenericPromptTuningParams::embeddingTable"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams25GenericPromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericPromptTuningParams::GenericPromptTuningParams::tasks"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams25GenericPromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericPromptTuningParams::GenericPromptTuningParams::vocabSize"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams8SizeTypeE", "tensorrt_llm::runtime::GenericPromptTuningParams::SizeType"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime25GenericPromptTuningParamsE", "tensorrt_llm::runtime::GenericPromptTuningParams::TTensor"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9TensorPtrE", "tensorrt_llm::runtime::GenericPromptTuningParams::TensorPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams14embeddingTableE", "tensorrt_llm::runtime::GenericPromptTuningParams::embeddingTable"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams19promptTuningEnabledE", "tensorrt_llm::runtime::GenericPromptTuningParams::promptTuningEnabled"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams5tasksE", "tensorrt_llm::runtime::GenericPromptTuningParams::tasks"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9vocabSizeE", 
"tensorrt_llm::runtime::GenericPromptTuningParams::vocabSize"], [2, 1, 1, "_CPPv4I0EN12tensorrt_llm7runtime10GptDecoderE", "tensorrt_llm::runtime::GptDecoder"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder13CudaStreamPtrE", "tensorrt_llm::runtime::GptDecoder::CudaStreamPtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderE6size_t6size_t6size_tRK13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoder::GptDecoder"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderE6size_t6size_t6size_tRK13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoder::GptDecoder::maxBatchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderE6size_t6size_t6size_tRK13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoder::GptDecoder::stream"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderE6size_t6size_t6size_tRK13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoder::GptDecoder::vocabSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderE6size_t6size_t6size_tRK13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoder::GptDecoder::vocabSizePadded"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime10GptDecoderE", "tensorrt_llm::runtime::GptDecoder::T"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder9TensorPtrE", "tensorrt_llm::runtime::GptDecoder::TensorPtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forward"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forward::input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forward::output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forwardAsync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forwardAsync::input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forwardAsync::output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::GptDecoder::gatherTree"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::GptDecoder::gatherTree::decodingInput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::GptDecoder::gatherTree::decodingOutput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::GptDecoder::gatherTree::finalOutputIds"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::GptDecoder::gatherTree::manager"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder17getSamplingConfigEv", "tensorrt_llm::runtime::GptDecoder::getSamplingConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder19mDynamicDecodeLayerE", "tensorrt_llm::runtime::GptDecoder::mDynamicDecodeLayer"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder14mLogProbsTiledE", 
"tensorrt_llm::runtime::GptDecoder::mLogProbsTiled"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder8mManagerE", "tensorrt_llm::runtime::GptDecoder::mManager"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder15mSamplingConfigE", "tensorrt_llm::runtime::GptDecoder::mSamplingConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t8SizeType", "tensorrt_llm::runtime::GptDecoder::setup"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t8SizeType", "tensorrt_llm::runtime::GptDecoder::setup::batchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t8SizeType", "tensorrt_llm::runtime::GptDecoder::setup::maxSequenceLength"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t8SizeType", "tensorrt_llm::runtime::GptDecoder::setup::samplingConfig"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatchE", "tensorrt_llm::runtime::GptDecoderBatch"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13CudaStreamPtrE", "tensorrt_llm::runtime::GptDecoderBatch::CudaStreamPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16DecodingInputPtrE", "tensorrt_llm::runtime::GptDecoderBatch::DecodingInputPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch17DecodingOutputPtrE", "tensorrt_llm::runtime::GptDecoderBatch::DecodingOutputPtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoderBatch::GptDecoderBatch"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoderBatch::GptDecoderBatch::stream"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoderBatch::GptDecoderBatch::vocabSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoderBatch::GptDecoderBatch::vocabSizePadded"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13GptDecoderPtrE", "tensorrt_llm::runtime::GptDecoderBatch::GptDecoderPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9TensorPtrE", "tensorrt_llm::runtime::GptDecoderBatch::TensorPtr"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch8finalizeE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::finalize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch8finalizeEv", "tensorrt_llm::runtime::GptDecoderBatch::finalize"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch8finalizeE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::finalize::batchIdx"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync::input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync::input"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync::output"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync::output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE", "tensorrt_llm::runtime::GptDecoderBatch::forwardSync"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncEv", "tensorrt_llm::runtime::GptDecoderBatch::forwardSync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE", "tensorrt_llm::runtime::GptDecoderBatch::forwardSync::e"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch15getAllNewTokensEv", "tensorrt_llm::runtime::GptDecoderBatch::getAllNewTokens"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch14getCumLogProbsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getCumLogProbs"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch14getCumLogProbsEv", "tensorrt_llm::runtime::GptDecoderBatch::getCumLogProbs"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch14getCumLogProbsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getCumLogProbs::batchIdx"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getFinishedEv", "tensorrt_llm::runtime::GptDecoderBatch::getFinished"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getLogProbsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getLogProbs"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getLogProbsEv", "tensorrt_llm::runtime::GptDecoderBatch::getLogProbs"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getLogProbsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getLogProbs::batchIdx"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch13getNbFinishedEv", "tensorrt_llm::runtime::GptDecoderBatch::getNbFinished"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch10getNbStepsEv", "tensorrt_llm::runtime::GptDecoderBatch::getNbSteps"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getNewTokensE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getNewTokens"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getNewTokensE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getNewTokens::iter"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getOutputIds"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsEv", "tensorrt_llm::runtime::GptDecoderBatch::getOutputIds"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getOutputIds::batchIdx"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getParentIdsEv", "tensorrt_llm::runtime::GptDecoderBatch::getParentIds"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mAcceptByLogitsE", "tensorrt_llm::runtime::GptDecoderBatch::mAcceptByLogits"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mActualBatchSizeE", "tensorrt_llm::runtime::GptDecoderBatch::mActualBatchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11mBeamWidthsE", "tensorrt_llm::runtime::GptDecoderBatch::mBeamWidths"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mBufferManagerE", 
"tensorrt_llm::runtime::GptDecoderBatch::mBufferManager"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mCurandStatesE", "tensorrt_llm::runtime::GptDecoderBatch::mCurandStates"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9mDecodersE", "tensorrt_llm::runtime::GptDecoderBatch::mDecoders"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mDecodingInputsE", "tensorrt_llm::runtime::GptDecoderBatch::mDecodingInputs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mDecodingOutputsE", "tensorrt_llm::runtime::GptDecoderBatch::mDecodingOutputs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12mDraftLogitsE", "tensorrt_llm::runtime::GptDecoderBatch::mDraftLogits"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11mDraftProbsE", "tensorrt_llm::runtime::GptDecoderBatch::mDraftProbs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mDraftTokenIdsE", "tensorrt_llm::runtime::GptDecoderBatch::mDraftTokenIds"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9mFinishedE", "tensorrt_llm::runtime::GptDecoderBatch::mFinished"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mFinishedStepsE", "tensorrt_llm::runtime::GptDecoderBatch::mFinishedSteps"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12mFinishedSumE", "tensorrt_llm::runtime::GptDecoderBatch::mFinishedSum"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mForwardEventE", "tensorrt_llm::runtime::GptDecoderBatch::mForwardEvent"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mForwardTokenE", "tensorrt_llm::runtime::GptDecoderBatch::mForwardToken"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch23mGeneratedTokensPerStepE", "tensorrt_llm::runtime::GptDecoderBatch::mGeneratedTokensPerStep"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch19mJointDecodingInputE", "tensorrt_llm::runtime::GptDecoderBatch::mJointDecodingInput"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch20mJointDecodingOutputE", "tensorrt_llm::runtime::GptDecoderBatch::mJointDecodingOutput"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch19mMaxAttentionWindowE", "tensorrt_llm::runtime::GptDecoderBatch::mMaxAttentionWindow"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mMaxNewTokensE", "tensorrt_llm::runtime::GptDecoderBatch::mMaxNewTokens"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch18mMaxSequenceLengthE", "tensorrt_llm::runtime::GptDecoderBatch::mMaxSequenceLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch17mMaxTokensPerStepE", "tensorrt_llm::runtime::GptDecoderBatch::mMaxTokensPerStep"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8mNbStepsE", "tensorrt_llm::runtime::GptDecoderBatch::mNbSteps"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mNumDraftTokensE", "tensorrt_llm::runtime::GptDecoderBatch::mNumDraftTokens"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mSinkTokenLengthE", "tensorrt_llm::runtime::GptDecoderBatch::mSinkTokenLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch7mStreamE", "tensorrt_llm::runtime::GptDecoderBatch::mStream"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8mStreamsE", "tensorrt_llm::runtime::GptDecoderBatch::mStreams"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12mTargetProbsE", "tensorrt_llm::runtime::GptDecoderBatch::mTargetProbs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10mVocabSizeE", 
"tensorrt_llm::runtime::GptDecoderBatch::mVocabSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mVocabSizePaddedE", "tensorrt_llm::runtime::GptDecoderBatch::mVocabSizePadded"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newBatch"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newBatch::inputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newBatch::outputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newBatch::samplingConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequest"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequest::batchIdx"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequest::request"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequest::samplingConfig"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch18postProcessRequestE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::postProcessRequest"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch18postProcessRequestE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::postProcessRequest::batchIdx"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup::dtype"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup::maxAttentionWindow"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup::maxBatchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup::maxBeamWidth"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup::maxSequenceLength"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup::maxTokensPerStep"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup::sinkTokenLength"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfigE", "tensorrt_llm::runtime::GptJsonConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::modelConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::name"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::pipelineParallelism"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::precision"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::tensorParallelism"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::version"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfig", "tensorrt_llm::runtime::GptJsonConfig::engineFilename"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::engineFilename"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::engineFilename::model"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfig", "tensorrt_llm::runtime::GptJsonConfig::engineFilename::worldConfig"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::engineFilename::worldConfig"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14getModelConfigEv", "tensorrt_llm::runtime::GptJsonConfig::getModelConfig"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig7getNameEv", "tensorrt_llm::runtime::GptJsonConfig::getName"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig22getPipelineParallelismEv", "tensorrt_llm::runtime::GptJsonConfig::getPipelineParallelism"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getPrecisionEv", "tensorrt_llm::runtime::GptJsonConfig::getPrecision"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig20getTensorParallelismEv", "tensorrt_llm::runtime::GptJsonConfig::getTensorParallelism"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig10getVersionEv", "tensorrt_llm::runtime::GptJsonConfig::getVersion"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getWorldSizeEv", "tensorrt_llm::runtime::GptJsonConfig::getWorldSize"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime13GptJsonConfig15mGptModelConfigE", "tensorrt_llm::runtime::GptJsonConfig::mGptModelConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5mNameE", "tensorrt_llm::runtime::GptJsonConfig::mName"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig20mPipelineParallelismE", "tensorrt_llm::runtime::GptJsonConfig::mPipelineParallelism"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig10mPrecisionE", "tensorrt_llm::runtime::GptJsonConfig::mPrecision"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig18mTensorParallelismE", "tensorrt_llm::runtime::GptJsonConfig::mTensorParallelism"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig8mVersionE", "tensorrt_llm::runtime::GptJsonConfig::mVersion"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt10filesystem4pathE", "tensorrt_llm::runtime::GptJsonConfig::parse"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::parse"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERNSt7istreamE", "tensorrt_llm::runtime::GptJsonConfig::parse"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::parse::json"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERNSt7istreamE", "tensorrt_llm::runtime::GptJsonConfig::parse::json"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt10filesystem4pathE", "tensorrt_llm::runtime::GptJsonConfig::parse::path"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfigE", "tensorrt_llm::runtime::GptModelConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::dtype"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::hiddenSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::nbHeads"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::nbLayers"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::vocabSize"], [2, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariantE", "tensorrt_llm::runtime::GptModelConfig::ModelVariant"], [2, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant4kGlmE", "tensorrt_llm::runtime::GptModelConfig::ModelVariant::kGlm"], [2, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant4kGptE", "tensorrt_llm::runtime::GptModelConfig::ModelVariant::kGpt"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEb", "tensorrt_llm::runtime::GptModelConfig::computeContextLogits"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEv", 
"tensorrt_llm::runtime::GptModelConfig::computeContextLogits"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEb", "tensorrt_llm::runtime::GptModelConfig::computeContextLogits::computeContextLogits"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig23computeGenerationLogitsEb", "tensorrt_llm::runtime::GptModelConfig::computeGenerationLogits"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig23computeGenerationLogitsEv", "tensorrt_llm::runtime::GptModelConfig::computeGenerationLogits"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig23computeGenerationLogitsEb", "tensorrt_llm::runtime::GptModelConfig::computeGenerationLogits::computeGenerationLogits"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig27getContextFMHAForGenerationEv", "tensorrt_llm::runtime::GptModelConfig::getContextFMHAForGeneration"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getDataTypeEv", "tensorrt_llm::runtime::GptModelConfig::getDataType"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13getHiddenSizeEv", "tensorrt_llm::runtime::GptModelConfig::getHiddenSize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getLoraModulesEv", "tensorrt_llm::runtime::GptModelConfig::getLoraModules"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxBatchSizeEv", "tensorrt_llm::runtime::GptModelConfig::getMaxBatchSize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxBeamWidthEv", "tensorrt_llm::runtime::GptModelConfig::getMaxBeamWidth"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMaxDraftLenEv", "tensorrt_llm::runtime::GptModelConfig::getMaxDraftLen"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMaxInputLenEv", "tensorrt_llm::runtime::GptModelConfig::getMaxInputLen"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMaxLoraRankEv", "tensorrt_llm::runtime::GptModelConfig::getMaxLoraRank"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxNumTokensEv", "tensorrt_llm::runtime::GptModelConfig::getMaxNumTokens"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig30getMaxPromptEmbeddingTableSizeEv", "tensorrt_llm::runtime::GptModelConfig::getMaxPromptEmbeddingTableSize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig17getMaxSequenceLenEv", "tensorrt_llm::runtime::GptModelConfig::getMaxSequenceLen"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig19getMaxTokensPerStepEv", "tensorrt_llm::runtime::GptModelConfig::getMaxTokensPerStep"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig16getMlpHiddenSizeEv", "tensorrt_llm::runtime::GptModelConfig::getMlpHiddenSize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getModelVariantEv", "tensorrt_llm::runtime::GptModelConfig::getModelVariant"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig10getNbHeadsEv", "tensorrt_llm::runtime::GptModelConfig::getNbHeads"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getNbKvHeadsEv", "tensorrt_llm::runtime::GptModelConfig::getNbKvHeads"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getNbLayersE8SizeType", "tensorrt_llm::runtime::GptModelConfig::getNbLayers"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getNbLayersE8SizeType", "tensorrt_llm::runtime::GptModelConfig::getNbLayers::pipelineParallelism"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig19getPagedContextFMHAEv", "tensorrt_llm::runtime::GptModelConfig::getPagedContextFMHA"], [2, 2, 1, 
"_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getQuantModeEv", "tensorrt_llm::runtime::GptModelConfig::getQuantMode"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getSizePerHeadEv", "tensorrt_llm::runtime::GptModelConfig::getSizePerHead"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig17getTokensPerBlockEv", "tensorrt_llm::runtime::GptModelConfig::getTokensPerBlock"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getVocabSizeEv", "tensorrt_llm::runtime::GptModelConfig::getVocabSize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18getVocabSizePaddedE8SizeType", "tensorrt_llm::runtime::GptModelConfig::getVocabSizePadded"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18getVocabSizePaddedE8SizeType", "tensorrt_llm::runtime::GptModelConfig::getVocabSizePadded::worldSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21mComputeContextLogitsE", "tensorrt_llm::runtime::GptModelConfig::mComputeContextLogits"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig24mComputeGenerationLogitsE", "tensorrt_llm::runtime::GptModelConfig::mComputeGenerationLogits"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig9mDataTypeE", "tensorrt_llm::runtime::GptModelConfig::mDataType"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig11mHiddenSizeE", "tensorrt_llm::runtime::GptModelConfig::mHiddenSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mInputPackedE", "tensorrt_llm::runtime::GptModelConfig::mInputPacked"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mLoraModulesE", "tensorrt_llm::runtime::GptModelConfig::mLoraModules"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxBatchSizeE", "tensorrt_llm::runtime::GptModelConfig::mMaxBatchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxBeamWidthE", "tensorrt_llm::runtime::GptModelConfig::mMaxBeamWidth"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxDraftLenE", "tensorrt_llm::runtime::GptModelConfig::mMaxDraftLen"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxInputLenE", "tensorrt_llm::runtime::GptModelConfig::mMaxInputLen"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxLoraRankE", "tensorrt_llm::runtime::GptModelConfig::mMaxLoraRank"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxNumTokensE", "tensorrt_llm::runtime::GptModelConfig::mMaxNumTokens"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig28mMaxPromptEmbeddingTableSizeE", "tensorrt_llm::runtime::GptModelConfig::mMaxPromptEmbeddingTableSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15mMaxSequenceLenE", "tensorrt_llm::runtime::GptModelConfig::mMaxSequenceLen"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14mMlpHiddenSizeE", "tensorrt_llm::runtime::GptModelConfig::mMlpHiddenSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mModelVariantE", "tensorrt_llm::runtime::GptModelConfig::mModelVariant"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig8mNbHeadsE", "tensorrt_llm::runtime::GptModelConfig::mNbHeads"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mNbKvHeadsE", "tensorrt_llm::runtime::GptModelConfig::mNbKvHeads"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig9mNbLayersE", "tensorrt_llm::runtime::GptModelConfig::mNbLayers"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17mPagedContextFMHAE", "tensorrt_llm::runtime::GptModelConfig::mPagedContextFMHA"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mPagedKvCacheE", "tensorrt_llm::runtime::GptModelConfig::mPagedKvCache"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mQuantModeE", "tensorrt_llm::runtime::GptModelConfig::mQuantMode"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mSizePerHeadE", "tensorrt_llm::runtime::GptModelConfig::mSizePerHead"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15mTokensPerBlockE", "tensorrt_llm::runtime::GptModelConfig::mTokensPerBlock"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig28mUseContextFMHAForGenerationE", "tensorrt_llm::runtime::GptModelConfig::mUseContextFMHAForGeneration"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig19mUseCustomAllReduceE", "tensorrt_llm::runtime::GptModelConfig::mUseCustomAllReduce"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig22mUseGptAttentionPluginE", "tensorrt_llm::runtime::GptModelConfig::mUseGptAttentionPlugin"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14mUseLoraPluginE", "tensorrt_llm::runtime::GptModelConfig::mUseLoraPlugin"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mVocabSizeE", "tensorrt_llm::runtime::GptModelConfig::mVocabSize"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setLoraModulesERKNSt6vectorI10LoraModuleEE", "tensorrt_llm::runtime::GptModelConfig::setLoraModules"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setLoraModulesERKNSt6vectorI10LoraModuleEE", "tensorrt_llm::runtime::GptModelConfig::setLoraModules::loraModules"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBatchSizeE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxBatchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBatchSizeE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxBatchSize::maxBatchSize"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBeamWidthE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxBeamWidth"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBeamWidthE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxBeamWidth::maxBeamWidth"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxDraftLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxDraftLen"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxDraftLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxDraftLen::maxDraftLen"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxInputLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxInputLen"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxInputLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxInputLen::maxInputLen"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxLoraRankE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxLoraRank"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxLoraRankE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxLoraRank::maxLoraRank"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxNumTokensENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptModelConfig::setMaxNumTokens"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxNumTokensENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptModelConfig::setMaxNumTokens::maxNumTokens"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig30setMaxPromptEmbeddingTableSizeE8SizeType", 
"tensorrt_llm::runtime::GptModelConfig::setMaxPromptEmbeddingTableSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig30setMaxPromptEmbeddingTableSizeE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxPromptEmbeddingTableSize::maxPromptEmbeddingTableSize"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setMaxSequenceLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxSequenceLen"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setMaxSequenceLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxSequenceLen::maxSequenceLen"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig16setMlpHiddenSizeE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMlpHiddenSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig16setMlpHiddenSizeE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMlpHiddenSize::mlpHiddenSize"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setModelVariantE12ModelVariant", "tensorrt_llm::runtime::GptModelConfig::setModelVariant"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setModelVariantE12ModelVariant", "tensorrt_llm::runtime::GptModelConfig::setModelVariant::modelVariant"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setNbKvHeadsE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setNbKvHeads"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setNbKvHeadsE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setNbKvHeads::nbKvHeads"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig19setPagedContextFMHAEb", "tensorrt_llm::runtime::GptModelConfig::setPagedContextFMHA"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig19setPagedContextFMHAEb", "tensorrt_llm::runtime::GptModelConfig::setPagedContextFMHA::pagedContextFMHA"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setQuantModeEN6common9QuantModeE", "tensorrt_llm::runtime::GptModelConfig::setQuantMode"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setQuantModeEN6common9QuantModeE", "tensorrt_llm::runtime::GptModelConfig::setQuantMode::QuantMode"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setSizePerHeadE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setSizePerHead"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setSizePerHeadE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setSizePerHead::sizePerHead"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setTokensPerBlockE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setTokensPerBlock"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setTokensPerBlockE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setTokensPerBlock::TokensPerBlock"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig30setUseContextFMHAForGenerationEb", "tensorrt_llm::runtime::GptModelConfig::setUseContextFMHAForGeneration"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig30setUseContextFMHAForGenerationEb", "tensorrt_llm::runtime::GptModelConfig::setUseContextFMHAForGeneration::useContextFMHAForGeneration"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig24supportsInflightBatchingEv", "tensorrt_llm::runtime::GptModelConfig::supportsInflightBatching"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEb", "tensorrt_llm::runtime::GptModelConfig::useCustomAllReduce"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEv", "tensorrt_llm::runtime::GptModelConfig::useCustomAllReduce"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEb", "tensorrt_llm::runtime::GptModelConfig::useCustomAllReduce::customAllReduce"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEb", "tensorrt_llm::runtime::GptModelConfig::useGptAttentionPlugin"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEv", "tensorrt_llm::runtime::GptModelConfig::useGptAttentionPlugin"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEb", "tensorrt_llm::runtime::GptModelConfig::useGptAttentionPlugin::useGptAttentionPlugin"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13useLoraPluginEb", "tensorrt_llm::runtime::GptModelConfig::useLoraPlugin"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13useLoraPluginEv", "tensorrt_llm::runtime::GptModelConfig::useLoraPlugin"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13useLoraPluginEb", "tensorrt_llm::runtime::GptModelConfig::useLoraPlugin::useLoraPlugin"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14usePackedInputEb", "tensorrt_llm::runtime::GptModelConfig::usePackedInput"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14usePackedInputEv", "tensorrt_llm::runtime::GptModelConfig::usePackedInput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14usePackedInputEb", "tensorrt_llm::runtime::GptModelConfig::usePackedInput::inputPacked"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEb", "tensorrt_llm::runtime::GptModelConfig::usePagedKvCache"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEv", "tensorrt_llm::runtime::GptModelConfig::usePagedKvCache"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEb", "tensorrt_llm::runtime::GptModelConfig::usePagedKvCache::pagedKvCache"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15usePromptTuningEv", "tensorrt_llm::runtime::GptModelConfig::usePromptTuning"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime10GptSessionE", "tensorrt_llm::runtime::GptSession"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6ConfigE", "tensorrt_llm::runtime::GptSession::Config"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config6ConfigE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::Config::Config"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config6ConfigE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::Config::Config::maxBatchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config6ConfigE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::Config::Config::maxBeamWidth"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config6ConfigE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::Config::Config::maxSequenceLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17ctxMicroBatchSizeE", "tensorrt_llm::runtime::GptSession::Config::ctxMicroBatchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config13cudaGraphModeE", "tensorrt_llm::runtime::GptSession::Config::cudaGraphMode"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17decoderPerRequestE", "tensorrt_llm::runtime::GptSession::Config::decoderPerRequest"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17genMicroBatchSizeE", "tensorrt_llm::runtime::GptSession::Config::genMicroBatchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config13kvCacheConfigE", 
"tensorrt_llm::runtime::GptSession::Config::kvCacheConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config12maxBatchSizeE", "tensorrt_llm::runtime::GptSession::Config::maxBatchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config12maxBeamWidthE", "tensorrt_llm::runtime::GptSession::Config::maxBeamWidth"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17maxSequenceLengthE", "tensorrt_llm::runtime::GptSession::Config::maxSequenceLength"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorE", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor17CudaGraphExecutorEv", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::CudaGraphExecutor"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor5clearEv", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::clear"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6createERK11cudaGraph_t", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::create"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6createERK11cudaGraph_t", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::create::graph"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor11hasInstanceEv", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::hasInstance"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6launchERK10CudaStream", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::launch"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6launchERK10CudaStream", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::launch::stream"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor9mInstanceE", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::mInstance"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16prepareNextGraphERK11TllmRuntime8SizeType", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::prepareNextGraph"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16prepareNextGraphERK11TllmRuntime8SizeType", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::prepareNextGraph::nextContextId"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16prepareNextGraphERK11TllmRuntime8SizeType", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::prepareNextGraph::runtime"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6updateERK11cudaGraph_t", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::update"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6updateERK11cudaGraph_t", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::update::graph"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor14uploadToStreamERK10CudaStream", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::uploadToStream"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor14uploadToStreamERK10CudaStream", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::uploadToStream::stream"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorD0Ev", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::~CudaGraphExecutor"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession"], [2, 2, 1, 
"_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::engineBuffer"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::engineBuffer"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::engineFile"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::engineSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::logger"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::logger"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::logger"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::modelConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::modelConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::modelConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::sessionConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::sessionConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::sessionConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::worldConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::worldConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::worldConfig"], [2, 
0, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13KvCacheConfigE", "tensorrt_llm::runtime::GptSession::KvCacheConfig"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14KvCacheManagerE", "tensorrt_llm::runtime::GptSession::KvCacheManager"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession9LoggerPtrE", "tensorrt_llm::runtime::GptSession::LoggerPtr"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfigE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE8SizeType8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigEv", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE8SizeType8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig::ctxMicroBatchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE8SizeType8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig::genMicroBatchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE8SizeType8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig::maxBatchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE8SizeType8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig::pipelineParallelism"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig12ctxBatchSizeE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::ctxBatchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig12genBatchSizeE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::genBatchSize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig13getGenGraphIdE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::getGenGraphId"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig13getGenGraphIdE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::getGenGraphId::flipFlopId"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig13getGenGraphIdE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::getGenGraphId::generationBatchId"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig13numCtxBatchesE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::numCtxBatches"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig12numCtxPerGenEv", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::numCtxPerGen"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig13numGenBatchesE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::numGenBatches"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession9TensorPtrE", "tensorrt_llm::runtime::GptSession::TensorPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession22TokenGeneratedCallbackE", "tensorrt_llm::runtime::GptSession::TokenGeneratedCallback"], [2, 2, 1, 
"_CPPv4N12tensorrt_llm7runtime10GptSession13createBuffersE8SizeType", "tensorrt_llm::runtime::GptSession::createBuffers"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13createBuffersE8SizeType", "tensorrt_llm::runtime::GptSession::createBuffers::numMicroBatches"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createContextsEv", "tensorrt_llm::runtime::GptSession::createContexts"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::createCustomAllReduceWorkspace"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::createCustomAllReduceWorkspace::batchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::createCustomAllReduceWorkspace::beamWidth"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::createCustomAllReduceWorkspace::maxSequenceLength"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::batchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::beamWidth"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::decoderPerRequest"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::logitsType"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::maxAttentionWindow"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::maxSequenceLength"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::numMicroBatches"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::sinkTokenLength"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager::batchSize"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager::beamWidth"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager::config"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager::maxAttentionWindow"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager::maxSequenceLength"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager::sinkTokenLength"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createOnTokenGeneratedCallbackER16GenerationOutput", "tensorrt_llm::runtime::GptSession::createOnTokenGeneratedCallback"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createOnTokenGeneratedCallbackER16GenerationOutput", "tensorrt_llm::runtime::GptSession::createOnTokenGeneratedCallback::outputs"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::decoderStepAsync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::decoderStepAsync::decoderStep"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::decoderStepAsync::microBatchId"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18executeContextStepERKNSt6vectorI15GenerationInputEERKNSt6vectorI8SizeTypeEEPK14KvCacheManager", "tensorrt_llm::runtime::GptSession::executeContextStep"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18executeContextStepERKNSt6vectorI15GenerationInputEERKNSt6vectorI8SizeTypeEEPK14KvCacheManager", "tensorrt_llm::runtime::GptSession::executeContextStep::generationBatchesInputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18executeContextStepERKNSt6vectorI15GenerationInputEERKNSt6vectorI8SizeTypeEEPK14KvCacheManager", "tensorrt_llm::runtime::GptSession::executeContextStep::generationBatchesOffsets"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18executeContextStepERKNSt6vectorI15GenerationInputEERKNSt6vectorI8SizeTypeEEPK14KvCacheManager", "tensorrt_llm::runtime::GptSession::executeContextStep::kvCacheManager"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep::kvCacheManager"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep::microBatchOffsets"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep::microBatchesFinished"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep::microBatchesInputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep::microBatchesOutputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep::step"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8finalizeE8SizeType", "tensorrt_llm::runtime::GptSession::finalize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8finalizeE8SizeType", "tensorrt_llm::runtime::GptSession::finalize::microBatchId"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generate"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generate::inputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generate::outputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generate::samplingConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallback", "tensorrt_llm::runtime::GptSession::generateBatched"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallback", "tensorrt_llm::runtime::GptSession::generateBatched::microBatchesInputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallback", "tensorrt_llm::runtime::GptSession::generateBatched::microBatchesOutputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallback", "tensorrt_llm::runtime::GptSession::generateBatched::onTokenGenerated"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallback", "tensorrt_llm::runtime::GptSession::generateBatched::samplingConfig"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16getBufferManagerEv", "tensorrt_llm::runtime::GptSession::getBufferManager"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession9getDeviceEv", "tensorrt_llm::runtime::GptSession::getDevice"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession9getLoggerEv", "tensorrt_llm::runtime::GptSession::getLogger"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16getLogitDataTypeEv", "tensorrt_llm::runtime::GptSession::getLogitDataType"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession14getModelConfigEv", "tensorrt_llm::runtime::GptSession::getModelConfig"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession14getWorldConfigEv", "tensorrt_llm::runtime::GptSession::getWorldConfig"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initDecoder"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initDecoder::inputs"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initDecoder::microBatchId"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initDecoder::outputIds"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initDecoder::outputs"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initDecoder::samplingConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::kvCacheAddSequences"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::kvCacheAddSequences::beamWidth"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::kvCacheAddSequences::firstBatchIdx"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::kvCacheAddSequences::microBatchId"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8mBuffersE", "tensorrt_llm::runtime::GptSession::mBuffers"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10mCommEventE", "tensorrt_llm::runtime::GptSession::mCommEvent"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession9mCommPtrsE", "tensorrt_llm::runtime::GptSession::mCommPtrs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession11mCommStreamE", "tensorrt_llm::runtime::GptSession::mCommStream"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19mCudaGraphInstancesE", "tensorrt_llm::runtime::GptSession::mCudaGraphInstances"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime10GptSession14mCudaGraphModeE", "tensorrt_llm::runtime::GptSession::mCudaGraphMode"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession26mDecoderMaxAttentionWindowE", "tensorrt_llm::runtime::GptSession::mDecoderMaxAttentionWindow"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession25mDecoderMaxSequenceLengthE", "tensorrt_llm::runtime::GptSession::mDecoderMaxSequenceLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession23mDecoderSinkTokenLengthE", "tensorrt_llm::runtime::GptSession::mDecoderSinkTokenLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession9mDecodersE", "tensorrt_llm::runtime::GptSession::mDecoders"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession7mDeviceE", "tensorrt_llm::runtime::GptSession::mDevice"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17mIpcMemoryHandlesE", "tensorrt_llm::runtime::GptSession::mIpcMemoryHandles"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15mKvCacheManagerE", "tensorrt_llm::runtime::GptSession::mKvCacheManager"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession7mLoggerE", "tensorrt_llm::runtime::GptSession::mLogger"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17mMicroBatchConfigE", "tensorrt_llm::runtime::GptSession::mMicroBatchConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession12mModelConfigE", "tensorrt_llm::runtime::GptSession::mModelConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13mPipelineCommE", "tensorrt_llm::runtime::GptSession::mPipelineComm"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15mReceivedEventsE", "tensorrt_llm::runtime::GptSession::mReceivedEvents"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8mRuntimeE", "tensorrt_llm::runtime::GptSession::mRuntime"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession12mWorldConfigE", "tensorrt_llm::runtime::GptSession::mWorldConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession5setupERK6Config", "tensorrt_llm::runtime::GptSession::setup"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession5setupERK6Config", "tensorrt_llm::runtime::GptSession::setup::sessionConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::shouldStopSync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::shouldStopSync::batchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::shouldStopSync::beamWidth"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::shouldStopSync::microBatchId"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13useCudaGraphsEv", "tensorrt_llm::runtime::GptSession::useCudaGraphs"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime7IBufferE", "tensorrt_llm::runtime::IBuffer"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer8DataTypeE", "tensorrt_llm::runtime::IBuffer::DataType"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferERK7IBuffer", "tensorrt_llm::runtime::IBuffer::IBuffer"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferEv", "tensorrt_llm::runtime::IBuffer::IBuffer"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer14SharedConstPtrE", "tensorrt_llm::runtime::IBuffer::SharedConstPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer9SharedPtrE", "tensorrt_llm::runtime::IBuffer::SharedPtr"], 
[2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer14UniqueConstPtrE", "tensorrt_llm::runtime::IBuffer::UniqueConstPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer9UniquePtrE", "tensorrt_llm::runtime::IBuffer::UniquePtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE", "tensorrt_llm::runtime::IBuffer::data"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataEv", "tensorrt_llm::runtime::IBuffer::data"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE", "tensorrt_llm::runtime::IBuffer::data"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataEv", "tensorrt_llm::runtime::IBuffer::data"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE", "tensorrt_llm::runtime::IBuffer::data::index"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE", "tensorrt_llm::runtime::IBuffer::data::index"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer11getCapacityEv", "tensorrt_llm::runtime::IBuffer::getCapacity"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer11getDataTypeEv", "tensorrt_llm::runtime::IBuffer::getDataType"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer15getDataTypeNameEv", "tensorrt_llm::runtime::IBuffer::getDataTypeName"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer13getMemoryTypeEv", "tensorrt_llm::runtime::IBuffer::getMemoryType"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer17getMemoryTypeNameEv", "tensorrt_llm::runtime::IBuffer::getMemoryTypeName"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7getSizeEv", "tensorrt_llm::runtime::IBuffer::getSize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer14getSizeInBytesEv", "tensorrt_llm::runtime::IBuffer::getSizeInBytes"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer10memoryTypeEPKv", "tensorrt_llm::runtime::IBuffer::memoryType"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer10memoryTypeEPKv", "tensorrt_llm::runtime::IBuffer::memoryType::data"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBufferaSERK7IBuffer", "tensorrt_llm::runtime::IBuffer::operator="], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer7releaseEv", "tensorrt_llm::runtime::IBuffer::release"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer6resizeENSt6size_tE", "tensorrt_llm::runtime::IBuffer::resize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer6resizeENSt6size_tE", "tensorrt_llm::runtime::IBuffer::resize::newSize"], [2, 2, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice"], [2, 2, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice"], [2, 5, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::TConstPtr"], [2, 5, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::TConstPtr"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::buffer"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::buffer"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::offset"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::offset"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::offset"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::offset"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::size"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::tensor"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::tensor"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7toBytesENSt6size_tE", "tensorrt_llm::runtime::IBuffer::toBytes"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7toBytesENSt6size_tE", "tensorrt_llm::runtime::IBuffer::toBytes::size"], [2, 2, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtr", "tensorrt_llm::runtime::IBuffer::view"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view"], [2, 5, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::TConstPtr"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::size"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::tensor"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtr", "tensorrt_llm::runtime::IBuffer::view::tensor"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::tensor"], [2, 2, 1, 
"_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE", "tensorrt_llm::runtime::IBuffer::wrap"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::T"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::T"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE", "tensorrt_llm::runtime::IBuffer::wrap::T"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::capacity"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::capacity"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::data"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::data"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::data"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::data"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::size"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::type"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::type"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE", "tensorrt_llm::runtime::IBuffer::wrap::v"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBufferD0Ev", "tensorrt_llm::runtime::IBuffer::~IBuffer"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoderE", "tensorrt_llm::runtime::IGptDecoder"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds::contextLengths"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds::draftTokenIds"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds::finishedFinal"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds::finishedSum"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds::finishedVec"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds::numDraftTokens"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds::sequenceLengths"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds::stream"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds::targetTokenIds"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::curandState"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::draftLogits"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::draftProbs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", 
"tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::finished"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::numDraftTokens"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::randomAcceptThreshold"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::stream"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::targetLogits"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::targetProbs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::useRandomAcceptThreshold"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::vocabSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::vocabSizePadded"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createEN8nvinfer18DataTypeE6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::create"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createEN8nvinfer18DataTypeE6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::create::dtype"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createEN8nvinfer18DataTypeE6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::create::maxBatchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createEN8nvinfer18DataTypeE6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::create::stream"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createEN8nvinfer18DataTypeE6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::create::vocabSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createEN8nvinfer18DataTypeE6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrE", 
"tensorrt_llm::runtime::IGptDecoder::create::vocabSizePadded"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forward"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forward::input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forward::output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forwardAsync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forwardAsync::input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forwardAsync::output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree::decodingInput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree::decodingOutput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree::finalOutputIds"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree::manager"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder17getSamplingConfigEv", "tensorrt_llm::runtime::IGptDecoder::getSamplingConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t8SizeType", "tensorrt_llm::runtime::IGptDecoder::setup"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t8SizeType", "tensorrt_llm::runtime::IGptDecoder::setup::batchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t8SizeType", "tensorrt_llm::runtime::IGptDecoder::setup::maxSequenceLength"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t8SizeType", "tensorrt_llm::runtime::IGptDecoder::setup::samplingConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoderD0Ev", "tensorrt_llm::runtime::IGptDecoder::~IGptDecoder"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatchE", "tensorrt_llm::runtime::IGptDecoderBatch"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoderBatch::CudaStreamPtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch16IGptDecoderBatchEv", "tensorrt_llm::runtime::IGptDecoderBatch::IGptDecoderBatch"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch9TensorPtrE", "tensorrt_llm::runtime::IGptDecoderBatch::TensorPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch8TokenPtrE", "tensorrt_llm::runtime::IGptDecoderBatch::TokenPtr"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch8finalizeE8SizeType", 
"tensorrt_llm::runtime::IGptDecoderBatch::finalize"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch8finalizeE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::finalize::batchIdx"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch7forwardERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forward"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch7forwardERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forward::input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch7forwardERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forward::output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forwardAsync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forwardAsync::input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forwardAsync::output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE", "tensorrt_llm::runtime::IGptDecoderBatch::forwardSync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE", "tensorrt_llm::runtime::IGptDecoderBatch::forwardSync::token"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch14getCumLogProbsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getCumLogProbs"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch14getCumLogProbsEv", "tensorrt_llm::runtime::IGptDecoderBatch::getCumLogProbs"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch14getCumLogProbsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getCumLogProbs::batchIdx"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getFinishedEv", "tensorrt_llm::runtime::IGptDecoderBatch::getFinished"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getLogProbsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getLogProbs"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getLogProbsEv", "tensorrt_llm::runtime::IGptDecoderBatch::getLogProbs"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getLogProbsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getLogProbs::batchIdx"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch10getNbStepsEv", "tensorrt_llm::runtime::IGptDecoderBatch::getNbSteps"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getOutputIdsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getOutputIds"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getOutputIdsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getOutputIds::batchIdx"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getParentIdsEv", "tensorrt_llm::runtime::IGptDecoderBatch::getParentIds"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::IGptDecoderBatch::newRequest"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", 
"tensorrt_llm::runtime::IGptDecoderBatch::newRequest::batchIdx"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::IGptDecoderBatch::newRequest::request"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::IGptDecoderBatch::newRequest::samplingConfig"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoderE", "tensorrt_llm::runtime::IStatefulGptDecoder"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder13CudaStreamPtrE", "tensorrt_llm::runtime::IStatefulGptDecoder::CudaStreamPtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder19IStatefulGptDecoderEv", "tensorrt_llm::runtime::IStatefulGptDecoder::IStatefulGptDecoder"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder9TensorPtrE", "tensorrt_llm::runtime::IStatefulGptDecoder::TensorPtr"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder8finalizeEv", "tensorrt_llm::runtime::IStatefulGptDecoder::finalize"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder7forwardERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forward"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder7forwardERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forward::input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder7forwardERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forward::output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forwardAsync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forwardAsync::input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forwardAsync::output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder11forwardSyncEv", "tensorrt_llm::runtime::IStatefulGptDecoder::forwardSync"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder15getAllNewTokensEv", "tensorrt_llm::runtime::IStatefulGptDecoder::getAllNewTokens"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder14getCumLogProbsEv", "tensorrt_llm::runtime::IStatefulGptDecoder::getCumLogProbs"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder11getLogProbsEv", "tensorrt_llm::runtime::IStatefulGptDecoder::getLogProbs"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder13getNbFinishedEv", "tensorrt_llm::runtime::IStatefulGptDecoder::getNbFinished"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getNewTokensE8SizeType", "tensorrt_llm::runtime::IStatefulGptDecoder::getNewTokens"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getNewTokensE8SizeType", "tensorrt_llm::runtime::IStatefulGptDecoder::getNewTokens::iter"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getOutputIdsEv", "tensorrt_llm::runtime::IStatefulGptDecoder::getOutputIds"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::newBatch"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::newBatch::inputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::newBatch::outputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::newBatch::samplingConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::dtype"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::maxAttentionWindow"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::maxBatchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::maxBeamWidth"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::maxSequenceLength"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::maxTokensPerStep"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::sinkTokenLength"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoderD0Ev", "tensorrt_llm::runtime::IStatefulGptDecoder::~IStatefulGptDecoder"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime7ITensorE", "tensorrt_llm::runtime::ITensor"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7DimTypeE", "tensorrt_llm::runtime::ITensor::DimType"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorERK7ITensor", "tensorrt_llm::runtime::ITensor::ITensor"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorEv", "tensorrt_llm::runtime::ITensor::ITensor"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5ShapeE", "tensorrt_llm::runtime::ITensor::Shape"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor14SharedConstPtrE", "tensorrt_llm::runtime::ITensor::SharedConstPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9SharedPtrE", "tensorrt_llm::runtime::ITensor::SharedPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor14UniqueConstPtrE", "tensorrt_llm::runtime::ITensor::UniqueConstPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9UniquePtrE", "tensorrt_llm::runtime::ITensor::UniquePtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor8castSizeE6size_t", "tensorrt_llm::runtime::ITensor::castSize"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime7ITensor8castSizeE6size_t", "tensorrt_llm::runtime::ITensor::castSize::newSize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7ITensor8getShapeEv", "tensorrt_llm::runtime::ITensor::getShape"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9makeShapeERKNSt16initializer_listI8SizeTypeEE", "tensorrt_llm::runtime::ITensor::makeShape"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9makeShapeERKNSt16initializer_listI8SizeTypeEE", "tensorrt_llm::runtime::ITensor::makeShape::dims"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensoraSERK7ITensor", "tensorrt_llm::runtime::ITensor::operator="], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7reshapeERK5Shape", "tensorrt_llm::runtime::ITensor::reshape"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7reshapeERK5Shape", "tensorrt_llm::runtime::ITensor::reshape::dims"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor6resizeENSt6size_tE", "tensorrt_llm::runtime::ITensor::resize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor6resizeENSt6size_tE", "tensorrt_llm::runtime::ITensor::resize::newSize"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor11shapeEqualsEbRK5ShapePK1T8SizeType", "tensorrt_llm::runtime::ITensor::shapeEquals"], [2, 2, 1, "_CPPv4I0ENK12tensorrt_llm7runtime7ITensor11shapeEqualsEbPK1T8SizeType", "tensorrt_llm::runtime::ITensor::shapeEquals"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor11shapeEqualsERK5ShapeRK5Shape", "tensorrt_llm::runtime::ITensor::shapeEquals"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERK5Shape", "tensorrt_llm::runtime::ITensor::shapeEquals"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERKNSt16initializer_listI8SizeTypeEE", "tensorrt_llm::runtime::ITensor::shapeEquals"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor11shapeEqualsEbRK5ShapePK1T8SizeType", "tensorrt_llm::runtime::ITensor::shapeEquals::T"], [2, 5, 1, "_CPPv4I0ENK12tensorrt_llm7runtime7ITensor11shapeEqualsEbPK1T8SizeType", "tensorrt_llm::runtime::ITensor::shapeEquals::T"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor11shapeEqualsEbRK5ShapePK1T8SizeType", "tensorrt_llm::runtime::ITensor::shapeEquals::count"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime7ITensor11shapeEqualsEbPK1T8SizeType", "tensorrt_llm::runtime::ITensor::shapeEquals::count"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor11shapeEqualsEbRK5ShapePK1T8SizeType", "tensorrt_llm::runtime::ITensor::shapeEquals::dims"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime7ITensor11shapeEqualsEbPK1T8SizeType", "tensorrt_llm::runtime::ITensor::shapeEquals::dims"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor11shapeEqualsEbRK5ShapePK1T8SizeType", "tensorrt_llm::runtime::ITensor::shapeEquals::lhs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor11shapeEqualsERK5ShapeRK5Shape", "tensorrt_llm::runtime::ITensor::shapeEquals::lhs"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERK5Shape", "tensorrt_llm::runtime::ITensor::shapeEquals::other"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERKNSt16initializer_listI8SizeTypeEE", "tensorrt_llm::runtime::ITensor::shapeEquals::other"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor11shapeEqualsERK5ShapeRK5Shape", "tensorrt_llm::runtime::ITensor::shapeEquals::rhs"], [2, 2, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice"], [2, 2, 1, 
"_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice"], [2, 5, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::TConstPtr"], [2, 5, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::TConstPtr"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::offset"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::offset"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::offset"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::offset"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::size"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::tensor"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::tensor"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::tensor"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::tensor"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeE8SizeType", "tensorrt_llm::runtime::ITensor::squeeze"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::squeeze"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeE8SizeType", "tensorrt_llm::runtime::ITensor::squeeze::dim"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::squeeze::dim"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::squeeze::shape"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor8toStringERK5Shape", "tensorrt_llm::runtime::ITensor::toString"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor8toStringERK5Shape", 
"tensorrt_llm::runtime::ITensor::toString::dims"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeE8SizeType", "tensorrt_llm::runtime::ITensor::unsqueeze"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::unsqueeze"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeE8SizeType", "tensorrt_llm::runtime::ITensor::unsqueeze::dim"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::unsqueeze::dim"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::unsqueeze::shape"], [2, 2, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape", "tensorrt_llm::runtime::ITensor::view"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewE9SharedPtr", "tensorrt_llm::runtime::ITensor::view"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape", "tensorrt_llm::runtime::ITensor::view"], [2, 5, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape", "tensorrt_llm::runtime::ITensor::view::TConstPtr"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape", "tensorrt_llm::runtime::ITensor::view::buffer"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape", "tensorrt_llm::runtime::ITensor::view::dims"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape", "tensorrt_llm::runtime::ITensor::view::dims"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape", "tensorrt_llm::runtime::ITensor::view::tensor"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewE9SharedPtr", "tensorrt_llm::runtime::ITensor::view::tensor"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor6volumeERK5Shape", "tensorrt_llm::runtime::ITensor::volume"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor6volumeERK5Shape", "tensorrt_llm::runtime::ITensor::volume::dims"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor17volumeNonNegativeERK5Shape", "tensorrt_llm::runtime::ITensor::volumeNonNegative"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor17volumeNonNegativeERK5Shape", "tensorrt_llm::runtime::ITensor::volumeNonNegative::shape"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape", "tensorrt_llm::runtime::ITensor::wrap"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape", "tensorrt_llm::runtime::ITensor::wrap"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape", "tensorrt_llm::runtime::ITensor::wrap"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape", "tensorrt_llm::runtime::ITensor::wrap::T"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", 
"tensorrt_llm::runtime::ITensor::wrap::T"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::T"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::capacity"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::capacity"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape", "tensorrt_llm::runtime::ITensor::wrap::data"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::data"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::data"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::data"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape", "tensorrt_llm::runtime::ITensor::wrap::shape"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::shape"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::shape"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::shape"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::shape"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::type"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::type"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::v"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensorD0Ev", "tensorrt_llm::runtime::ITensor::~ITensor"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemoryE", "tensorrt_llm::runtime::IpcMemory"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory10FLAGS_SIZEE", "tensorrt_llm::runtime::IpcMemory::FLAGS_SIZE"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryERK11WorldConfigNSt6size_tE", "tensorrt_llm::runtime::IpcMemory::IpcMemory"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryERK11WorldConfigNSt6size_tE", "tensorrt_llm::runtime::IpcMemory::IpcMemory::bufferSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryERK11WorldConfigNSt6size_tE", "tensorrt_llm::runtime::IpcMemory::IpcMemory::worldConfig"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9TensorPtrE", "tensorrt_llm::runtime::IpcMemory::TensorPtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory17allocateIpcMemoryEv", "tensorrt_llm::runtime::IpcMemory::allocateIpcMemory"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory16destroyIpcMemoryEv", "tensorrt_llm::runtime::IpcMemory::destroyIpcMemory"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime9IpcMemory17getCommPtrsTensorEv", "tensorrt_llm::runtime::IpcMemory::getCommPtrsTensor"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory10mBufferPtrE", "tensorrt_llm::runtime::IpcMemory::mBufferPtr"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime9IpcMemory11mBufferSizeE", "tensorrt_llm::runtime::IpcMemory::mBufferSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9mCommPtrsE", "tensorrt_llm::runtime::IpcMemory::mCommPtrs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory12mWorldConfigE", "tensorrt_llm::runtime::IpcMemory::mWorldConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemoryD0Ev", "tensorrt_llm::runtime::IpcMemory::~IpcMemory"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCountersE", "tensorrt_llm::runtime::MemoryCounters"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8DiffTypeE", "tensorrt_llm::runtime::MemoryCounters::DiffType"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters14MemoryCountersEv", "tensorrt_llm::runtime::MemoryCounters::MemoryCounters"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8SizeTypeE", "tensorrt_llm::runtime::MemoryCounters::SizeType"], [2, 2, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate"], [2, 5, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate::T"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate::memoryType"], [2, 3, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate::size"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8SizeTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString::bytes"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8SizeTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString::bytes"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString::precision"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8SizeTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString::precision"], [2, 2, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate"], [2, 5, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate::T"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate::memoryType"], [2, 3, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate::size"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate::size"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getCpuEv", "tensorrt_llm::runtime::MemoryCounters::getCpu"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getCpuDiffEv", "tensorrt_llm::runtime::MemoryCounters::getCpuDiff"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getGpuEv", "tensorrt_llm::runtime::MemoryCounters::getGpu"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getGpuDiffEv", "tensorrt_llm::runtime::MemoryCounters::getGpuDiff"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters11getInstanceEv", "tensorrt_llm::runtime::MemoryCounters::getInstance"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters9getPinnedEv", "tensorrt_llm::runtime::MemoryCounters::getPinned"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters13getPinnedDiffEv", "tensorrt_llm::runtime::MemoryCounters::getPinnedDiff"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getUVMEv", "tensorrt_llm::runtime::MemoryCounters::getUVM"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getUVMDiffEv", "tensorrt_llm::runtime::MemoryCounters::getUVMDiff"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mCpuE", "tensorrt_llm::runtime::MemoryCounters::mCpu"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mCpuDiffE", "tensorrt_llm::runtime::MemoryCounters::mCpuDiff"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mGpuE", "tensorrt_llm::runtime::MemoryCounters::mGpu"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mGpuDiffE", "tensorrt_llm::runtime::MemoryCounters::mGpuDiff"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters7mPinnedE", "tensorrt_llm::runtime::MemoryCounters::mPinned"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters11mPinnedDiffE", "tensorrt_llm::runtime::MemoryCounters::mPinnedDiff"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mUVME", "tensorrt_llm::runtime::MemoryCounters::mUVM"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mUVMDiffE", "tensorrt_llm::runtime::MemoryCounters::mUVMDiff"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters8toStringEv", "tensorrt_llm::runtime::MemoryCounters::toString"], [2, 6, 1, "_CPPv4N12tensorrt_llm7runtime10MemoryTypeE", "tensorrt_llm::runtime::MemoryType"], [2, 7, 1, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kCPUE", "tensorrt_llm::runtime::MemoryType::kCPU"], [2, 7, 1, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kGPUE", "tensorrt_llm::runtime::MemoryType::kGPU"], [2, 7, 1, "_CPPv4N12tensorrt_llm7runtime10MemoryType7kPINNEDE", "tensorrt_llm::runtime::MemoryType::kPINNED"], [2, 7, 1, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kUVME", "tensorrt_llm::runtime::MemoryType::kUVM"], [2, 1, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime16MemoryTypeStringE", "tensorrt_llm::runtime::MemoryTypeString"], [2, 5, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime16MemoryTypeStringE", "tensorrt_llm::runtime::MemoryTypeString::T"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEEE", "tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kCPU&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEE5valueE", "tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kCPU&gt;::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEEE", 
"tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kGPU&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEE5valueE", "tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kGPU&gt;::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEEE", "tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kPINNED&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEE5valueE", "tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kPINNED&gt;::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kUVMEEE", "tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kUVM&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kUVMEE5valueE", "tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kUVM&gt;::value"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", 
"tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 0, 1, "_CPPv4I0EN12tensorrt_llm7runtime18PointerElementTypeE", "tensorrt_llm::runtime::PointerElementType"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime18PointerElementTypeE", "tensorrt_llm::runtime::PointerElementType::T"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParamsE", "tensorrt_llm::runtime::PromptTuningParams"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams18PromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::PromptTuningParams::PromptTuningParams"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams18PromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::PromptTuningParams::PromptTuningParams::embeddingTable"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams18PromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::PromptTuningParams::PromptTuningParams::tasks"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams18PromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::PromptTuningParams::PromptTuningParams::vocabSize"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams8SizeTypeE", "tensorrt_llm::runtime::PromptTuningParams::SizeType"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams9TensorPtrE", "tensorrt_llm::runtime::PromptTuningParams::TensorPtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::batchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::manager"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::numContextRequests"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::packedInput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::reqBeamWidths"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::reqPromptLengths"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::tasksHost"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfigE", "tensorrt_llm::runtime::SamplingConfig"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9FloatTypeE", "tensorrt_llm::runtime::SamplingConfig::FloatType"], [2, 0, 1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig6OptVecE", "tensorrt_llm::runtime::SamplingConfig::OptVec"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig6OptVecE", "tensorrt_llm::runtime::SamplingConfig::OptVec::T"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigE8SizeType", "tensorrt_llm::runtime::SamplingConfig::SamplingConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigE8SizeType", "tensorrt_llm::runtime::SamplingConfig::SamplingConfig::beamWidth"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig23beamSearchDiversityRateE", "tensorrt_llm::runtime::SamplingConfig::beamSearchDiversityRate"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9beamWidthE", "tensorrt_llm::runtime::SamplingConfig::beamWidth"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig24draftAcceptanceThresholdE", "tensorrt_llm::runtime::SamplingConfig::draftAcceptanceThreshold"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig16frequencyPenaltyE", "tensorrt_llm::runtime::SamplingConfig::frequencyPenalty"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig13lengthPenaltyE", "tensorrt_llm::runtime::SamplingConfig::lengthPenalty"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9minLengthE", "tensorrt_llm::runtime::SamplingConfig::minLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig17normalizeLogProbsE", "tensorrt_llm::runtime::SamplingConfig::normalizeLogProbs"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime14SamplingConfig15presencePenaltyE", "tensorrt_llm::runtime::SamplingConfig::presencePenalty"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig10randomSeedE", "tensorrt_llm::runtime::SamplingConfig::randomSeed"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig17repetitionPenaltyE", "tensorrt_llm::runtime::SamplingConfig::repetitionPenalty"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig11temperatureE", "tensorrt_llm::runtime::SamplingConfig::temperature"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topKE", "tensorrt_llm::runtime::SamplingConfig::topK"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topPE", "tensorrt_llm::runtime::SamplingConfig::topP"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9topPDecayE", "tensorrt_llm::runtime::SamplingConfig::topPDecay"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig7topPMinE", "tensorrt_llm::runtime::SamplingConfig::topPMin"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig12topPResetIdsE", "tensorrt_llm::runtime::SamplingConfig::topPResetIds"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime8SizeTypeE", "tensorrt_llm::runtime::SizeType"], [2, 0, 1, "_CPPv4I0EN12tensorrt_llm7runtime12StringPtrMapE", "tensorrt_llm::runtime::StringPtrMap"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime12StringPtrMapE", "tensorrt_llm::runtime::StringPtrMap::T"], [2, 1, 1, "_CPPv4I0_bEN12tensorrt_llm7runtime11TRTDataTypeE", "tensorrt_llm::runtime::TRTDataType"], [2, 5, 1, "_CPPv4I0_bEN12tensorrt_llm7runtime11TRTDataTypeE", "tensorrt_llm::runtime::TRTDataType::T"], [2, 1, 1, "_CPPv4I0EN12tensorrt_llm7runtime11TRTDataTypeIP1TEE", "tensorrt_llm::runtime::TRTDataType&lt;T*&gt;"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime11TRTDataTypeIP1TEE", "tensorrt_llm::runtime::TRTDataType&lt;T*&gt;::T"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE15kUnderlyingTypeE", "tensorrt_llm::runtime::TRTDataType&lt;T*&gt;::kUnderlyingType"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;T*&gt;::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIbEE", "tensorrt_llm::runtime::TRTDataType&lt;bool&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIbE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;bool&gt;::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIfEE", "tensorrt_llm::runtime::TRTDataType&lt;float&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIfE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;float&gt;::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeI4halfEE", "tensorrt_llm::runtime::TRTDataType&lt;half&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeI4halfE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;half&gt;::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEEE", "tensorrt_llm::runtime::TRTDataType&lt;std::int32_t&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;std::int32_t&gt;::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEEE", "tensorrt_llm::runtime::TRTDataType&lt;std::int64_t&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;std::int64_t&gt;::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEEE", "tensorrt_llm::runtime::TRTDataType&lt;std::int8_t&gt;"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;std::int8_t&gt;::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEEE", "tensorrt_llm::runtime::TRTDataType&lt;std::uint32_t&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;std::uint32_t&gt;::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEEE", "tensorrt_llm::runtime::TRTDataType&lt;std::uint64_t&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;std::uint64_t&gt;::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEEE", "tensorrt_llm::runtime::TRTDataType&lt;std::uint8_t&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;std::uint8_t&gt;::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIPvEE", "tensorrt_llm::runtime::TRTDataType&lt;void*&gt;"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIPvE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;void*&gt;::value"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLoggerE", "tensorrt_llm::runtime::TllmLogger"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8getLevelEv", "tensorrt_llm::runtime::TllmLogger::getLevel"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE", "tensorrt_llm::runtime::TllmLogger::log"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE", "tensorrt_llm::runtime::TllmLogger::log::msg"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE", "tensorrt_llm::runtime::TllmLogger::log::severity"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8setLevelE8Severity", "tensorrt_llm::runtime::TllmLogger::setLevel"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8setLevelE8Severity", "tensorrt_llm::runtime::TllmLogger::setLevel::level"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11TokenIdTypeE", "tensorrt_llm::runtime::TokenIdType"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfigE", "tensorrt_llm::runtime::WorldConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::WorldConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::WorldConfig::deviceIds"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::WorldConfig::gpusPerNode"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::WorldConfig::pipelineParallelism"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::WorldConfig::rank"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::WorldConfig::tensorParallelism"], [2, 2, 1, 
"_CPPv4NK12tensorrt_llm7runtime11WorldConfig9getDeviceEv", "tensorrt_llm::runtime::WorldConfig::getDevice"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig15getGpusPerGroupEv", "tensorrt_llm::runtime::WorldConfig::getGpusPerGroup"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig14getGpusPerNodeEv", "tensorrt_llm::runtime::WorldConfig::getGpusPerNode"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig11getLastRankEv", "tensorrt_llm::runtime::WorldConfig::getLastRank"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig24getPipelineParallelGroupEv", "tensorrt_llm::runtime::WorldConfig::getPipelineParallelGroup"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig23getPipelineParallelRankEv", "tensorrt_llm::runtime::WorldConfig::getPipelineParallelRank"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig22getPipelineParallelismEv", "tensorrt_llm::runtime::WorldConfig::getPipelineParallelism"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getRankEv", "tensorrt_llm::runtime::WorldConfig::getRank"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getSizeEv", "tensorrt_llm::runtime::WorldConfig::getSize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig21getTensorParallelRankEv", "tensorrt_llm::runtime::WorldConfig::getTensorParallelRank"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig20getTensorParallelismEv", "tensorrt_llm::runtime::WorldConfig::getTensorParallelism"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig27isFirstPipelineParallelRankEv", "tensorrt_llm::runtime::WorldConfig::isFirstPipelineParallelRank"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig26isLastPipelineParallelRankEv", "tensorrt_llm::runtime::WorldConfig::isLastPipelineParallelRank"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig18isPipelineParallelEv", "tensorrt_llm::runtime::WorldConfig::isPipelineParallel"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig16isTensorParallelEv", "tensorrt_llm::runtime::WorldConfig::isTensorParallel"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig19kDefaultGpusPerNodeE", "tensorrt_llm::runtime::WorldConfig::kDefaultGpusPerNode"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig10mDeviceIdsE", "tensorrt_llm::runtime::WorldConfig::mDeviceIds"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig12mGpusPerNodeE", "tensorrt_llm::runtime::WorldConfig::mGpusPerNode"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig20mPipelineParallelismE", "tensorrt_llm::runtime::WorldConfig::mPipelineParallelism"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig5mRankE", "tensorrt_llm::runtime::WorldConfig::mRank"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig18mTensorParallelismE", "tensorrt_llm::runtime::WorldConfig::mTensorParallelism"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEERKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::mpi"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEERKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::mpi::deviceIds"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEERKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::mpi::gpusPerNode"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEERKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::mpi::pipelineParallelism"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEERKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::mpi::tensorParallelism"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11validConfigE8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::validConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11validConfigE8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::validConfig::pipelineParallelism"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11validConfigE8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::validConfig::tensorParallelism"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer", "tensorrt_llm::runtime::bufferCast"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer", "tensorrt_llm::runtime::bufferCast"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer", "tensorrt_llm::runtime::bufferCast::T"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer", "tensorrt_llm::runtime::bufferCast::T"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer", "tensorrt_llm::runtime::bufferCast::buffer"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer", "tensorrt_llm::runtime::bufferCast::buffer"], [2, 2, 1, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE", "tensorrt_llm::runtime::constPointerCast"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE", "tensorrt_llm::runtime::constPointerCast"], [2, 5, 1, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE", "tensorrt_llm::runtime::constPointerCast::D"], [2, 5, 1, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE", "tensorrt_llm::runtime::constPointerCast::T"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE", "tensorrt_llm::runtime::constPointerCast::T"], [2, 3, 1, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE", "tensorrt_llm::runtime::constPointerCast::ptr"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE", "tensorrt_llm::runtime::constPointerCast::ptr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7decoderE", "tensorrt_llm::runtime::decoder"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5InputE", "tensorrt_llm::runtime::decoder::Input"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input5InputE9TensorPtr", "tensorrt_llm::runtime::decoder::Input::Input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input5InputE9TensorPtr", "tensorrt_llm::runtime::decoder::Input::Input::logits"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input9TensorPtrE", "tensorrt_llm::runtime::decoder::Input::TensorPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input16cacheIndirectionE", "tensorrt_llm::runtime::decoder::Input::cacheIndirection"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime7decoder5Input6logitsE", "tensorrt_llm::runtime::decoder::Input::logits"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6OutputE", "tensorrt_llm::runtime::decoder::Output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output6OutputEv", "tensorrt_llm::runtime::decoder::Output::Output"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output9TensorPtrE", "tensorrt_llm::runtime::decoder::Output::TensorPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output16cacheIndirectionE", "tensorrt_llm::runtime::decoder::Output::cacheIndirection"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output15sequenceLengthsE", "tensorrt_llm::runtime::decoder::Output::sequenceLengths"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batchE", "tensorrt_llm::runtime::decoder_batch"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5InputE", "tensorrt_llm::runtime::decoder_batch::Input"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEE", "tensorrt_llm::runtime::decoder_batch::Input::Input"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEERKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEE", "tensorrt_llm::runtime::decoder_batch::Input::Input"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEERKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEERKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::active"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEERKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::active"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::logits"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEERKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::logits"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::logits"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEERKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::logits"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input14TensorConstPtrE", "tensorrt_llm::runtime::decoder_batch::Input::TensorConstPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input9TensorPtrE", "tensorrt_llm::runtime::decoder_batch::Input::TensorPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input6activeE", "tensorrt_llm::runtime::decoder_batch::Input::active"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input16cacheIndirectionE", "tensorrt_llm::runtime::decoder_batch::Input::cacheIndirection"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input6logitsE", "tensorrt_llm::runtime::decoder_batch::Input::logits"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch6OutputE", "tensorrt_llm::runtime::decoder_batch::Output"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7RequestE", "tensorrt_llm::runtime::decoder_batch::Request"], [2, 0, 1, 
"_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request9BufferPtrE", "tensorrt_llm::runtime::decoder_batch::Request::BufferPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request14ConstTensorPtrE", "tensorrt_llm::runtime::decoder_batch::Request::ConstTensorPtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14ConstTensorPtr8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::decoder_batch::Request::Request"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14ConstTensorPtr8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::decoder_batch::Request::Request::endId"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14ConstTensorPtr8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::decoder_batch::Request::Request::ids"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14ConstTensorPtr8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::decoder_batch::Request::Request::inputLen"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14ConstTensorPtr8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::decoder_batch::Request::Request::maxNewTokens"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request9TensorPtrE", "tensorrt_llm::runtime::decoder_batch::Request::TensorPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12badWordsListE", "tensorrt_llm::runtime::decoder_batch::Request::badWordsList"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request18computeCumLogProbsE", "tensorrt_llm::runtime::decoder_batch::Request::computeCumLogProbs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request15computeLogProbsE", "tensorrt_llm::runtime::decoder_batch::Request::computeLogProbs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request11draftLogitsE", "tensorrt_llm::runtime::decoder_batch::Request::draftLogits"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request11draftTokensE", "tensorrt_llm::runtime::decoder_batch::Request::draftTokens"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13embeddingBiasE", "tensorrt_llm::runtime::decoder_batch::Request::embeddingBias"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request5endIdE", "tensorrt_llm::runtime::decoder_batch::Request::endId"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13decoder_batch7Request22generatedTokensPerStepEv", "tensorrt_llm::runtime::decoder_batch::Request::generatedTokensPerStep"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request3idsE", "tensorrt_llm::runtime::decoder_batch::Request::ids"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request8inputLenE", "tensorrt_llm::runtime::decoder_batch::Request::inputLen"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12maxNewTokensE", "tensorrt_llm::runtime::decoder_batch::Request::maxNewTokens"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13stopWordsListE", "tensorrt_llm::runtime::decoder_batch::Request::stopWordsList"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5TokenE", "tensorrt_llm::runtime::decoder_batch::Token"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5TokenERR9CudaEventRKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Token::Token"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5TokenERR9CudaEventRKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Token::Token::active"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5TokenERR9CudaEventRKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Token::Token::event"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token6activeE", "tensorrt_llm::runtime::decoder_batch::Token::active"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5eventE", "tensorrt_llm::runtime::decoder_batch::Token::event"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer", "tensorrt_llm::runtime::operator&lt;&lt;"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor", "tensorrt_llm::runtime::operator&lt;&lt;"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE", "tensorrt_llm::runtime::operator&lt;&lt;"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer", "tensorrt_llm::runtime::operator&lt;&lt;::buffer"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE", "tensorrt_llm::runtime::operator&lt;&lt;::dims"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer", "tensorrt_llm::runtime::operator&lt;&lt;::output"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor", "tensorrt_llm::runtime::operator&lt;&lt;::output"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE", "tensorrt_llm::runtime::operator&lt;&lt;::output"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor", "tensorrt_llm::runtime::operator&lt;&lt;::tensor"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13setPeerAccessERK11WorldConfigb", "tensorrt_llm::runtime::setPeerAccess"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13setPeerAccessERK11WorldConfigb", "tensorrt_llm::runtime::setPeerAccess::enable"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13setPeerAccessERK11WorldConfigb", "tensorrt_llm::runtime::setPeerAccess::worldConfig"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime5utilsE", "tensorrt_llm::runtime::utils"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime5utils10loadEngineERKNSt6stringE", "tensorrt_llm::runtime::utils::loadEngine"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime5utils10loadEngineERKNSt6stringE", "tensorrt_llm::runtime::utils::loadEngine::enginePath"]], "tensorrt_llm": [[23, 8, 0, "-", "functional"], [25, 8, 0, "-", "models"], [26, 8, 0, "-", "plugin"], [27, 8, 0, "-", "quantization"], [28, 8, 0, "-", "runtime"]], "tensorrt_llm.functional": [[23, 9, 1, "", "AllReduceStrategy"], [23, 9, 1, "", "AttentionMaskType"], [23, 9, 1, "", "DimRange"], [23, 9, 1, "", "LayerNormPositionType"], [23, 9, 1, "", "LayerNormType"], [23, 9, 1, "", "MLPType"], [23, 9, 1, "", "PositionEmbeddingType"], [23, 9, 1, "", "RotaryScalingType"], [23, 9, 1, "", "Tensor"], [23, 13, 1, "", "abs"], [23, 13, 1, "", "activation"], [23, 13, 1, "", "add"], [23, 13, 1, "", "allgather"], [23, 13, 1, "", "allreduce"], [23, 13, 1, "", "arange"], [23, 13, 1, "", "argmax"], [23, 13, 1, "", "assertion"], [23, 13, 1, "", "avg_pool2d"], [23, 13, 1, "", "bert_attention"], [23, 13, 1, "", "broadcast_helper"], [23, 13, 1, "", "cast"], [23, 13, 1, "", "chunk"], [23, 13, 1, "", "clip"], [23, 13, 1, "", "concat"], [23, 13, 1, "", "constant"], [23, 13, 1, "", "constant_to_tensor_"], [23, 13, 1, "", "conv1d"], [23, 13, 1, "", "conv2d"], [23, 13, 1, "", "conv_transpose2d"], [23, 13, 1, "", "cos"], [23, 13, 1, "", "cumsum"], [23, 13, 1, "", "div"], [23, 13, 1, "", "einsum"], [23, 
13, 1, "", "elementwise_binary"], [23, 13, 1, "", "embedding"], [23, 13, 1, "", "eq"], [23, 13, 1, "", "exp"], [23, 13, 1, "", "expand"], [23, 13, 1, "", "expand_dims"], [23, 13, 1, "", "expand_dims_like"], [23, 13, 1, "", "expand_mask"], [23, 13, 1, "", "flip"], [23, 13, 1, "", "gather"], [23, 13, 1, "", "gather_last_token_logits"], [23, 13, 1, "", "geglu"], [23, 13, 1, "", "gelu"], [23, 13, 1, "", "generate_alibi_biases"], [23, 13, 1, "", "generate_alibi_slopes"], [23, 13, 1, "", "gpt_attention"], [23, 13, 1, "", "group_norm"], [23, 13, 1, "", "gt"], [23, 13, 1, "", "identity"], [23, 13, 1, "", "index_select"], [23, 13, 1, "", "interpolate"], [23, 13, 1, "", "is_gated_activation"], [23, 13, 1, "", "layer_norm"], [23, 13, 1, "", "lora_plugin"], [23, 13, 1, "", "lt"], [23, 13, 1, "", "masked_select"], [23, 13, 1, "", "matmul"], [23, 13, 1, "", "max"], [23, 13, 1, "", "maximum"], [23, 13, 1, "", "mean"], [23, 13, 1, "", "minimum"], [23, 13, 1, "", "mul"], [23, 13, 1, "", "non_gated_version"], [23, 13, 1, "", "op_and"], [23, 13, 1, "", "op_or"], [23, 13, 1, "", "outer"], [23, 13, 1, "", "permute"], [23, 13, 1, "", "pow"], [23, 13, 1, "", "recv"], [23, 13, 1, "", "relu"], [23, 13, 1, "", "repeat_interleave"], [23, 13, 1, "", "rms_norm"], [23, 13, 1, "", "round"], [23, 13, 1, "", "select"], [23, 13, 1, "", "selective_scan"], [23, 13, 1, "", "send"], [23, 13, 1, "", "shape"], [23, 13, 1, "", "sigmoid"], [23, 13, 1, "", "silu"], [23, 13, 1, "", "sin"], [23, 13, 1, "", "slice"], [23, 13, 1, "", "softmax"], [23, 13, 1, "", "softplus"], [23, 13, 1, "", "split"], [23, 13, 1, "", "sqrt"], [23, 13, 1, "", "squared_relu"], [23, 13, 1, "", "stack"], [23, 13, 1, "", "sub"], [23, 13, 1, "", "swiglu"], [23, 13, 1, "", "tanh"], [23, 13, 1, "", "transpose"], [23, 13, 1, "", "unary"], [23, 13, 1, "", "unsqueeze"], [23, 13, 1, "", "view"], [23, 13, 1, "", "where"]], "tensorrt_llm.functional.AllReduceStrategy": [[23, 10, 1, "", "AUTO"], [23, 10, 1, "", "ONESHOT"], [23, 10, 1, "", "RING"], [23, 10, 1, "", "TWOSHOT"]], "tensorrt_llm.functional.AttentionMaskType": [[23, 10, 1, "", "bidirectional"], [23, 10, 1, "", "bidirectionalglm"], [23, 10, 1, "", "causal"], [23, 10, 1, "", "padding"]], "tensorrt_llm.functional.LayerNormPositionType": [[23, 10, 1, "", "post_layernorm"], [23, 10, 1, "", "pre_layernorm"]], "tensorrt_llm.functional.LayerNormType": [[23, 10, 1, "", "GroupNorm"], [23, 10, 1, "", "LayerNorm"], [23, 10, 1, "", "RmsNorm"]], "tensorrt_llm.functional.MLPType": [[23, 10, 1, "", "FusedGatedMLP"], [23, 10, 1, "", "GatedMLP"], [23, 10, 1, "", "MLP"]], "tensorrt_llm.functional.PositionEmbeddingType": [[23, 10, 1, "", "alibi"], [23, 10, 1, "", "alibi_with_scale"], [23, 10, 1, "", "chatglm"], [23, 11, 1, "", "choices"], [23, 11, 1, "", "from_string"], [23, 11, 1, "", "is_alibi"], [23, 11, 1, "", "is_rope"], [23, 10, 1, "", "learned_absolute"], [23, 10, 1, "", "relative"], [23, 10, 1, "", "rope_gpt_neox"], [23, 10, 1, "", "rope_gptj"]], "tensorrt_llm.functional.RotaryScalingType": [[23, 10, 1, "", "dynamic"], [23, 10, 1, "", "linear"], [23, 10, 1, "", "none"]], "tensorrt_llm.functional.Tensor": [[23, 11, 1, "", "abs"], [23, 11, 1, "", "cast"], [23, 12, 1, "", "dtype"], [23, 11, 1, "", "get_parent"], [23, 11, 1, "", "get_users"], [23, 11, 1, "", "is_dynamic"], [23, 11, 1, "", "is_trt_wrapper"], [23, 12, 1, "", "location"], [23, 11, 1, "", "mark_output"], [23, 11, 1, "", "max"], [23, 11, 1, "", "mean"], [23, 12, 1, "", "name"], [23, 11, 1, "", "ndim"], [23, 12, 1, "", "network"], [23, 11, 1, "", "permute"], [23, 
11, 1, "", "rank"], [23, 11, 1, "", "replace_all_uses_with"], [23, 12, 1, "", "shape"], [23, 11, 1, "", "size"], [23, 11, 1, "", "split"], [23, 11, 1, "", "sqrt"], [23, 11, 1, "", "transpose"], [23, 11, 1, "", "view"]], "tensorrt_llm.layers": [[24, 8, 0, "-", "activation"], [24, 8, 0, "-", "attention"], [24, 8, 0, "-", "cast"], [24, 8, 0, "-", "conv"], [24, 8, 0, "-", "embedding"], [24, 8, 0, "-", "linear"], [24, 8, 0, "-", "mlp"], [24, 8, 0, "-", "normalization"], [24, 8, 0, "-", "pooling"]], "tensorrt_llm.layers.activation": [[24, 9, 1, "", "Mish"]], "tensorrt_llm.layers.activation.Mish": [[24, 11, 1, "", "forward"]], "tensorrt_llm.layers.attention": [[24, 9, 1, "", "Attention"], [24, 9, 1, "", "AttentionParams"], [24, 9, 1, "", "BertAttention"], [24, 9, 1, "", "KeyValueCacheParams"], [24, 9, 1, "", "RopeEmbeddingUtils"], [24, 13, 1, "", "make_causal_mask"]], "tensorrt_llm.layers.attention.Attention": [[24, 11, 1, "", "forward"]], "tensorrt_llm.layers.attention.AttentionParams": [[24, 11, 1, "", "is_valid"], [24, 11, 1, "", "is_valid_cross_attn"]], "tensorrt_llm.layers.attention.BertAttention": [[24, 11, 1, "", "forward"]], "tensorrt_llm.layers.attention.KeyValueCacheParams": [[24, 11, 1, "", "fill_none_tensor_list"], [24, 11, 1, "", "get_first_host_kv_cache_block_pointers"], [24, 11, 1, "", "get_first_kv_cache_block_pointers"], [24, 11, 1, "", "get_first_past_key_value"], [24, 11, 1, "", "is_valid"]], "tensorrt_llm.layers.attention.RopeEmbeddingUtils": [[24, 11, 1, "", "apply_rotary_pos_emb"], [24, 11, 1, "", "apply_rotary_pos_emb_chatglm"], [24, 11, 1, "", "create_sinusoidal_positions"], [24, 11, 1, "", "rotate_every_two"], [24, 11, 1, "", "rotate_half"]], "tensorrt_llm.layers.cast": [[24, 9, 1, "", "Cast"]], "tensorrt_llm.layers.cast.Cast": [[24, 11, 1, "", "forward"]], "tensorrt_llm.layers.conv": [[24, 9, 1, "", "Conv1d"], [24, 9, 1, "", "Conv2d"], [24, 9, 1, "", "ConvTranspose2d"]], "tensorrt_llm.layers.conv.Conv1d": [[24, 11, 1, "", "forward"]], "tensorrt_llm.layers.conv.Conv2d": [[24, 11, 1, "", "forward"]], "tensorrt_llm.layers.conv.ConvTranspose2d": [[24, 11, 1, "", "forward"]], "tensorrt_llm.layers.embedding": [[24, 9, 1, "", "Embedding"], [24, 9, 1, "", "PromptTuningEmbedding"]], "tensorrt_llm.layers.embedding.Embedding": [[24, 11, 1, "", "forward"]], "tensorrt_llm.layers.embedding.PromptTuningEmbedding": [[24, 11, 1, "", "forward"]], "tensorrt_llm.layers.linear": [[24, 10, 1, "", "ColumnLinear"], [24, 9, 1, "", "Linear"], [24, 9, 1, "", "RowLinear"]], "tensorrt_llm.layers.linear.Linear": [[24, 11, 1, "", "forward"], [24, 11, 1, "", "multiply_gather"]], "tensorrt_llm.layers.linear.RowLinear": [[24, 11, 1, "", "forward"], [24, 11, 1, "", "multiply_reduce"]], "tensorrt_llm.layers.mlp": [[24, 9, 1, "", "FusedGatedMLP"], [24, 9, 1, "", "GatedMLP"], [24, 9, 1, "", "MLP"]], "tensorrt_llm.layers.mlp.FusedGatedMLP": [[24, 11, 1, "", "forward"]], "tensorrt_llm.layers.mlp.GatedMLP": [[24, 11, 1, "", "forward"]], "tensorrt_llm.layers.mlp.MLP": [[24, 11, 1, "", "forward"]], "tensorrt_llm.layers.normalization": [[24, 9, 1, "", "GroupNorm"], [24, 9, 1, "", "LayerNorm"], [24, 9, 1, "", "RmsNorm"]], "tensorrt_llm.layers.normalization.GroupNorm": [[24, 11, 1, "", "forward"]], "tensorrt_llm.layers.normalization.LayerNorm": [[24, 11, 1, "", "forward"]], "tensorrt_llm.layers.normalization.RmsNorm": [[24, 11, 1, "", "forward"]], "tensorrt_llm.layers.pooling": [[24, 9, 1, "", "AvgPool2d"]], "tensorrt_llm.layers.pooling.AvgPool2d": [[24, 11, 1, "", "forward"]], "tensorrt_llm.models": [[25, 9, 1, "", 
"BaichuanForCausalLM"], [25, 9, 1, "", "BertForQuestionAnswering"], [25, 9, 1, "", "BertForSequenceClassification"], [25, 9, 1, "", "BertModel"], [25, 9, 1, "", "BloomForCausalLM"], [25, 9, 1, "", "BloomModel"], [25, 9, 1, "", "ChatGLMForCausalLM"], [25, 9, 1, "", "ChatGLMModel"], [25, 9, 1, "", "DecoderModel"], [25, 9, 1, "", "EncoderModel"], [25, 9, 1, "", "FalconForCausalLM"], [25, 9, 1, "", "FalconModel"], [25, 9, 1, "", "GPTJForCausalLM"], [25, 9, 1, "", "GPTJModel"], [25, 9, 1, "", "GPTLMHeadModel"], [25, 9, 1, "", "GPTModel"], [25, 9, 1, "", "GPTNeoXForCausalLM"], [25, 9, 1, "", "GPTNeoXModel"], [25, 9, 1, "", "LLaMAForCausalLM"], [25, 9, 1, "", "LLaMAModel"], [25, 9, 1, "", "MPTForCausalLM"], [25, 9, 1, "", "MPTModel"], [25, 9, 1, "", "MambaLMHeadModel"], [25, 9, 1, "", "MedusaForCausalLm"], [25, 9, 1, "", "OPTForCausalLM"], [25, 9, 1, "", "OPTModel"], [25, 9, 1, "", "PhiForCausalLM"], [25, 9, 1, "", "PhiModel"], [25, 9, 1, "", "PretrainedConfig"], [25, 9, 1, "", "PretrainedModel"], [25, 9, 1, "", "QWenForCausalLM"], [25, 9, 1, "", "SkyworkForCausalLM"], [25, 9, 1, "", "WhisperEncoder"], [25, 13, 1, "", "quantize_model"]], "tensorrt_llm.models.BertForQuestionAnswering": [[25, 11, 1, "", "forward"]], "tensorrt_llm.models.BertForSequenceClassification": [[25, 11, 1, "", "forward"]], "tensorrt_llm.models.BertModel": [[25, 11, 1, "", "forward"]], "tensorrt_llm.models.BloomModel": [[25, 11, 1, "", "forward"]], "tensorrt_llm.models.ChatGLMForCausalLM": [[25, 11, 1, "", "check_config"], [25, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.ChatGLMModel": [[25, 11, 1, "", "forward"]], "tensorrt_llm.models.DecoderModel": [[25, 11, 1, "", "forward"], [25, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.EncoderModel": [[25, 11, 1, "", "forward"], [25, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.FalconForCausalLM": [[25, 11, 1, "", "check_config"]], "tensorrt_llm.models.FalconModel": [[25, 11, 1, "", "forward"]], "tensorrt_llm.models.GPTJForCausalLM": [[25, 11, 1, "", "check_config"]], "tensorrt_llm.models.GPTJModel": [[25, 11, 1, "", "forward"]], "tensorrt_llm.models.GPTLMHeadModel": [[25, 11, 1, "", "forward"], [25, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.GPTModel": [[25, 11, 1, "", "forward"]], "tensorrt_llm.models.GPTNeoXModel": [[25, 11, 1, "", "forward"]], "tensorrt_llm.models.LLaMAForCausalLM": [[25, 11, 1, "", "check_config"], [25, 11, 1, "", "default_plugin_config"], [25, 11, 1, "", "from_hugging_face"], [25, 11, 1, "", "rotary_base"], [25, 11, 1, "", "rotary_scaling"]], "tensorrt_llm.models.LLaMAModel": [[25, 11, 1, "", "forward"]], "tensorrt_llm.models.MPTForCausalLM": [[25, 11, 1, "", "check_config"]], "tensorrt_llm.models.MPTModel": [[25, 11, 1, "", "forward"]], "tensorrt_llm.models.MambaLMHeadModel": [[25, 11, 1, "", "forward"], [25, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.MedusaForCausalLm": [[25, 11, 1, "", "forward"], [25, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.OPTForCausalLM": [[25, 11, 1, "", "check_config"]], "tensorrt_llm.models.OPTModel": [[25, 11, 1, "", "forward"]], "tensorrt_llm.models.PhiForCausalLM": [[25, 11, 1, "", "check_config"]], "tensorrt_llm.models.PhiModel": [[25, 11, 1, "", "forward"]], "tensorrt_llm.models.PretrainedConfig": [[25, 11, 1, "", "from_dict"], [25, 11, 1, "", "from_json_file"], [25, 11, 1, "", "set_if_not_exist"], [25, 11, 1, "", "set_rank"], [25, 11, 1, "", "to_dict"]], "tensorrt_llm.models.PretrainedModel": [[25, 11, 1, "", "check_config"], [25, 11, 1, "", "from_checkpoint"], [25, 11, 1, "", 
"from_config"], [25, 11, 1, "", "load"], [25, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.QWenForCausalLM": [[25, 11, 1, "", "forward"], [25, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.SkyworkForCausalLM": [[25, 11, 1, "", "check_config"]], "tensorrt_llm.models.WhisperEncoder": [[25, 11, 1, "", "forward"], [25, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.plugin": [[26, 9, 1, "", "PluginConfig"]], "tensorrt_llm.plugin.PluginConfig": [[26, 11, 1, "", "to_legacy_setting"]], "tensorrt_llm.quantization": [[27, 9, 1, "", "QuantMode"]], "tensorrt_llm.runtime": [[28, 9, 1, "", "ChatGLMGenerationSession"], [28, 9, 1, "", "GenerationSequence"], [28, 9, 1, "", "GenerationSession"], [28, 9, 1, "", "KVCacheManager"], [28, 9, 1, "", "LogitsProcessor"], [28, 9, 1, "", "LogitsProcessorList"], [28, 9, 1, "", "MambaLMHeadModelGenerationSession"], [28, 9, 1, "", "ModelConfig"], [28, 9, 1, "", "ModelRunner"], [28, 9, 1, "", "QWenForCausalLMGenerationSession"], [28, 9, 1, "", "Session"], [28, 9, 1, "", "StoppingCriteria"], [28, 9, 1, "", "StoppingCriteriaList"], [28, 9, 1, "", "TensorInfo"], [28, 13, 1, "", "to_word_list_format"]], "tensorrt_llm.runtime.GenerationSequence": [[28, 11, 1, "", "get_batch_idx"], [28, 11, 1, "", "get_seq_idx"]], "tensorrt_llm.runtime.GenerationSession": [[28, 10, 1, "", "batch_size"], [28, 10, 1, "", "buffer_allocated"], [28, 12, 1, "", "cross_attention"], [28, 10, 1, "", "cuda_graph_mode"], [28, 11, 1, "", "cuda_stream_guard"], [28, 10, 1, "", "debug_mode"], [28, 10, 1, "", "debug_tensors_to_save"], [28, 11, 1, "", "decode"], [28, 11, 1, "", "decode_batch"], [28, 11, 1, "", "decode_regular"], [28, 11, 1, "", "decode_stream"], [28, 10, 1, "", "device"], [28, 12, 1, "", "dtype"], [28, 11, 1, "", "dump_debug_buffers"], [28, 11, 1, "", "early_stop_criteria"], [28, 11, 1, "", "filter_medusa_logits"], [28, 11, 1, "", "finalize_decoder"], [28, 11, 1, "", "find_best_medusa_path"], [28, 12, 1, "", "first_layer"], [28, 12, 1, "", "gather_context_logits"], [28, 12, 1, "", "gather_generation_logits"], [28, 11, 1, "", "get_next_medusa_tokens"], [28, 11, 1, "", "handle_per_step"], [28, 12, 1, "", "has_position_embedding"], [28, 12, 1, "", "has_token_type_embedding"], [28, 12, 1, "", "head_size"], [28, 12, 1, "", "hidden_size"], [28, 12, 1, "", "is_medusa_mode"], [28, 12, 1, "", "last_layer"], [28, 10, 1, "", "mapping"], [28, 12, 1, "", "max_medusa_tokens"], [28, 12, 1, "", "max_prompt_embedding_table_size"], [28, 10, 1, "", "medusa_paths"], [28, 10, 1, "", "medusa_position_offsets"], [28, 10, 1, "", "medusa_temperature"], [28, 10, 1, "", "medusa_topks"], [28, 10, 1, "", "medusa_tree_ids"], [28, 11, 1, "", "next_medusa_input_ids"], [28, 12, 1, "", "num_heads"], [28, 12, 1, "", "num_heads_kv"], [28, 12, 1, "", "num_layers"], [28, 12, 1, "", "num_medusa_heads"], [28, 10, 1, "", "num_medusa_tokens"], [28, 12, 1, "", "paged_kv_cache"], [28, 11, 1, "", "pp_communicate_final_output_ids"], [28, 11, 1, "", "pp_communicate_new_tokens"], [28, 11, 1, "", "process_logits_for_medusa_mode"], [28, 12, 1, "", "quant_mode"], [28, 12, 1, "", "remove_input_padding"], [28, 10, 1, "", "runtime"], [28, 11, 1, "", "setup"], [28, 12, 1, "", "tokens_per_block"], [28, 11, 1, "", "update_kv_cache_draft_token_location"], [28, 11, 1, "", "update_output_ids_by_offset"], [28, 12, 1, "", "use_context_fmha_for_generation"], [28, 12, 1, "", "use_custom_all_reduce"], [28, 12, 1, "", "use_gpt_attention_plugin"], [28, 12, 1, "", "use_lora_plugin"], [28, 12, 1, "", "vocab_size"]], 
"tensorrt_llm.runtime.KVCacheManager": [[28, 11, 1, "", "add_sequence"], [28, 11, 1, "", "get_pointer_arrays"], [28, 11, 1, "", "step"]], "tensorrt_llm.runtime.MambaLMHeadModelGenerationSession": [[28, 12, 1, "", "mamba_d_conv"], [28, 12, 1, "", "mamba_d_state"], [28, 12, 1, "", "mamba_expand"], [28, 11, 1, "", "setup"]], "tensorrt_llm.runtime.ModelConfig": [[28, 10, 1, "", "cross_attention"], [28, 10, 1, "", "dtype"], [28, 10, 1, "", "gather_context_logits"], [28, 10, 1, "", "gather_generation_logits"], [28, 10, 1, "", "gpt_attention_plugin"], [28, 10, 1, "", "has_position_embedding"], [28, 10, 1, "", "has_token_type_embedding"], [28, 10, 1, "", "head_size"], [28, 10, 1, "", "hf_modules_to_trtllm_modules"], [28, 10, 1, "", "hidden_size"], [28, 10, 1, "", "lora_plugin"], [28, 10, 1, "", "lora_target_modules"], [28, 10, 1, "", "mamba_d_conv"], [28, 10, 1, "", "mamba_d_state"], [28, 10, 1, "", "mamba_expand"], [28, 10, 1, "", "max_batch_size"], [28, 10, 1, "", "max_medusa_tokens"], [28, 10, 1, "", "max_prompt_embedding_table_size"], [28, 10, 1, "", "model_name"], [28, 10, 1, "", "num_heads"], [28, 10, 1, "", "num_kv_heads"], [28, 10, 1, "", "num_layers"], [28, 10, 1, "", "num_medusa_heads"], [28, 10, 1, "", "paged_kv_cache"], [28, 10, 1, "", "quant_mode"], [28, 10, 1, "", "remove_input_padding"], [28, 10, 1, "", "tokens_per_block"], [28, 10, 1, "", "trtllm_modules_to_hf_modules"], [28, 10, 1, "", "use_context_fmha_for_generation"], [28, 10, 1, "", "use_custom_all_reduce"], [28, 10, 1, "", "vocab_size"]], "tensorrt_llm.runtime.ModelRunner": [[28, 12, 1, "", "dtype"], [28, 11, 1, "", "from_dir"], [28, 12, 1, "", "gather_context_logits"], [28, 12, 1, "", "gather_generation_logits"], [28, 11, 1, "", "generate"], [28, 12, 1, "", "hidden_size"], [28, 12, 1, "", "max_prompt_embedding_table_size"], [28, 12, 1, "", "max_sequence_length"], [28, 12, 1, "", "num_heads"], [28, 12, 1, "", "num_layers"], [28, 12, 1, "", "remove_input_padding"], [28, 11, 1, "", "serialize_engine"], [28, 12, 1, "", "use_lora_plugin"], [28, 12, 1, "", "vocab_size"], [28, 12, 1, "", "vocab_size_padded"]], "tensorrt_llm.runtime.QWenForCausalLMGenerationSession": [[28, 11, 1, "", "generate"]], "tensorrt_llm.runtime.Session": [[28, 12, 1, "", "context"], [28, 12, 1, "", "engine"], [28, 11, 1, "", "from_engine"], [28, 11, 1, "", "from_serialized_engine"], [28, 11, 1, "", "infer_shapes"], [28, 11, 1, "", "run"], [28, 12, 1, "", "runtime"], [28, 11, 1, "", "set_shapes"]], "tensorrt_llm.runtime.TensorInfo": [[28, 10, 1, "", "dtype"], [28, 10, 1, "", "name"], [28, 10, 1, "", "shape"]]}, "objtypes": {"0": "cpp:type", "1": "cpp:class", "2": "cpp:function", "3": "cpp:functionParam", "4": "cpp:member", "5": "cpp:templateParam", "6": "cpp:enum", "7": "cpp:enumerator", "8": "py:module", "9": "py:class", "10": "py:attribute", "11": "py:method", "12": "py:property", "13": "py:function"}, "objnames": {"0": ["cpp", "type", "C++ type"], "1": ["cpp", "class", "C++ class"], "2": ["cpp", "function", "C++ function"], "3": ["cpp", "functionParam", "C++ function parameter"], "4": ["cpp", "member", "C++ member"], "5": ["cpp", "templateParam", "C++ template parameter"], "6": ["cpp", "enum", "C++ enum"], "7": ["cpp", "enumerator", "C++ enumerator"], "8": ["py", "module", "Python module"], "9": ["py", "class", "Python class"], "10": ["py", "attribute", "Python attribute"], "11": ["py", "method", "Python method"], "12": ["py", "property", "Python property"], "13": ["py", "function", "Python function"]}, "titleterms": {"how": [0, 1, 19], "add": 0, "new": 
[0, 8, 18], "model": [0, 1, 3, 12, 19, 20, 21, 25], "step": [0, 10], "debug": 1, "overview": [1, 10, 18], "unit": 1, "test": 1, "e2": 1, "execut": [1, 4, 21], "error": 1, "runtim": [2, 3, 10, 12, 16, 17, 19, 28], "buffermanag": 2, "h": 2, "common": 2, "cudaev": 2, "cudastream": 2, "decodinginput": 2, "decodingoutput": 2, "generationinput": 2, "generationoutput": 2, "gptdecod": 2, "gptdecoderbatch": 2, "gptjsonconfig": 2, "gptmodelconfig": 2, "gptsession": 2, "ibuff": 2, "igptdecoderbatch": 2, "istatefulgptdecod": 2, "itensor": 2, "ipcutil": 2, "memorycount": 2, "prompttuningparam": 2, "samplingconfig": 2, "tllmlogger": 2, "worldconfig": 2, "tensorrt": [3, 4, 6, 7, 9, 10, 14, 17, 18, 19, 20, 21], "llm": [3, 4, 6, 7, 9, 10, 14, 17, 18, 19, 20, 21], "architectur": 3, "definit": 3, "compil": 3, "weight": [3, 17, 18, 22], "bind": [3, 10], "pattern": [3, 13], "match": 3, "fusion": [3, 19], "plugin": [3, 19, 26], "multi": [3, 4, 11, 19, 21], "gpu": [3, 4, 5, 17, 19, 20, 21], "node": 3, "support": [3, 10, 12, 22], "In": [3, 4, 12, 19], "flight": [3, 4, 12, 19], "batch": [3, 4, 11, 12, 19], "The": [4, 12, 22], "manag": [4, 13], "api": [4, 13, 14], "get": 4, "send": 4, "callback": 4, "request": [4, 15], "interrupt": 4, "statist": 4, "other": 4, "mandatori": 4, "gptmanag": [4, 16], "paramet": [4, 12], "option": [4, 10, 19, 21], "respons": 4, "content": [4, 14], "design": 4, "triton": 4, "infer": [4, 9, 15, 17, 21], "server": 4, "falcon": [5, 20], "180b": [5, 20], "singl": [5, 21], "h200": [5, 7, 8, 20], "int4": [5, 22], "awq": [5, 18, 22], "6": 5, "7x": 5, "faster": 5, "llama": [5, 8, 20], "70b": [5, 8, 20], "over": 5, "a100": [5, 6, 20], "up": [5, 8, 9, 19], "close": [5, 8], "h100": [6, 7, 20], "ha": 6, "4": 6, "6x": 6, "perform": [6, 9, 19, 20, 21], "achiev": [6, 7], "10": 6, "000": [6, 7], "tok": 6, "": [6, 9, 11, 14], "100m": 6, "first": [6, 20], "token": [6, 7, 19, 20], "mlperf": 6, "fp8": [6, 11, 18, 20, 22], "what": [6, 9], "i": 6, "nearli": 7, "12": 7, "sec": 7, "llama2": [7, 20], "13b": 7, "v": 7, "latest": 7, "hbm": 7, "memori": [7, 17, 19], "xqa": [8, 11], "kernel": 8, "provid": 8, "2": [8, 10], "4x": 8, "more": 8, "throughput": [8, 20], "within": 8, "same": 8, "latenc": [8, 20], "budget": 8, "increas": 8, "speed": 9, "sota": 9, "quantiz": [9, 18, 22, 27], "techniqu": 9, "trt": 9, "benchmark": [9, 20], "accuraci": 9, "best": [9, 19], "practic": [9, 19], "choos": 9, "right": 9, "method": [9, 13], "come": 9, "next": 9, "build": [10, 18, 19, 20], "from": 10, "sourc": 10, "fetch": 10, "1": 10, "One": 10, "creat": 10, "contain": [10, 20], "On": 10, "system": [10, 21], "gnu": 10, "make": [10, 18], "without": 10, "python": [10, 14, 17], "c": [10, 12, 14, 17], "link": 10, "header": 10, "file": 10, "head": [11, 19], "queri": 11, "group": 11, "attent": [11, 18, 19, 24], "import": 11, "note": 11, "pad": [11, 19], "pack": 11, "tensor": [11, 13, 16, 17], "context": [11, 19], "gener": [11, 12], "phase": 11, "optim": [11, 19], "inflight": 11, "chunk": [11, 19], "kv": [11, 17, 18, 19], "cach": [11, 17, 18, 19], "contigu": 11, "page": [11, 19], "int8": [11, 22], "slide": 11, "window": [11, 19], "cyclic": 11, "roll": 11, "buffer": 11, "streamingllm": 11, "beam": 11, "search": 11, "input": [11, 12, 19], "qkv": 11, "rotari": 11, "posit": 11, "embed": [11, 19, 24], "rope": 11, "alibi": 11, "scale": [11, 18], "factor": [11, 18], "cross": 11, "rel": 11, "bia": 11, "rab": 11, "gpt": [12, 16, 19, 20], "session": 12, "creation": 12, "configur": 12, "world": 12, "output": 12, "sampl": 12, "intern": 12, "compon": 12, 
"know": 12, "issu": [12, 17, 20], "futur": 12, "chang": 12, "graph": 13, "rewrit": 13, "modul": [13, 16], "when": 13, "us": [13, 16], "relat": 13, "flayerinfo": 13, "retriev": 13, "high": 13, "level": 13, "inform": 13, "function": [13, 23], "record_signatur": 13, "decor": 13, "requir": 13, "classic": 13, "workflow": [13, 18], "welcom": 14, "document": 14, "indic": 14, "tabl": 14, "blog": 14, "run": [16, 20], "2b": 16, "lora": 16, "cpp": 16, "format": 16, "detail": [16, 22], "exampl": [16, 18, 21], "id": 16, "map": 16, "usag": [17, 21], "understand": 17, "time": [17, 21], "size": [17, 19], "activ": [17, 24], "pool": [17, 24], "known": [17, 20], "faq": 17, "prepar": 18, "checkpoint": 18, "config": 18, "rank": [18, 21], "mlp": [18, 19, 24], "layernorm": 18, "engin": [18, 20], "evalu": 18, "tune": 19, "To": 19, "measur": 19, "fuse": [19, 20], "remov": 19, "maximum": 19, "number": 19, "sequenc": 19, "block": 19, "mode": 19, "custom": 19, "allreduc": 19, "parallel": 19, "share": 19, "look": 19, "horizont": 19, "gate": [19, 20], "bert": 19, "type": 19, "max": 19, "free": 19, "fraction": 19, "schedul": 19, "polici": 19, "overlap": 19, "methodologi": 20, "peak": 20, "l40": 20, "fp16": [20, 22], "low": 20, "sup": 20, "matmul": 20, "silu": 20, "reproduc": 20, "result": 20, "setup": 20, "per": 20, "j": 20, "6b": 20, "7b": 20, "analysi": 21, "featur": 21, "descript": 21, "command": 21, "line": 21, "environ": 21, "variabl": 21, "coordin": 21, "nvidia": 21, "nsight": 21, "launch": 21, "profil": 21, "ifb": 21, "iter": 21, "numer": 22, "precis": 22, "fp32": 22, "bf16": 22, "dequant": 22, "q": 22, "dq": 22, "smoothquant": 22, "w8a8": 22, "onli": 22, "w4a16": 22, "w8a16": 22, "gptq": 22, "hopper": 22, "matrix": 22, "technic": 22, "quantmod": 22, "flag": 22, "layer": 24, "cast": 24, "conv": 24, "linear": 24, "normal": 24}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx.ext.todo": 2, "sphinx": 60}, "alltitles": {"How to add a new model": [[0, "how-to-add-a-new-model"]], "Steps": [[0, "steps"]], "How to debug": [[1, "how-to-debug"]], "Overview": [[1, "overview"], [10, "overview"], [18, "overview"]], "Debug on unit tests": [[1, "debug-on-unit-tests"]], "Debug on E2E models": [[1, "debug-on-e2e-models"]], "Debug execution errors": [[1, "debug-execution-errors"]], "Runtime": [[2, "runtime"], [3, "runtime"], [28, "module-tensorrt_llm.runtime"]], "bufferManager.h": [[2, "buffermanager-h"]], "common.h": [[2, "common-h"]], "cudaEvent.h": [[2, "cudaevent-h"]], "cudaStream.h": [[2, "cudastream-h"]], "decodingInput.h": [[2, "decodinginput-h"]], "decodingOutput.h": [[2, "decodingoutput-h"]], "generationInput.h": [[2, "generationinput-h"]], "generationOutput.h": [[2, "generationoutput-h"]], "gptDecoder.h": [[2, "gptdecoder-h"]], "gptDecoderBatch.h": [[2, "gptdecoderbatch-h"]], "gptJsonConfig.h": [[2, "gptjsonconfig-h"]], "gptModelConfig.h": [[2, "gptmodelconfig-h"]], "gptSession.h": [[2, "gptsession-h"]], "iBuffer.h": [[2, "ibuffer-h"]], "iGptDecoderBatch.h": [[2, "igptdecoderbatch-h"]], "iStatefulGptDecoder.h": [[2, "istatefulgptdecoder-h"]], "iTensor.h": [[2, "itensor-h"]], "ipcUtils.h": [[2, "ipcutils-h"]], "memoryCounters.h": [[2, "memorycounters-h"]], "promptTuningParams.h": [[2, "prompttuningparams-h"]], "samplingConfig.h": [[2, 
"samplingconfig-h"]], "tllmLogger.h": [[2, "tllmlogger-h"]], "worldConfig.h": [[2, "worldconfig-h"]], "TensorRT-LLM Architecture": [[3, "tensorrt-llm-architecture"]], "Model Definition": [[3, "model-definition"]], "Compilation": [[3, "compilation"]], "Weight Bindings": [[3, "weight-bindings"]], "Pattern-Matching and Fusion": [[3, "pattern-matching-and-fusion"]], "Plugins": [[3, "plugins"]], "Multi-GPU and Multi-Node Support": [[3, "multi-gpu-and-multi-node-support"]], "In-flight Batching": [[3, "in-flight-batching"]], "The Batch Manager in TensorRT-LLM": [[4, "the-batch-manager-in-tensorrt-llm"]], "The Batch Manager API": [[4, "the-batch-manager-api"]], "Get and Send Callbacks": [[4, "get-and-send-callbacks"]], "Request Interruption": [[4, "request-interruption"]], "Statistics": [[4, "statistics"]], "Other mandatory GptManager parameters": [[4, "other-mandatory-gptmanager-parameters"]], "Optional GptManager parameters": [[4, "optional-gptmanager-parameters"]], "Responses content": [[4, "responses-content"]], "GptManager Design": [[4, "gptmanager-design"]], "Multi-GPU execution": [[4, "multi-gpu-execution"]], "In-flight Batching with the Triton Inference Server": [[4, "in-flight-batching-with-the-triton-inference-server"]], "Falcon-180B on a single H200 GPU with INT4 AWQ, and 6.7x faster Llama-70B over A100": [[5, "falcon-180b-on-a-single-h200-gpu-with-int4-awq-and-6-7x-faster-llama-70b-over-a100"]], "Falcon-180B on a single H200 with INT4 AWQ": [[5, "falcon-180b-on-a-single-h200-with-int4-awq"]], "Llama-70B on H200 up to 6.7x A100": [[5, "llama-70b-on-h200-up-to-6-7x-a100"]], "Closing": [[5, "closing"], [8, "closing"]], "H100 has 4.6x A100 Performance in TensorRT-LLM, achieving 10,000 tok/s at 100ms to first token": [[6, "h100-has-4-6x-a100-performance-in-tensorrt-llm-achieving-10-000-tok-s-at-100ms-to-first-token"]], "MLPerf on H100 with FP8": [[6, "mlperf-on-h100-with-fp8"]], "What is H100 FP8?": [[6, "what-is-h100-fp8"]], "H200 achieves nearly 12,000 tokens/sec on Llama2-13B with TensorRT-LLM": [[7, "h200-achieves-nearly-12-000-tokens-sec-on-llama2-13b-with-tensorrt-llm"]], "H200 vs H100": [[7, "h200-vs-h100"]], "Latest HBM Memory": [[7, "latest-hbm-memory"]], "New XQA-kernel provides 2.4x more Llama-70B throughput within the same latency budget": [[8, "new-xqa-kernel-provides-2-4x-more-llama-70b-throughput-within-the-same-latency-budget"]], "Llama-70B on H200 up to 2.4x increased throughput with XQA within same latency budget": [[8, "llama-70b-on-h200-up-to-2-4x-increased-throughput-with-xqa-within-same-latency-budget"]], "Speed up inference with SOTA quantization techniques in TRT-LLM": [[9, "speed-up-inference-with-sota-quantization-techniques-in-trt-llm"]], "Quantization in TensorRT-LLM": [[9, "quantization-in-tensorrt-llm"]], "Benchmark": [[9, "benchmark"]], "Performance": [[9, "performance"]], "Accuracy": [[9, "accuracy"]], "Best practices to choose the right quantization methods": [[9, "best-practices-to-choose-the-right-quantization-methods"]], "What\u2019s coming next": [[9, "whats-coming-next"]], "Build from Source": [[10, "build-from-source"]], "Fetch the Sources": [[10, "fetch-the-sources"]], "Option 1: Build TensorRT-LLM in One Step": [[10, "option-1-build-tensorrt-llm-in-one-step"]], "Option 2: Build Step-by-step": [[10, "option-2-build-step-by-step"]], "Create the Container": [[10, "create-the-container"]], "On Systems with GNU make": [[10, "on-systems-with-gnu-make"]], "On Systems Without GNU make": [[10, "on-systems-without-gnu-make"]], "Build TensorRT-LLM": [[10, 
"build-tensorrt-llm"]], "Build the Python Bindings for the C++ Runtime": [[10, "build-the-python-bindings-for-the-c-runtime"]], "Link with the TensorRT-LLM C++ Runtime": [[10, "link-with-the-tensorrt-llm-c-runtime"]], "Supported C++ Header Files": [[10, "supported-c-header-files"]], "Multi-head, Multi-query and Group-query Attention": [[11, "multi-head-multi-query-and-group-query-attention"]], "Important Note": [[11, "important-note"]], "Padded and Packed Tensors": [[11, "padded-and-packed-tensors"]], "Context and Generation Phases": [[11, "context-and-generation-phases"]], "Context Phase": [[11, "context-phase"]], "Generation Phase": [[11, "generation-phase"]], "XQA Optimization": [[11, "xqa-optimization"]], "Inflight batching": [[11, "inflight-batching"]], "Chunked Context": [[11, "chunked-context"], [19, "chunked-context"]], "KV Cache(s)": [[11, "kv-cache-s"]], "Contiguous KV Cache": [[11, "contiguous-kv-cache"]], "Paged KV Cache": [[11, "paged-kv-cache"], [19, "paged-kv-cache"]], "INT8/FP8 KV Caches": [[11, "int8-fp8-kv-caches"]], "Sliding Window Attention, Cyclic (Rolling Buffer) KV Cache": [[11, "sliding-window-attention-cyclic-rolling-buffer-kv-cache"]], "StreamingLLM": [[11, "streamingllm"]], "Beam-Search": [[11, "beam-search"]], "Input QKV tensor": [[11, "input-qkv-tensor"]], "Rotary Positional Embedding (RoPE)": [[11, "rotary-positional-embedding-rope"]], "ALiBi": [[11, "alibi"]], "Scaling factor(s)": [[11, "scaling-factor-s"]], "Cross Attention": [[11, "cross-attention"]], "Relative Attention Bias (RAB)": [[11, "relative-attention-bias-rab"]], "C++ GPT Runtime": [[12, "c-gpt-runtime"]], "The Session": [[12, "the-session"]], "Creation": [[12, "creation"]], "Session Configuration": [[12, "session-configuration"]], "Model Configuration": [[12, "model-configuration"]], "World Configuration": [[12, "world-configuration"]], "Generation": [[12, "generation"]], "Inputs and Outputs": [[12, "inputs-and-outputs"]], "Sampling Parameters": [[12, "sampling-parameters"]], "Internal Components": [[12, "internal-components"]], "In-flight Batching Support": [[12, "in-flight-batching-support"]], "Know Issues and Future Changes": [[12, "know-issues-and-future-changes"]], "Graph Rewriting Module": [[13, "graph-rewriting-module"]], "When to Use Graph Rewriting?": [[13, "when-to-use-graph-rewriting"]], "Graph Rewriting APIs": [[13, "graph-rewriting-apis"]], "Tensor-Related Methods": [[13, "tensor-related-methods"]], "FLayerInfo for Retrieving High-Level Information for a Functional": [[13, "flayerinfo-for-retrieving-high-level-information-for-a-functional"]], "Pattern and Pattern Manager": [[13, "pattern-and-pattern-manager"]], "@record_signature to Decorate Functionals Requiring FLayerInfo": [[13, "record-signature-to-decorate-functionals-requiring-flayerinfo"]], "Classical Workflow": [[13, "classical-workflow"]], "Welcome to TensorRT-LLM\u2019s documentation!": [[14, "welcome-to-tensorrt-llm-s-documentation"]], "Contents:": [[14, null]], "Python API": [[14, "python-api"]], "C++ API": [[14, "c-api"]], "Indices and tables": [[14, "indices-and-tables"]], "Blogs": [[14, "blogs"]], "Inference Request": [[15, "inference-request"]], "Run gpt-2b + LoRA using GptManager / cpp runtime": [[16, "run-gpt-2b-lora-using-gptmanager-cpp-runtime"]], "LoRA tensor format details": [[16, "lora-tensor-format-details"]], "Example LoRA tensors": [[16, "example-lora-tensors"]], "LoRA Module id mapping": [[16, "lora-module-id-mapping"]], "Memory Usage of TensorRT-LLM": [[17, "memory-usage-of-tensorrt-llm"]], "Understand 
inference time GPU memory usage": [[17, "understand-inference-time-gpu-memory-usage"]], "Weights size": [[17, "weights-size"]], "Activation size": [[17, "activation-size"]], "KV cache tensor": [[17, "kv-cache-tensor"]], "Python runtime": [[17, "python-runtime"]], "C++ runtime": [[17, "c-runtime"]], "Memory pool": [[17, "memory-pool"]], "Known Issues": [[17, "known-issues"], [20, "known-issues"]], "FAQ": [[17, "faq"]], "New Workflow": [[18, "new-workflow"]], "Prepare the TensorRT-LLM Checkpoint": [[18, "prepare-the-tensorrt-llm-checkpoint"]], "Config": [[18, "config"]], "Rank Weights": [[18, "rank-weights"]], "Attention Weights": [[18, "attention-weights"]], "MLP Weights": [[18, "mlp-weights"]], "LayerNorm Weights": [[18, "layernorm-weights"]], "KV Cache Quantization Scaling Factors": [[18, "kv-cache-quantization-scaling-factors"]], "FP8 Quantization Scaling Factors": [[18, "fp8-quantization-scaling-factors"]], "AWQ Quantization Scaling Factors": [[18, "awq-quantization-scaling-factors"]], "Example": [[18, "example"]], "Build Checkpoint into TensorRT Engine": [[18, "build-checkpoint-into-tensorrt-engine"]], "Make Evaluation": [[18, "make-evaluation"]], "Best Practices for Tuning the Performance of TensorRT-LLM": [[19, "best-practices-for-tuning-the-performance-of-tensorrt-llm"]], "How To Measure Performance?": [[19, "how-to-measure-performance"]], "Build Options to Optimize the Performance of TensorRT-LLM Models?": [[19, "build-options-to-optimize-the-performance-of-tensorrt-llm-models"]], "GPT Attention Plugin and Context Fused Multi-Head Attention": [[19, "gpt-attention-plugin-and-context-fused-multi-head-attention"]], "Remove Input Padding": [[19, "remove-input-padding"]], "Maximum Number of Tokens": [[19, "maximum-number-of-tokens"]], "In-flight Sequence Batching": [[19, "in-flight-sequence-batching"]], "Multi-Block Mode": [[19, "multi-block-mode"]], "Custom AllReduce Plugin": [[19, "custom-allreduce-plugin"]], "Embedding Parallelism, Embedding Sharing, and Look-Up Plugin": [[19, "embedding-parallelism-embedding-sharing-and-look-up-plugin"]], "Horizontal Fusion in Gated-MLP": [[19, "horizontal-fusion-in-gated-mlp"]], "BERT Attention Plugin and Context Fused Multi-Head Attention": [[19, "bert-attention-plugin-and-context-fused-multi-head-attention"]], "Runtime Options to Optimize the Performance of TensorRT-LLM Models?": [[19, "runtime-options-to-optimize-the-performance-of-tensorrt-llm-models"]], "GPT Model Type": [[19, "gpt-model-type"]], "Max Tokens in Paged KV Cache and KV Cache Free GPU Memory Fraction": [[19, "max-tokens-in-paged-kv-cache-and-kv-cache-free-gpu-memory-fraction"]], "Batch Scheduler Policy": [[19, "batch-scheduler-policy"]], "TensorRT Overlap": [[19, "tensorrt-overlap"]], "Maximum Attention Window Size": [[19, "maximum-attention-window-size"]], "Performance of TensorRT-LLM": [[20, "performance-of-tensorrt-llm"]], "Methodology": [[20, "methodology"], [20, "id5"]], "Peak Throughput": [[20, "peak-throughput"]], "H200 GPUs (FP8)": [[20, "h200-gpus-fp8"], [20, "id1"]], "H100 GPUs (FP8)": [[20, "h100-gpus-fp8"], [20, "id2"]], "L40S GPUs (FP8)": [[20, "l40s-gpus-fp8"], [20, "id3"]], "A100 GPUs (FP16)": [[20, "a100-gpus-fp16"], [20, "id4"]], "Low Latency<sup>**</sup>": [[20, "low-latency"]], "Fused Matmul + Gated-SiLU (LLaMA)": [[20, "fused-matmul-gated-silu-llama"]], "Reproducing Benchmarked Results": [[20, "reproducing-benchmarked-results"]], "Building the TensorRT-LLM Container": [[20, "building-the-tensorrt-llm-container"]], "Engine Building Setups": [[20, 
"engine-building-setups"]], "Running on A100": [[20, "running-on-a100"]], "Reproducing First Token Latency": [[20, "reproducing-first-token-latency"]], "Benchmarking per Model": [[20, "benchmarking-per-model"]], "GPT-J 6B": [[20, "gpt-j-6b"]], "Throughput Benchmark": [[20, "throughput-benchmark"], [20, "id6"], [20, "id8"]], "First Token Latency Benchmark": [[20, "first-token-latency-benchmark"], [20, "id7"], [20, "id9"]], "Llama2-7b": [[20, "llama2-7b"]], "Llama2-70b": [[20, "llama2-70b"]], "Falcon-180B": [[20, "falcon-180b"]], "Performance Analysis of TensorRT-LLM": [[21, "performance-analysis-of-tensorrt-llm"]], "Feature Descriptions": [[21, "feature-descriptions"]], "Usage": [[21, "usage"]], "Inference Time Command Line Options": [[21, "inference-time-command-line-options"]], "Inference Time Environment Variables": [[21, "inference-time-environment-variables"]], "Coordinating with NVIDIA Nsight Systems Launch": [[21, "coordinating-with-nvidia-nsight-systems-launch"]], "Examples": [[21, "examples"]], "Profiling a single IFB iteration executing on a single rank of a multi-GPU model": [[21, "profiling-a-single-ifb-iteration-executing-on-a-single-rank-of-a-multi-gpu-model"]], "Numerical Precision": [[22, "numerical-precision"]], "FP32, FP16 and BF16": [[22, "fp32-fp16-and-bf16"]], "Quantization and Dequantization (Q/DQ)": [[22, "quantization-and-dequantization-q-dq"]], "INT8 SmoothQuant (W8A8)": [[22, "int8-smoothquant-w8a8"]], "INT4 and INT8 Weight-Only (W4A16 and W8A16)": [[22, "int4-and-int8-weight-only-w4a16-and-w8a16"]], "GPTQ and AWQ (W4A16)": [[22, "gptq-and-awq-w4a16"]], "FP8 (Hopper)": [[22, "fp8-hopper"]], "Support matrix": [[22, "support-matrix"]], "Technical Detail: The QuantMode Flags": [[22, "technical-detail-the-quantmode-flags"]], "Functionals": [[23, "module-tensorrt_llm.functional"]], "Layers": [[24, "layers"]], "Activation": [[24, "module-tensorrt_llm.layers.activation"]], "Attention": [[24, "module-tensorrt_llm.layers.attention"]], "Cast": [[24, "module-tensorrt_llm.layers.cast"]], "Conv": [[24, "module-tensorrt_llm.layers.conv"]], "Embedding": [[24, "module-tensorrt_llm.layers.embedding"]], "Linear": [[24, "module-tensorrt_llm.layers.linear"]], "MLP": [[24, "module-tensorrt_llm.layers.mlp"]], "Normalization": [[24, "normalization"]], "Pooling": [[24, "module-tensorrt_llm.layers.pooling"]], "Models": [[25, "module-tensorrt_llm.models"]], "Plugin": [[26, "module-tensorrt_llm.plugin"]], "Quantization": [[27, "module-tensorrt_llm.quantization"]]}, "indexentries": {"nvinfer1 (c++ type)": [[2, "_CPPv48nvinfer1"]], "tensorrt_llm (c++ type)": [[2, "_CPPv412tensorrt_llm"]], "tensorrt_llm::batch_manager (c++ type)": [[2, "_CPPv4N12tensorrt_llm13batch_managerE"]], "tensorrt_llm::batch_manager::kv_cache_manager (c++ type)": [[2, "_CPPv4N12tensorrt_llm13batch_manager16kv_cache_managerE"]], "tensorrt_llm::layers (c++ type)": [[2, "_CPPv4N12tensorrt_llm6layersE"]], "tensorrt_llm::runtime (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtimeE"]], "tensorrt_llm::runtime::bufferdatatype (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime14BufferDataTypeE"]], "tensorrt_llm::runtime::bufferdatatype::bufferdatatype (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb"]], "tensorrt_llm::runtime::bufferdatatype::getdatatype (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType11getDataTypeEv"]], "tensorrt_llm::runtime::bufferdatatype::getsize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType7getSizeEv"]], 
"tensorrt_llm::runtime::bufferdatatype::ispointer (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType9isPointerEv"]], "tensorrt_llm::runtime::bufferdatatype::isunsigned (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType10isUnsignedEv"]], "tensorrt_llm::runtime::bufferdatatype::ktrtpointertype (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14BufferDataType15kTrtPointerTypeE"]], "tensorrt_llm::runtime::bufferdatatype::mdatatype (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14BufferDataType9mDataTypeE"]], "tensorrt_llm::runtime::bufferdatatype::mpointer (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14BufferDataType8mPointerE"]], "tensorrt_llm::runtime::bufferdatatype::munsigned (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14BufferDataType9mUnsignedE"]], "tensorrt_llm::runtime::bufferdatatype::operator nvinfer1::datatype (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14BufferDataTypecvN8nvinfer18DataTypeEEv"]], "tensorrt_llm::runtime::buffermanager (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManagerE"]], "tensorrt_llm::runtime::buffermanager::buffermanager (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager13BufferManagerE13CudaStreamPtr"]], "tensorrt_llm::runtime::buffermanager::cudastreamptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager13CudaStreamPtrE"]], "tensorrt_llm::runtime::buffermanager::ibufferptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager10IBufferPtrE"]], "tensorrt_llm::runtime::buffermanager::itensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager10ITensorPtrE"]], "tensorrt_llm::runtime::buffermanager::allocate (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::copy (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer"]], "tensorrt_llm::runtime::buffermanager::copyfrom (c++ function)": [[2, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType"], [2, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType"], [2, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType"]], "tensorrt_llm::runtime::buffermanager::cpu (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE"], [2, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::emptybuffer (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::emptytensor (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE"]], 
"tensorrt_llm::runtime::buffermanager::getstream (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager9getStreamEv"]], "tensorrt_llm::runtime::buffermanager::gpu (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::initmemorypool (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager14initMemoryPoolEi"]], "tensorrt_llm::runtime::buffermanager::kbyte_type (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager10kBYTE_TYPEE"]], "tensorrt_llm::runtime::buffermanager::mstream (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager7mStreamE"]], "tensorrt_llm::runtime::buffermanager::managed (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7managedEN8nvinfer14DimsEN8nvinfer18DataTypeE"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7managedENSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::memorypoolfree (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEi"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEv"]], "tensorrt_llm::runtime::buffermanager::memorypoolreserved (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEi"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEv"]], "tensorrt_llm::runtime::buffermanager::memorypooltrimto (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToENSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToEiNSt6size_tE"]], "tensorrt_llm::runtime::buffermanager::memorypoolused (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEi"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEv"]], "tensorrt_llm::runtime::buffermanager::pinned (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE"], [2, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::pinnedpool (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolEN8nvinfer14DimsEN8nvinfer18DataTypeE"], [2, "_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolENSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::setzero (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7setZeroER7IBuffer"]], "tensorrt_llm::runtime::bufferrange (c++ class)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime11BufferRangeE"]], "tensorrt_llm::runtime::bufferrange::bufferrange (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeEP1T9size_type"], [2, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeER7IBuffer"]], "tensorrt_llm::runtime::bufferrange::begin (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange5beginEv"], [2, "_CPPv4NK12tensorrt_llm7runtime11BufferRange5beginEv"]], "tensorrt_llm::runtime::bufferrange::cbegin (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange6cbeginEv"], [2, "_CPPv4NK12tensorrt_llm7runtime11BufferRange6cbeginEv"]], "tensorrt_llm::runtime::bufferrange::cend (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange4cendEv"], [2, "_CPPv4NK12tensorrt_llm7runtime11BufferRange4cendEv"]], "tensorrt_llm::runtime::bufferrange::const_iterator (c++ type)": [[2, 
"_CPPv4N12tensorrt_llm7runtime11BufferRange14const_iteratorE"]], "tensorrt_llm::runtime::bufferrange::const_pointer (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange13const_pointerE"]], "tensorrt_llm::runtime::bufferrange::const_reference (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange15const_referenceE"]], "tensorrt_llm::runtime::bufferrange::end (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange3endEv"], [2, "_CPPv4NK12tensorrt_llm7runtime11BufferRange3endEv"]], "tensorrt_llm::runtime::bufferrange::iterator (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange8iteratorE"]], "tensorrt_llm::runtime::bufferrange::mdata (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange5mDataE"]], "tensorrt_llm::runtime::bufferrange::msize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange5mSizeE"]], "tensorrt_llm::runtime::bufferrange::operator[] (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRangeixE9size_type"], [2, "_CPPv4NK12tensorrt_llm7runtime11BufferRangeixE9size_type"]], "tensorrt_llm::runtime::bufferrange::pointer (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange7pointerE"]], "tensorrt_llm::runtime::bufferrange::reference (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange9referenceE"]], "tensorrt_llm::runtime::bufferrange::size (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11BufferRange4sizeEv"]], "tensorrt_llm::runtime::bufferrange::size_type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange9size_typeE"]], "tensorrt_llm::runtime::bufferrange::value_type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange10value_typeE"]], "tensorrt_llm::runtime::cudaevent (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEventE"]], "tensorrt_llm::runtime::cudaevent::cudaevent (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb"], [2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventEj"]], "tensorrt_llm::runtime::cudaevent::deleter (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7DeleterE"]], "tensorrt_llm::runtime::cudaevent::deleter::deleter (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEb"], [2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEv"]], "tensorrt_llm::runtime::cudaevent::deleter::mownsevent (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter10mOwnsEventE"]], "tensorrt_llm::runtime::cudaevent::deleter::operator() (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent7DeleterclE7pointer"]], "tensorrt_llm::runtime::cudaevent::eventptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent8EventPtrE"]], "tensorrt_llm::runtime::cudaevent::element_type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent12element_typeE"]], "tensorrt_llm::runtime::cudaevent::get (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent3getEv"]], "tensorrt_llm::runtime::cudaevent::mevent (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent6mEventE"]], "tensorrt_llm::runtime::cudaevent::pointer (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7pointerE"]], "tensorrt_llm::runtime::cudaevent::synchronize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent11synchronizeEv"]], "tensorrt_llm::runtime::cudastream (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStreamE"]], "tensorrt_llm::runtime::cudastream::cudastream (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib"], [2, 
"_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji"]], "tensorrt_llm::runtime::cudastream::deleter (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream7DeleterE"]], "tensorrt_llm::runtime::cudastream::deleter::deleter (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEb"], [2, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEv"]], "tensorrt_llm::runtime::cudastream::deleter::mownsstream (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter11mOwnsStreamE"]], "tensorrt_llm::runtime::cudastream::deleter::operator() (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream7DeleterclE12cudaStream_t"]], "tensorrt_llm::runtime::cudastream::streamptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream9StreamPtrE"]], "tensorrt_llm::runtime::cudastream::get (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream3getEv"]], "tensorrt_llm::runtime::cudastream::getdevice (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream9getDeviceEv"]], "tensorrt_llm::runtime::cudastream::mdevice (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream7mDeviceE"]], "tensorrt_llm::runtime::cudastream::mstream (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream7mStreamE"]], "tensorrt_llm::runtime::cudastream::record (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordEN9CudaEvent7pointerE"], [2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordERK9CudaEvent"]], "tensorrt_llm::runtime::cudastream::synchronize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream11synchronizeEv"]], "tensorrt_llm::runtime::cudastream::wait (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitEN9CudaEvent7pointerE"], [2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitERK9CudaEvent"]], "tensorrt_llm::runtime::datatypetraits (c++ struct)": [[2, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime14DataTypeTraitsE"]], "tensorrt_llm::runtime::datatypetraits<kdatatype, kunsigned, true> (c++ struct)": [[2, "_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEEE"]], "tensorrt_llm::runtime::datatypetraits<kdatatype, kunsigned, true>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4nameE"]], "tensorrt_llm::runtime::datatypetraits<kdatatype, kunsigned, true>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<kdatatype, kunsigned, true>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kbool, kunsigned> (c++ struct)": [[2, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kbool, kunsigned>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kbool, kunsigned>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kbool, kunsigned>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kfloat> 
(c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kfloat>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kfloat>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kfloat>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::khalf> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::khalf>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::khalf>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::khalf>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32, true> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32, true>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32, true>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32, true>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64, true> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64, true>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64, true>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64, true>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4typeE"]], 
"tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint8> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint8>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint8>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint8>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kuint8, kunsigned> (c++ struct)": [[2, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kuint8, kunsigned>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kuint8, kunsigned>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kuint8, kunsigned>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4typeE"]], "tensorrt_llm::runtime::decodinginput (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInputE"]], "tensorrt_llm::runtime::decodinginput::decodinginput (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType8SizeType9TensorPtr9TensorPtr"]], "tensorrt_llm::runtime::decodinginput::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9TensorPtrE"]], "tensorrt_llm::runtime::decodinginput::badwordslist (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsListE"]], "tensorrt_llm::runtime::decodinginput::batchslots (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput10batchSlotsE"]], "tensorrt_llm::runtime::decodinginput::cacheindirection (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput16cacheIndirectionE"]], "tensorrt_llm::runtime::decodinginput::embeddingbias (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13embeddingBiasE"]], "tensorrt_llm::runtime::decodinginput::endids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput6endIdsE"]], "tensorrt_llm::runtime::decodinginput::finished (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput8finishedE"]], "tensorrt_llm::runtime::decodinginput::lengths (c++ member)": [[2, 
"_CPPv4N12tensorrt_llm7runtime13DecodingInput7lengthsE"]], "tensorrt_llm::runtime::decodinginput::logits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput6logitsE"]], "tensorrt_llm::runtime::decodinginput::maxattentionwindow (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput18maxAttentionWindowE"]], "tensorrt_llm::runtime::decodinginput::maxbatchsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12maxBatchSizeE"]], "tensorrt_llm::runtime::decodinginput::maxlength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9maxLengthE"]], "tensorrt_llm::runtime::decodinginput::norepeatngramsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput17noRepeatNgramSizeE"]], "tensorrt_llm::runtime::decodinginput::sequencelimitlength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput19sequenceLimitLengthE"]], "tensorrt_llm::runtime::decodinginput::sinktokenlength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput15sinkTokenLengthE"]], "tensorrt_llm::runtime::decodinginput::step (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput4stepE"]], "tensorrt_llm::runtime::decodinginput::stopwordslist (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsListE"]], "tensorrt_llm::runtime::decodingoutput (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutputE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypothesesE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::cumlogprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses11cumLogProbsE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::empty (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5emptyER13BufferManager"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::init (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initER13BufferManager11TokenIdType"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::isdone (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses6isDoneE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::logprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses8logProbsE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::minnormedscores (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses15minNormedScoresE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::normedscores (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12normedScoresE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::numbeams (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses8numBeamsE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::outputidstgt (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12outputIdsTgtE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::release (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7releaseEv"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::reshape (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::sequencelengthstgt (c++ member)": [[2, 
"_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses18sequenceLengthsTgtE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::slice (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE8SizeType8SizeType"]], "tensorrt_llm::runtime::decodingoutput::decodingoutput (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14DecodingOutputE9TensorPtr"]], "tensorrt_llm::runtime::decodingoutput::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9TensorPtrE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14beamHypothesesE"]], "tensorrt_llm::runtime::decodingoutput::cacheindirection (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput16cacheIndirectionE"]], "tensorrt_llm::runtime::decodingoutput::cumlogprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput11cumLogProbsE"]], "tensorrt_llm::runtime::decodingoutput::finished (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput8finishedE"]], "tensorrt_llm::runtime::decodingoutput::finishedsum (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput11finishedSumE"]], "tensorrt_llm::runtime::decodingoutput::ids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput3idsE"]], "tensorrt_llm::runtime::decodingoutput::knegativeinfinity (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput17kNegativeInfinityE"]], "tensorrt_llm::runtime::decodingoutput::lengths (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput7lengthsE"]], "tensorrt_llm::runtime::decodingoutput::logprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput8logProbsE"]], "tensorrt_llm::runtime::decodingoutput::newtokens (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9newTokensE"]], "tensorrt_llm::runtime::decodingoutput::newtokenssteps (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14newTokensStepsE"]], "tensorrt_llm::runtime::decodingoutput::newtokensvec (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput12newTokensVecE"]], "tensorrt_llm::runtime::decodingoutput::parentids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9parentIdsE"]], "tensorrt_llm::runtime::generationinput (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInputE"]], "tensorrt_llm::runtime::generationinput::base (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInput4BaseE"]], "tensorrt_llm::runtime::generationinput::generationinput (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb"]], "tensorrt_llm::runtime::generationinput::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInput9TensorPtrE"]], "tensorrt_llm::runtime::generationoutput (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime16GenerationOutputE"]], "tensorrt_llm::runtime::generationoutput::base (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput4BaseE"]], "tensorrt_llm::runtime::generationoutput::generationoutput (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput16GenerationOutputE9TensorPtr9TensorPtr"]], "tensorrt_llm::runtime::generationoutput::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput9TensorPtrE"]], "tensorrt_llm::runtime::genericgenerationinput (c++ class)": [[2, "_CPPv4I00EN12tensorrt_llm7runtime22GenericGenerationInputE"]], 
"tensorrt_llm::runtime::genericgenerationinput::genericgenerationinput (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb"]], "tensorrt_llm::runtime::genericgenerationinput::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput9TensorPtrE"]], "tensorrt_llm::runtime::genericgenerationinput::badwordslist (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput12badWordsListE"]], "tensorrt_llm::runtime::genericgenerationinput::embeddingbias (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput13embeddingBiasE"]], "tensorrt_llm::runtime::genericgenerationinput::endid (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput5endIdE"]], "tensorrt_llm::runtime::genericgenerationinput::ids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput3idsE"]], "tensorrt_llm::runtime::genericgenerationinput::lengths (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput7lengthsE"]], "tensorrt_llm::runtime::genericgenerationinput::maxnewtokens (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput12maxNewTokensE"]], "tensorrt_llm::runtime::genericgenerationinput::packed (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput6packedE"]], "tensorrt_llm::runtime::genericgenerationinput::padid (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput5padIdE"]], "tensorrt_llm::runtime::genericgenerationinput::prompttuningparams (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput18promptTuningParamsE"]], "tensorrt_llm::runtime::genericgenerationinput::stopwordslist (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput13stopWordsListE"]], "tensorrt_llm::runtime::genericgenerationoutput (c++ class)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime23GenericGenerationOutputE"]], "tensorrt_llm::runtime::genericgenerationoutput::callback (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput8CallbackE"]], "tensorrt_llm::runtime::genericgenerationoutput::genericgenerationoutput (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput23GenericGenerationOutputE9TensorPtr9TensorPtr"]], "tensorrt_llm::runtime::genericgenerationoutput::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput9TensorPtrE"]], "tensorrt_llm::runtime::genericgenerationoutput::contextlogits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput13contextLogitsE"]], "tensorrt_llm::runtime::genericgenerationoutput::cumlogprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput11cumLogProbsE"]], "tensorrt_llm::runtime::genericgenerationoutput::generationlogits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput16generationLogitsE"]], "tensorrt_llm::runtime::genericgenerationoutput::ids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput3idsE"]], "tensorrt_llm::runtime::genericgenerationoutput::lengths (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput7lengthsE"]], "tensorrt_llm::runtime::genericgenerationoutput::logprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput8logProbsE"]], "tensorrt_llm::runtime::genericgenerationoutput::ontokengenerated (c++ member)": [[2, 
"_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput16onTokenGeneratedE"]], "tensorrt_llm::runtime::genericprompttuningparams (c++ class)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime25GenericPromptTuningParamsE"]], "tensorrt_llm::runtime::genericprompttuningparams::genericprompttuningparams (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams25GenericPromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr"]], "tensorrt_llm::runtime::genericprompttuningparams::sizetype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams8SizeTypeE"]], "tensorrt_llm::runtime::genericprompttuningparams::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9TensorPtrE"]], "tensorrt_llm::runtime::genericprompttuningparams::embeddingtable (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams14embeddingTableE"]], "tensorrt_llm::runtime::genericprompttuningparams::prompttuningenabled (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams19promptTuningEnabledE"]], "tensorrt_llm::runtime::genericprompttuningparams::tasks (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams5tasksE"]], "tensorrt_llm::runtime::genericprompttuningparams::vocabsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9vocabSizeE"]], "tensorrt_llm::runtime::gptdecoder (c++ class)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime10GptDecoderE"]], "tensorrt_llm::runtime::gptdecoder::cudastreamptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder13CudaStreamPtrE"]], "tensorrt_llm::runtime::gptdecoder::gptdecoder (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderE6size_t6size_t6size_tRK13CudaStreamPtr"]], "tensorrt_llm::runtime::gptdecoder::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder9TensorPtrE"]], "tensorrt_llm::runtime::gptdecoder::forward (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder7forwardER14DecodingOutputRK13DecodingInput"]], "tensorrt_llm::runtime::gptdecoder::forwardasync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput"]], "tensorrt_llm::runtime::gptdecoder::gathertree (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager"]], "tensorrt_llm::runtime::gptdecoder::getsamplingconfig (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder17getSamplingConfigEv"]], "tensorrt_llm::runtime::gptdecoder::mdynamicdecodelayer (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder19mDynamicDecodeLayerE"]], "tensorrt_llm::runtime::gptdecoder::mlogprobstiled (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder14mLogProbsTiledE"]], "tensorrt_llm::runtime::gptdecoder::mmanager (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder8mManagerE"]], "tensorrt_llm::runtime::gptdecoder::msamplingconfig (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder15mSamplingConfigE"]], "tensorrt_llm::runtime::gptdecoder::setup (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t8SizeType"]], "tensorrt_llm::runtime::gptdecoderbatch (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatchE"]], "tensorrt_llm::runtime::gptdecoderbatch::cudastreamptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13CudaStreamPtrE"]], 
"tensorrt_llm::runtime::gptdecoderbatch::decodinginputptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16DecodingInputPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::decodingoutputptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch17DecodingOutputPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::gptdecoderbatch (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr"]], "tensorrt_llm::runtime::gptdecoderbatch::gptdecoderptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13GptDecoderPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9TensorPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::finalize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch8finalizeE8SizeType"], [2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch8finalizeEv"]], "tensorrt_llm::runtime::gptdecoderbatch::forwardasync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE"], [2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN7decoder6OutputERKN7decoder5InputE"]], "tensorrt_llm::runtime::gptdecoderbatch::forwardsync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE"], [2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getallnewtokens (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch15getAllNewTokensEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getcumlogprobs (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch14getCumLogProbsE8SizeType"], [2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch14getCumLogProbsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getfinished (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getFinishedEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getlogprobs (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getLogProbsE8SizeType"], [2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getLogProbsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getnbfinished (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch13getNbFinishedEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getnbsteps (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch10getNbStepsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getnewtokens (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getNewTokensE8SizeType"]], "tensorrt_llm::runtime::gptdecoderbatch::getoutputids (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsE8SizeType"], [2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getparentids (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getParentIdsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::macceptbylogits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mAcceptByLogitsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mactualbatchsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mActualBatchSizeE"]], "tensorrt_llm::runtime::gptdecoderbatch::mbeamwidths (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11mBeamWidthsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mbuffermanager (c++ 
member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mBufferManagerE"]], "tensorrt_llm::runtime::gptdecoderbatch::mcurandstates (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mCurandStatesE"]], "tensorrt_llm::runtime::gptdecoderbatch::mdecoders (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9mDecodersE"]], "tensorrt_llm::runtime::gptdecoderbatch::mdecodinginputs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mDecodingInputsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mdecodingoutputs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mDecodingOutputsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mdraftlogits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12mDraftLogitsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mdraftprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11mDraftProbsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mdrafttokenids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mDraftTokenIdsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mfinished (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9mFinishedE"]], "tensorrt_llm::runtime::gptdecoderbatch::mfinishedsteps (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mFinishedStepsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mfinishedsum (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12mFinishedSumE"]], "tensorrt_llm::runtime::gptdecoderbatch::mforwardevent (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mForwardEventE"]], "tensorrt_llm::runtime::gptdecoderbatch::mforwardtoken (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mForwardTokenE"]], "tensorrt_llm::runtime::gptdecoderbatch::mgeneratedtokensperstep (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch23mGeneratedTokensPerStepE"]], "tensorrt_llm::runtime::gptdecoderbatch::mjointdecodinginput (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch19mJointDecodingInputE"]], "tensorrt_llm::runtime::gptdecoderbatch::mjointdecodingoutput (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch20mJointDecodingOutputE"]], "tensorrt_llm::runtime::gptdecoderbatch::mmaxattentionwindow (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch19mMaxAttentionWindowE"]], "tensorrt_llm::runtime::gptdecoderbatch::mmaxnewtokens (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mMaxNewTokensE"]], "tensorrt_llm::runtime::gptdecoderbatch::mmaxsequencelength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch18mMaxSequenceLengthE"]], "tensorrt_llm::runtime::gptdecoderbatch::mmaxtokensperstep (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch17mMaxTokensPerStepE"]], "tensorrt_llm::runtime::gptdecoderbatch::mnbsteps (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8mNbStepsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mnumdrafttokens (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mNumDraftTokensE"]], "tensorrt_llm::runtime::gptdecoderbatch::msinktokenlength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mSinkTokenLengthE"]], "tensorrt_llm::runtime::gptdecoderbatch::mstream (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch7mStreamE"]], "tensorrt_llm::runtime::gptdecoderbatch::mstreams (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8mStreamsE"]], 
"tensorrt_llm::runtime::gptdecoderbatch::mtargetprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12mTargetProbsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mvocabsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10mVocabSizeE"]], "tensorrt_llm::runtime::gptdecoderbatch::mvocabsizepadded (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mVocabSizePaddedE"]], "tensorrt_llm::runtime::gptdecoderbatch::newbatch (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig"]], "tensorrt_llm::runtime::gptdecoderbatch::newrequest (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig"]], "tensorrt_llm::runtime::gptdecoderbatch::postprocessrequest (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch18postProcessRequestE8SizeType"]], "tensorrt_llm::runtime::gptdecoderbatch::setup (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::gptjsonconfig (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfigE"]], "tensorrt_llm::runtime::gptjsonconfig::gptjsonconfig (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig"]], "tensorrt_llm::runtime::gptjsonconfig::enginefilename (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfig"], [2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE"]], "tensorrt_llm::runtime::gptjsonconfig::getmodelconfig (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14getModelConfigEv"]], "tensorrt_llm::runtime::gptjsonconfig::getname (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig7getNameEv"]], "tensorrt_llm::runtime::gptjsonconfig::getpipelineparallelism (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig22getPipelineParallelismEv"]], "tensorrt_llm::runtime::gptjsonconfig::getprecision (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getPrecisionEv"]], "tensorrt_llm::runtime::gptjsonconfig::gettensorparallelism (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig20getTensorParallelismEv"]], "tensorrt_llm::runtime::gptjsonconfig::getversion (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig10getVersionEv"]], "tensorrt_llm::runtime::gptjsonconfig::getworldsize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getWorldSizeEv"]], "tensorrt_llm::runtime::gptjsonconfig::mgptmodelconfig (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig15mGptModelConfigE"]], "tensorrt_llm::runtime::gptjsonconfig::mname (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5mNameE"]], "tensorrt_llm::runtime::gptjsonconfig::mpipelineparallelism (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig20mPipelineParallelismE"]], "tensorrt_llm::runtime::gptjsonconfig::mprecision (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig10mPrecisionE"]], "tensorrt_llm::runtime::gptjsonconfig::mtensorparallelism (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig18mTensorParallelismE"]], "tensorrt_llm::runtime::gptjsonconfig::mversion (c++ member)": [[2, 
"_CPPv4N12tensorrt_llm7runtime13GptJsonConfig8mVersionE"]], "tensorrt_llm::runtime::gptjsonconfig::parse (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt10filesystem4pathE"], [2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt6stringE"], [2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERNSt7istreamE"]], "tensorrt_llm::runtime::gptmodelconfig (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfigE"]], "tensorrt_llm::runtime::gptmodelconfig::gptmodelconfig (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::gptmodelconfig::modelvariant (c++ enum)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariantE"]], "tensorrt_llm::runtime::gptmodelconfig::modelvariant::kglm (c++ enumerator)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant4kGlmE"]], "tensorrt_llm::runtime::gptmodelconfig::modelvariant::kgpt (c++ enumerator)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant4kGptE"]], "tensorrt_llm::runtime::gptmodelconfig::computecontextlogits (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEb"], [2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEv"]], "tensorrt_llm::runtime::gptmodelconfig::computegenerationlogits (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig23computeGenerationLogitsEb"], [2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig23computeGenerationLogitsEv"]], "tensorrt_llm::runtime::gptmodelconfig::getcontextfmhaforgeneration (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig27getContextFMHAForGenerationEv"]], "tensorrt_llm::runtime::gptmodelconfig::getdatatype (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getDataTypeEv"]], "tensorrt_llm::runtime::gptmodelconfig::gethiddensize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13getHiddenSizeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getloramodules (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getLoraModulesEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxbatchsize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxBatchSizeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxbeamwidth (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxBeamWidthEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxdraftlen (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMaxDraftLenEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxinputlen (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMaxInputLenEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxlorarank (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMaxLoraRankEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxnumtokens (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxNumTokensEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxpromptembeddingtablesize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig30getMaxPromptEmbeddingTableSizeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxsequencelen (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig17getMaxSequenceLenEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxtokensperstep (c++ function)": [[2, 
"_CPPv4NK12tensorrt_llm7runtime14GptModelConfig19getMaxTokensPerStepEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmlphiddensize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig16getMlpHiddenSizeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmodelvariant (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getModelVariantEv"]], "tensorrt_llm::runtime::gptmodelconfig::getnbheads (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig10getNbHeadsEv"]], "tensorrt_llm::runtime::gptmodelconfig::getnbkvheads (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getNbKvHeadsEv"]], "tensorrt_llm::runtime::gptmodelconfig::getnblayers (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getNbLayersE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::getpagedcontextfmha (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig19getPagedContextFMHAEv"]], "tensorrt_llm::runtime::gptmodelconfig::getquantmode (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getQuantModeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getsizeperhead (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getSizePerHeadEv"]], "tensorrt_llm::runtime::gptmodelconfig::gettokensperblock (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig17getTokensPerBlockEv"]], "tensorrt_llm::runtime::gptmodelconfig::getvocabsize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getVocabSizeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getvocabsizepadded (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18getVocabSizePaddedE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::mcomputecontextlogits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21mComputeContextLogitsE"]], "tensorrt_llm::runtime::gptmodelconfig::mcomputegenerationlogits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig24mComputeGenerationLogitsE"]], "tensorrt_llm::runtime::gptmodelconfig::mdatatype (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig9mDataTypeE"]], "tensorrt_llm::runtime::gptmodelconfig::mhiddensize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig11mHiddenSizeE"]], "tensorrt_llm::runtime::gptmodelconfig::minputpacked (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mInputPackedE"]], "tensorrt_llm::runtime::gptmodelconfig::mloramodules (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mLoraModulesE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxbatchsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxBatchSizeE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxbeamwidth (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxBeamWidthE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxdraftlen (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxDraftLenE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxinputlen (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxInputLenE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxlorarank (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxLoraRankE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxnumtokens (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxNumTokensE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxpromptembeddingtablesize (c++ member)": [[2, 
"_CPPv4N12tensorrt_llm7runtime14GptModelConfig28mMaxPromptEmbeddingTableSizeE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxsequencelen (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15mMaxSequenceLenE"]], "tensorrt_llm::runtime::gptmodelconfig::mmlphiddensize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14mMlpHiddenSizeE"]], "tensorrt_llm::runtime::gptmodelconfig::mmodelvariant (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mModelVariantE"]], "tensorrt_llm::runtime::gptmodelconfig::mnbheads (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig8mNbHeadsE"]], "tensorrt_llm::runtime::gptmodelconfig::mnbkvheads (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mNbKvHeadsE"]], "tensorrt_llm::runtime::gptmodelconfig::mnblayers (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig9mNbLayersE"]], "tensorrt_llm::runtime::gptmodelconfig::mpagedcontextfmha (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17mPagedContextFMHAE"]], "tensorrt_llm::runtime::gptmodelconfig::mpagedkvcache (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mPagedKvCacheE"]], "tensorrt_llm::runtime::gptmodelconfig::mquantmode (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mQuantModeE"]], "tensorrt_llm::runtime::gptmodelconfig::msizeperhead (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mSizePerHeadE"]], "tensorrt_llm::runtime::gptmodelconfig::mtokensperblock (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15mTokensPerBlockE"]], "tensorrt_llm::runtime::gptmodelconfig::musecontextfmhaforgeneration (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig28mUseContextFMHAForGenerationE"]], "tensorrt_llm::runtime::gptmodelconfig::musecustomallreduce (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig19mUseCustomAllReduceE"]], "tensorrt_llm::runtime::gptmodelconfig::musegptattentionplugin (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig22mUseGptAttentionPluginE"]], "tensorrt_llm::runtime::gptmodelconfig::museloraplugin (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14mUseLoraPluginE"]], "tensorrt_llm::runtime::gptmodelconfig::mvocabsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mVocabSizeE"]], "tensorrt_llm::runtime::gptmodelconfig::setloramodules (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setLoraModulesERKNSt6vectorI10LoraModuleEE"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxbatchsize (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBatchSizeE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxbeamwidth (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBeamWidthE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxdraftlen (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxDraftLenE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxinputlen (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxInputLenE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxlorarank (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxLoraRankE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxnumtokens (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxNumTokensENSt8optionalI8SizeTypeEE"]], 
"tensorrt_llm::runtime::gptmodelconfig::setmaxpromptembeddingtablesize (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig30setMaxPromptEmbeddingTableSizeE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxsequencelen (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setMaxSequenceLenE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmlphiddensize (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig16setMlpHiddenSizeE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmodelvariant (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setModelVariantE12ModelVariant"]], "tensorrt_llm::runtime::gptmodelconfig::setnbkvheads (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setNbKvHeadsE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setpagedcontextfmha (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig19setPagedContextFMHAEb"]], "tensorrt_llm::runtime::gptmodelconfig::setquantmode (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setQuantModeEN6common9QuantModeE"]], "tensorrt_llm::runtime::gptmodelconfig::setsizeperhead (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setSizePerHeadE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::settokensperblock (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setTokensPerBlockE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setusecontextfmhaforgeneration (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig30setUseContextFMHAForGenerationEb"]], "tensorrt_llm::runtime::gptmodelconfig::supportsinflightbatching (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig24supportsInflightBatchingEv"]], "tensorrt_llm::runtime::gptmodelconfig::usecustomallreduce (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEb"], [2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEv"]], "tensorrt_llm::runtime::gptmodelconfig::usegptattentionplugin (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEb"], [2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEv"]], "tensorrt_llm::runtime::gptmodelconfig::useloraplugin (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13useLoraPluginEb"], [2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13useLoraPluginEv"]], "tensorrt_llm::runtime::gptmodelconfig::usepackedinput (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14usePackedInputEb"], [2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14usePackedInputEv"]], "tensorrt_llm::runtime::gptmodelconfig::usepagedkvcache (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEb"], [2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEv"]], "tensorrt_llm::runtime::gptmodelconfig::useprompttuning (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15usePromptTuningEv"]], "tensorrt_llm::runtime::gptsession (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSessionE"]], "tensorrt_llm::runtime::gptsession::config (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession6ConfigE"]], "tensorrt_llm::runtime::gptsession::config::config (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config6ConfigE8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::config::ctxmicrobatchsize (c++ member)": [[2, 
"_CPPv4N12tensorrt_llm7runtime10GptSession6Config17ctxMicroBatchSizeE"]], "tensorrt_llm::runtime::gptsession::config::cudagraphmode (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config13cudaGraphModeE"]], "tensorrt_llm::runtime::gptsession::config::decoderperrequest (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17decoderPerRequestE"]], "tensorrt_llm::runtime::gptsession::config::genmicrobatchsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17genMicroBatchSizeE"]], "tensorrt_llm::runtime::gptsession::config::kvcacheconfig (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config13kvCacheConfigE"]], "tensorrt_llm::runtime::gptsession::config::maxbatchsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config12maxBatchSizeE"]], "tensorrt_llm::runtime::gptsession::config::maxbeamwidth (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config12maxBeamWidthE"]], "tensorrt_llm::runtime::gptsession::config::maxsequencelength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17maxSequenceLengthE"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorE"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::cudagraphexecutor (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor17CudaGraphExecutorEv"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::clear (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor5clearEv"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::create (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6createERK11cudaGraph_t"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::hasinstance (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor11hasInstanceEv"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::launch (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6launchERK10CudaStream"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::minstance (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor9mInstanceE"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::preparenextgraph (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16prepareNextGraphERK11TllmRuntime8SizeType"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::update (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6updateERK11cudaGraph_t"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::uploadtostream (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor14uploadToStreamERK10CudaStream"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::~cudagraphexecutor (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorD0Ev"]], "tensorrt_llm::runtime::gptsession::gptsession (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr"], [2, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr"], [2, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr"]], "tensorrt_llm::runtime::gptsession::kvcacheconfig (c++ type)": [[2, 
"_CPPv4N12tensorrt_llm7runtime10GptSession13KvCacheConfigE"]], "tensorrt_llm::runtime::gptsession::kvcachemanager (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession14KvCacheManagerE"]], "tensorrt_llm::runtime::gptsession::loggerptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession9LoggerPtrE"]], "tensorrt_llm::runtime::gptsession::microbatchconfig (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfigE"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::microbatchconfig (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE8SizeType8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE"], [2, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigEv"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::ctxbatchsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig12ctxBatchSizeE"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::genbatchsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig12genBatchSizeE"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::getgengraphid (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig13getGenGraphIdE8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::numctxbatches (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig13numCtxBatchesE"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::numctxpergen (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig12numCtxPerGenEv"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::numgenbatches (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig13numGenBatchesE"]], "tensorrt_llm::runtime::gptsession::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession9TensorPtrE"]], "tensorrt_llm::runtime::gptsession::tokengeneratedcallback (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession22TokenGeneratedCallbackE"]], "tensorrt_llm::runtime::gptsession::createbuffers (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession13createBuffersE8SizeType"]], "tensorrt_llm::runtime::gptsession::createcontexts (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession14createContextsEv"]], "tensorrt_llm::runtime::gptsession::createcustomallreduceworkspace (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::createdecoders (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType"]], "tensorrt_llm::runtime::gptsession::createkvcachemanager (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig"]], "tensorrt_llm::runtime::gptsession::createontokengeneratedcallback (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession30createOnTokenGeneratedCallbackER16GenerationOutput"]], "tensorrt_llm::runtime::gptsession::decoderstepasync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncE8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::executecontextstep (c++ function)": [[2, 
"_CPPv4N12tensorrt_llm7runtime10GptSession18executeContextStepERKNSt6vectorI15GenerationInputEERKNSt6vectorI8SizeTypeEEPK14KvCacheManager"]], "tensorrt_llm::runtime::gptsession::executegenerationstep (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE"]], "tensorrt_llm::runtime::gptsession::finalize (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession8finalizeE8SizeType"]], "tensorrt_llm::runtime::gptsession::generate (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfig"]], "tensorrt_llm::runtime::gptsession::generatebatched (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallback"]], "tensorrt_llm::runtime::gptsession::getbuffermanager (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession16getBufferManagerEv"]], "tensorrt_llm::runtime::gptsession::getdevice (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession9getDeviceEv"]], "tensorrt_llm::runtime::gptsession::getlogger (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession9getLoggerEv"]], "tensorrt_llm::runtime::gptsession::getlogitdatatype (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession16getLogitDataTypeEv"]], "tensorrt_llm::runtime::gptsession::getmodelconfig (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession14getModelConfigEv"]], "tensorrt_llm::runtime::gptsession::getworldconfig (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession14getWorldConfigEv"]], "tensorrt_llm::runtime::gptsession::initdecoder (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType"]], "tensorrt_llm::runtime::gptsession::kvcacheaddsequences (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::mbuffers (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession8mBuffersE"]], "tensorrt_llm::runtime::gptsession::mcommevent (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession10mCommEventE"]], "tensorrt_llm::runtime::gptsession::mcommptrs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession9mCommPtrsE"]], "tensorrt_llm::runtime::gptsession::mcommstream (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession11mCommStreamE"]], "tensorrt_llm::runtime::gptsession::mcudagraphinstances (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession19mCudaGraphInstancesE"]], "tensorrt_llm::runtime::gptsession::mcudagraphmode (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession14mCudaGraphModeE"]], "tensorrt_llm::runtime::gptsession::mdecodermaxattentionwindow (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession26mDecoderMaxAttentionWindowE"]], "tensorrt_llm::runtime::gptsession::mdecodermaxsequencelength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession25mDecoderMaxSequenceLengthE"]], "tensorrt_llm::runtime::gptsession::mdecodersinktokenlength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession23mDecoderSinkTokenLengthE"]], "tensorrt_llm::runtime::gptsession::mdecoders (c++ member)": [[2, 
"_CPPv4N12tensorrt_llm7runtime10GptSession9mDecodersE"]], "tensorrt_llm::runtime::gptsession::mdevice (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession7mDeviceE"]], "tensorrt_llm::runtime::gptsession::mipcmemoryhandles (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17mIpcMemoryHandlesE"]], "tensorrt_llm::runtime::gptsession::mkvcachemanager (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession15mKvCacheManagerE"]], "tensorrt_llm::runtime::gptsession::mlogger (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession7mLoggerE"]], "tensorrt_llm::runtime::gptsession::mmicrobatchconfig (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17mMicroBatchConfigE"]], "tensorrt_llm::runtime::gptsession::mmodelconfig (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession12mModelConfigE"]], "tensorrt_llm::runtime::gptsession::mpipelinecomm (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession13mPipelineCommE"]], "tensorrt_llm::runtime::gptsession::mreceivedevents (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession15mReceivedEventsE"]], "tensorrt_llm::runtime::gptsession::mruntime (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession8mRuntimeE"]], "tensorrt_llm::runtime::gptsession::mworldconfig (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession12mWorldConfigE"]], "tensorrt_llm::runtime::gptsession::setup (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession5setupERK6Config"]], "tensorrt_llm::runtime::gptsession::shouldstopsync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::usecudagraphs (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession13useCudaGraphsEv"]], "tensorrt_llm::runtime::ibuffer (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBufferE"]], "tensorrt_llm::runtime::ibuffer::datatype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer8DataTypeE"]], "tensorrt_llm::runtime::ibuffer::ibuffer (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferERK7IBuffer"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferEv"]], "tensorrt_llm::runtime::ibuffer::sharedconstptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer14SharedConstPtrE"]], "tensorrt_llm::runtime::ibuffer::sharedptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer9SharedPtrE"]], "tensorrt_llm::runtime::ibuffer::uniqueconstptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer14UniqueConstPtrE"]], "tensorrt_llm::runtime::ibuffer::uniqueptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer9UniquePtrE"]], "tensorrt_llm::runtime::ibuffer::data (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataEv"], [2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE"], [2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataEv"]], "tensorrt_llm::runtime::ibuffer::getcapacity (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer11getCapacityEv"]], "tensorrt_llm::runtime::ibuffer::getdatatype (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer11getDataTypeEv"]], "tensorrt_llm::runtime::ibuffer::getdatatypename (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer15getDataTypeNameEv"]], "tensorrt_llm::runtime::ibuffer::getmemorytype (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer13getMemoryTypeEv"]], "tensorrt_llm::runtime::ibuffer::getmemorytypename (c++ function)": [[2, 
"_CPPv4NK12tensorrt_llm7runtime7IBuffer17getMemoryTypeNameEv"]], "tensorrt_llm::runtime::ibuffer::getsize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7getSizeEv"]], "tensorrt_llm::runtime::ibuffer::getsizeinbytes (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer14getSizeInBytesEv"]], "tensorrt_llm::runtime::ibuffer::memorytype (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer10memoryTypeEPKv"]], "tensorrt_llm::runtime::ibuffer::operator= (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBufferaSERK7IBuffer"]], "tensorrt_llm::runtime::ibuffer::release (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer7releaseEv"]], "tensorrt_llm::runtime::ibuffer::resize (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer6resizeENSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::slice (c++ function)": [[2, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE"], [2, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::tobytes (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7toBytesENSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::view (c++ function)": [[2, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtr"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::wrap (c++ function)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE"], [2, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE"], [2, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::~ibuffer (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBufferD0Ev"]], "tensorrt_llm::runtime::igptdecoder (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoderE"]], "tensorrt_llm::runtime::igptdecoder::acceptdrafttokensbyids (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE"]], "tensorrt_llm::runtime::igptdecoder::acceptdrafttokensbylogits (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE"]], "tensorrt_llm::runtime::igptdecoder::create (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createEN8nvinfer18DataTypeE6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrE"]], "tensorrt_llm::runtime::igptdecoder::forward (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder7forwardER14DecodingOutputRK13DecodingInput"]], "tensorrt_llm::runtime::igptdecoder::forwardasync (c++ function)": [[2, 
"_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput"]], "tensorrt_llm::runtime::igptdecoder::gathertree (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager"]], "tensorrt_llm::runtime::igptdecoder::getsamplingconfig (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder17getSamplingConfigEv"]], "tensorrt_llm::runtime::igptdecoder::setup (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t8SizeType"]], "tensorrt_llm::runtime::igptdecoder::~igptdecoder (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoderD0Ev"]], "tensorrt_llm::runtime::igptdecoderbatch (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatchE"]], "tensorrt_llm::runtime::igptdecoderbatch::cudastreamptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch13CudaStreamPtrE"]], "tensorrt_llm::runtime::igptdecoderbatch::igptdecoderbatch (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch16IGptDecoderBatchEv"]], "tensorrt_llm::runtime::igptdecoderbatch::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch9TensorPtrE"]], "tensorrt_llm::runtime::igptdecoderbatch::tokenptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch8TokenPtrE"]], "tensorrt_llm::runtime::igptdecoderbatch::finalize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch8finalizeE8SizeType"]], "tensorrt_llm::runtime::igptdecoderbatch::forward (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch7forwardERN13decoder_batch6OutputERKN13decoder_batch5InputE"]], "tensorrt_llm::runtime::igptdecoderbatch::forwardasync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE"]], "tensorrt_llm::runtime::igptdecoderbatch::forwardsync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE"]], "tensorrt_llm::runtime::igptdecoderbatch::getcumlogprobs (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch14getCumLogProbsE8SizeType"], [2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch14getCumLogProbsEv"]], "tensorrt_llm::runtime::igptdecoderbatch::getfinished (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getFinishedEv"]], "tensorrt_llm::runtime::igptdecoderbatch::getlogprobs (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getLogProbsE8SizeType"], [2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getLogProbsEv"]], "tensorrt_llm::runtime::igptdecoderbatch::getnbsteps (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch10getNbStepsEv"]], "tensorrt_llm::runtime::igptdecoderbatch::getoutputids (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getOutputIdsE8SizeType"]], "tensorrt_llm::runtime::igptdecoderbatch::getparentids (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getParentIdsEv"]], "tensorrt_llm::runtime::igptdecoderbatch::newrequest (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig"]], "tensorrt_llm::runtime::istatefulgptdecoder (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoderE"]], "tensorrt_llm::runtime::istatefulgptdecoder::cudastreamptr (c++ type)": [[2, 
"_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder13CudaStreamPtrE"]], "tensorrt_llm::runtime::istatefulgptdecoder::istatefulgptdecoder (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder19IStatefulGptDecoderEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder9TensorPtrE"]], "tensorrt_llm::runtime::istatefulgptdecoder::finalize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder8finalizeEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::forward (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder7forwardERN7decoder6OutputERKN7decoder5InputE"]], "tensorrt_llm::runtime::istatefulgptdecoder::forwardasync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder12forwardAsyncERN7decoder6OutputERKN7decoder5InputE"]], "tensorrt_llm::runtime::istatefulgptdecoder::forwardsync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder11forwardSyncEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::getallnewtokens (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder15getAllNewTokensEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::getcumlogprobs (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder14getCumLogProbsEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::getlogprobs (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder11getLogProbsEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::getnbfinished (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder13getNbFinishedEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::getnewtokens (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getNewTokensE8SizeType"]], "tensorrt_llm::runtime::istatefulgptdecoder::getoutputids (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getOutputIdsEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::newbatch (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig"]], "tensorrt_llm::runtime::istatefulgptdecoder::setup (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::istatefulgptdecoder::~istatefulgptdecoder (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoderD0Ev"]], "tensorrt_llm::runtime::itensor (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensorE"]], "tensorrt_llm::runtime::itensor::dimtype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor7DimTypeE"]], "tensorrt_llm::runtime::itensor::itensor (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorERK7ITensor"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorEv"]], "tensorrt_llm::runtime::itensor::shape (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor5ShapeE"]], "tensorrt_llm::runtime::itensor::sharedconstptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor14SharedConstPtrE"]], "tensorrt_llm::runtime::itensor::sharedptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor9SharedPtrE"]], "tensorrt_llm::runtime::itensor::uniqueconstptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor14UniqueConstPtrE"]], "tensorrt_llm::runtime::itensor::uniqueptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor9UniquePtrE"]], "tensorrt_llm::runtime::itensor::castsize (c++ 
function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor8castSizeE6size_t"]], "tensorrt_llm::runtime::itensor::getshape (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7ITensor8getShapeEv"]], "tensorrt_llm::runtime::itensor::makeshape (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor9makeShapeERKNSt16initializer_listI8SizeTypeEE"]], "tensorrt_llm::runtime::itensor::operator= (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensoraSERK7ITensor"]], "tensorrt_llm::runtime::itensor::reshape (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor7reshapeERK5Shape"]], "tensorrt_llm::runtime::itensor::resize (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor6resizeENSt6size_tE"]], "tensorrt_llm::runtime::itensor::shapeequals (c++ function)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor11shapeEqualsEbRK5ShapePK1T8SizeType"], [2, "_CPPv4I0ENK12tensorrt_llm7runtime7ITensor11shapeEqualsEbPK1T8SizeType"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor11shapeEqualsERK5ShapeRK5Shape"], [2, "_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERK5Shape"], [2, "_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERKNSt16initializer_listI8SizeTypeEE"]], "tensorrt_llm::runtime::itensor::slice (c++ function)": [[2, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE"], [2, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE"]], "tensorrt_llm::runtime::itensor::squeeze (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeE8SizeType"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape8SizeType"]], "tensorrt_llm::runtime::itensor::tostring (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor8toStringERK5Shape"]], "tensorrt_llm::runtime::itensor::unsqueeze (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeE8SizeType"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeERK5Shape8SizeType"]], "tensorrt_llm::runtime::itensor::view (c++ function)": [[2, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewE9SharedPtr"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape"]], "tensorrt_llm::runtime::itensor::volume (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor6volumeERK5Shape"]], "tensorrt_llm::runtime::itensor::volumenonnegative (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor17volumeNonNegativeERK5Shape"]], "tensorrt_llm::runtime::itensor::wrap (c++ function)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape"], [2, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE"], [2, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE"]], "tensorrt_llm::runtime::itensor::~itensor (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensorD0Ev"]], "tensorrt_llm::runtime::ipcmemory (c++ class)": [[2, 
"_CPPv4N12tensorrt_llm7runtime9IpcMemoryE"]], "tensorrt_llm::runtime::ipcmemory::flags_size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory10FLAGS_SIZEE"]], "tensorrt_llm::runtime::ipcmemory::ipcmemory (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryERK11WorldConfigNSt6size_tE"]], "tensorrt_llm::runtime::ipcmemory::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9TensorPtrE"]], "tensorrt_llm::runtime::ipcmemory::allocateipcmemory (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory17allocateIpcMemoryEv"]], "tensorrt_llm::runtime::ipcmemory::destroyipcmemory (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory16destroyIpcMemoryEv"]], "tensorrt_llm::runtime::ipcmemory::getcommptrstensor (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime9IpcMemory17getCommPtrsTensorEv"]], "tensorrt_llm::runtime::ipcmemory::mbufferptr (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory10mBufferPtrE"]], "tensorrt_llm::runtime::ipcmemory::mbuffersize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory11mBufferSizeE"]], "tensorrt_llm::runtime::ipcmemory::mcommptrs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9mCommPtrsE"]], "tensorrt_llm::runtime::ipcmemory::mworldconfig (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory12mWorldConfigE"]], "tensorrt_llm::runtime::ipcmemory::~ipcmemory (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemoryD0Ev"]], "tensorrt_llm::runtime::memorycounters (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCountersE"]], "tensorrt_llm::runtime::memorycounters::difftype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8DiffTypeE"]], "tensorrt_llm::runtime::memorycounters::memorycounters (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters14MemoryCountersEv"]], "tensorrt_llm::runtime::memorycounters::sizetype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8SizeTypeE"]], "tensorrt_llm::runtime::memorycounters::allocate (c++ function)": [[2, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv8SizeType"], [2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType8SizeType"]], "tensorrt_llm::runtime::memorycounters::bytestostring (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei"], [2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8SizeTypei"]], "tensorrt_llm::runtime::memorycounters::deallocate (c++ function)": [[2, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv8SizeType"], [2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType8SizeType"]], "tensorrt_llm::runtime::memorycounters::getcpu (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getCpuEv"]], "tensorrt_llm::runtime::memorycounters::getcpudiff (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getCpuDiffEv"]], "tensorrt_llm::runtime::memorycounters::getgpu (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getGpuEv"]], "tensorrt_llm::runtime::memorycounters::getgpudiff (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getGpuDiffEv"]], "tensorrt_llm::runtime::memorycounters::getinstance (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters11getInstanceEv"]], "tensorrt_llm::runtime::memorycounters::getpinned (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters9getPinnedEv"]], 
"tensorrt_llm::runtime::memorycounters::getpinneddiff (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters13getPinnedDiffEv"]], "tensorrt_llm::runtime::memorycounters::getuvm (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getUVMEv"]], "tensorrt_llm::runtime::memorycounters::getuvmdiff (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getUVMDiffEv"]], "tensorrt_llm::runtime::memorycounters::mcpu (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mCpuE"]], "tensorrt_llm::runtime::memorycounters::mcpudiff (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mCpuDiffE"]], "tensorrt_llm::runtime::memorycounters::mgpu (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mGpuE"]], "tensorrt_llm::runtime::memorycounters::mgpudiff (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mGpuDiffE"]], "tensorrt_llm::runtime::memorycounters::mpinned (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters7mPinnedE"]], "tensorrt_llm::runtime::memorycounters::mpinneddiff (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters11mPinnedDiffE"]], "tensorrt_llm::runtime::memorycounters::muvm (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mUVME"]], "tensorrt_llm::runtime::memorycounters::muvmdiff (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mUVMDiffE"]], "tensorrt_llm::runtime::memorycounters::tostring (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters8toStringEv"]], "tensorrt_llm::runtime::memorytype (c++ enum)": [[2, "_CPPv4N12tensorrt_llm7runtime10MemoryTypeE"]], "tensorrt_llm::runtime::memorytype::kcpu (c++ enumerator)": [[2, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kCPUE"]], "tensorrt_llm::runtime::memorytype::kgpu (c++ enumerator)": [[2, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kGPUE"]], "tensorrt_llm::runtime::memorytype::kpinned (c++ enumerator)": [[2, "_CPPv4N12tensorrt_llm7runtime10MemoryType7kPINNEDE"]], "tensorrt_llm::runtime::memorytype::kuvm (c++ enumerator)": [[2, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kUVME"]], "tensorrt_llm::runtime::memorytypestring (c++ struct)": [[2, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime16MemoryTypeStringE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kcpu> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEEE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kcpu>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEE5valueE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kgpu> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEEE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kgpu>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEE5valueE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kpinned> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEEE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kpinned>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEE5valueE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kuvm> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kUVMEEE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kuvm>::value (c++ member)": [[2, 
"_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kUVMEE5valueE"]], "tensorrt_llm::runtime::phonynameduetoerror::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE"]], "tensorrt_llm::runtime::phonynameduetoerror::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE"]], "tensorrt_llm::runtime::phonynameduetoerror::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE"]], "tensorrt_llm::runtime::phonynameduetoerror::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE"]], "tensorrt_llm::runtime::pointerelementtype (c++ type)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime18PointerElementTypeE"]], "tensorrt_llm::runtime::prompttuningparams (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParamsE"]], "tensorrt_llm::runtime::prompttuningparams::prompttuningparams (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams18PromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr"]], "tensorrt_llm::runtime::prompttuningparams::sizetype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams8SizeTypeE"]], "tensorrt_llm::runtime::prompttuningparams::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams9TensorPtrE"]], "tensorrt_llm::runtime::prompttuningparams::filltaskstensor (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb"]], "tensorrt_llm::runtime::samplingconfig (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfigE"]], "tensorrt_llm::runtime::samplingconfig::floattype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9FloatTypeE"]], "tensorrt_llm::runtime::samplingconfig::optvec (c++ type)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig6OptVecE"]], "tensorrt_llm::runtime::samplingconfig::samplingconfig (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigE8SizeType"]], "tensorrt_llm::runtime::samplingconfig::beamsearchdiversityrate (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig23beamSearchDiversityRateE"]], "tensorrt_llm::runtime::samplingconfig::beamwidth (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9beamWidthE"]], "tensorrt_llm::runtime::samplingconfig::draftacceptancethreshold (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig24draftAcceptanceThresholdE"]], "tensorrt_llm::runtime::samplingconfig::frequencypenalty (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig16frequencyPenaltyE"]], "tensorrt_llm::runtime::samplingconfig::lengthpenalty (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig13lengthPenaltyE"]], "tensorrt_llm::runtime::samplingconfig::minlength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9minLengthE"]], "tensorrt_llm::runtime::samplingconfig::normalizelogprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig17normalizeLogProbsE"]], "tensorrt_llm::runtime::samplingconfig::presencepenalty (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig15presencePenaltyE"]], "tensorrt_llm::runtime::samplingconfig::randomseed (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig10randomSeedE"]], "tensorrt_llm::runtime::samplingconfig::repetitionpenalty (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig17repetitionPenaltyE"]], 
"tensorrt_llm::runtime::samplingconfig::temperature (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig11temperatureE"]], "tensorrt_llm::runtime::samplingconfig::topk (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topKE"]], "tensorrt_llm::runtime::samplingconfig::topp (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topPE"]], "tensorrt_llm::runtime::samplingconfig::toppdecay (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9topPDecayE"]], "tensorrt_llm::runtime::samplingconfig::toppmin (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig7topPMinE"]], "tensorrt_llm::runtime::samplingconfig::toppresetids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig12topPResetIdsE"]], "tensorrt_llm::runtime::sizetype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime8SizeTypeE"]], "tensorrt_llm::runtime::stringptrmap (c++ type)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime12StringPtrMapE"]], "tensorrt_llm::runtime::trtdatatype (c++ struct)": [[2, "_CPPv4I0_bEN12tensorrt_llm7runtime11TRTDataTypeE"]], "tensorrt_llm::runtime::trtdatatype<t*> (c++ struct)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime11TRTDataTypeIP1TEE"]], "tensorrt_llm::runtime::trtdatatype<t*>::kunderlyingtype (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE15kUnderlyingTypeE"]], "tensorrt_llm::runtime::trtdatatype<t*>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE5valueE"]], "tensorrt_llm::runtime::trtdatatype<bool> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIbEE"]], "tensorrt_llm::runtime::trtdatatype<bool>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIbE5valueE"]], "tensorrt_llm::runtime::trtdatatype<float> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIfEE"]], "tensorrt_llm::runtime::trtdatatype<float>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIfE5valueE"]], "tensorrt_llm::runtime::trtdatatype<half> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeI4halfEE"]], "tensorrt_llm::runtime::trtdatatype<half>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeI4halfE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::int32_t> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::int32_t>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::int64_t> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::int64_t>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::int8_t> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::int8_t>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::uint32_t> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::uint32_t>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::uint64_t> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::uint64_t>::value (c++ 
member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::uint8_t> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::uint8_t>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<void*> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIPvEE"]], "tensorrt_llm::runtime::trtdatatype<void*>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIPvE5valueE"]], "tensorrt_llm::runtime::tllmlogger (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime10TllmLoggerE"]], "tensorrt_llm::runtime::tllmlogger::getlevel (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8getLevelEv"]], "tensorrt_llm::runtime::tllmlogger::log (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE"]], "tensorrt_llm::runtime::tllmlogger::setlevel (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8setLevelE8Severity"]], "tensorrt_llm::runtime::tokenidtype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11TokenIdTypeE"]], "tensorrt_llm::runtime::worldconfig (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfigE"]], "tensorrt_llm::runtime::worldconfig::worldconfig (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalINSt6vectorI8SizeTypeEEEE"]], "tensorrt_llm::runtime::worldconfig::getdevice (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig9getDeviceEv"]], "tensorrt_llm::runtime::worldconfig::getgpuspergroup (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig15getGpusPerGroupEv"]], "tensorrt_llm::runtime::worldconfig::getgpuspernode (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig14getGpusPerNodeEv"]], "tensorrt_llm::runtime::worldconfig::getlastrank (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig11getLastRankEv"]], "tensorrt_llm::runtime::worldconfig::getpipelineparallelgroup (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig24getPipelineParallelGroupEv"]], "tensorrt_llm::runtime::worldconfig::getpipelineparallelrank (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig23getPipelineParallelRankEv"]], "tensorrt_llm::runtime::worldconfig::getpipelineparallelism (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig22getPipelineParallelismEv"]], "tensorrt_llm::runtime::worldconfig::getrank (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getRankEv"]], "tensorrt_llm::runtime::worldconfig::getsize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getSizeEv"]], "tensorrt_llm::runtime::worldconfig::gettensorparallelrank (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig21getTensorParallelRankEv"]], "tensorrt_llm::runtime::worldconfig::gettensorparallelism (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig20getTensorParallelismEv"]], "tensorrt_llm::runtime::worldconfig::isfirstpipelineparallelrank (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig27isFirstPipelineParallelRankEv"]], "tensorrt_llm::runtime::worldconfig::islastpipelineparallelrank (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig26isLastPipelineParallelRankEv"]], "tensorrt_llm::runtime::worldconfig::ispipelineparallel (c++ function)": [[2, 
"_CPPv4NK12tensorrt_llm7runtime11WorldConfig18isPipelineParallelEv"]], "tensorrt_llm::runtime::worldconfig::istensorparallel (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig16isTensorParallelEv"]], "tensorrt_llm::runtime::worldconfig::kdefaultgpuspernode (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig19kDefaultGpusPerNodeE"]], "tensorrt_llm::runtime::worldconfig::mdeviceids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig10mDeviceIdsE"]], "tensorrt_llm::runtime::worldconfig::mgpuspernode (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig12mGpusPerNodeE"]], "tensorrt_llm::runtime::worldconfig::mpipelineparallelism (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig20mPipelineParallelismE"]], "tensorrt_llm::runtime::worldconfig::mrank (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig5mRankE"]], "tensorrt_llm::runtime::worldconfig::mtensorparallelism (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig18mTensorParallelismE"]], "tensorrt_llm::runtime::worldconfig::mpi (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEERKNSt8optionalINSt6vectorI8SizeTypeEEEE"]], "tensorrt_llm::runtime::worldconfig::validconfig (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11validConfigE8SizeType8SizeType"]], "tensorrt_llm::runtime::buffercast (c++ function)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer"], [2, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer"]], "tensorrt_llm::runtime::constpointercast (c++ function)": [[2, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE"], [2, "_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE"]], "tensorrt_llm::runtime::decoder (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoderE"]], "tensorrt_llm::runtime::decoder::input (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder5InputE"]], "tensorrt_llm::runtime::decoder::input::input (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder5Input5InputE9TensorPtr"]], "tensorrt_llm::runtime::decoder::input::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder5Input9TensorPtrE"]], "tensorrt_llm::runtime::decoder::input::cacheindirection (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder5Input16cacheIndirectionE"]], "tensorrt_llm::runtime::decoder::input::logits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder5Input6logitsE"]], "tensorrt_llm::runtime::decoder::output (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder6OutputE"]], "tensorrt_llm::runtime::decoder::output::output (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder6Output6OutputEv"]], "tensorrt_llm::runtime::decoder::output::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder6Output9TensorPtrE"]], "tensorrt_llm::runtime::decoder::output::cacheindirection (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder6Output16cacheIndirectionE"]], "tensorrt_llm::runtime::decoder::output::sequencelengths (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder6Output15sequenceLengthsE"]], "tensorrt_llm::runtime::decoder_batch (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batchE"]], "tensorrt_llm::runtime::decoder_batch::input (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5InputE"]], 
"tensorrt_llm::runtime::decoder_batch::input::input (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEE"], [2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEERKNSt6vectorIbEE"], [2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEE"], [2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEERKNSt6vectorIbEE"]], "tensorrt_llm::runtime::decoder_batch::input::tensorconstptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input14TensorConstPtrE"]], "tensorrt_llm::runtime::decoder_batch::input::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input9TensorPtrE"]], "tensorrt_llm::runtime::decoder_batch::input::active (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input6activeE"]], "tensorrt_llm::runtime::decoder_batch::input::cacheindirection (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input16cacheIndirectionE"]], "tensorrt_llm::runtime::decoder_batch::input::logits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input6logitsE"]], "tensorrt_llm::runtime::decoder_batch::output (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch6OutputE"]], "tensorrt_llm::runtime::decoder_batch::request (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7RequestE"]], "tensorrt_llm::runtime::decoder_batch::request::bufferptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request9BufferPtrE"]], "tensorrt_llm::runtime::decoder_batch::request::consttensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request14ConstTensorPtrE"]], "tensorrt_llm::runtime::decoder_batch::request::request (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14ConstTensorPtr8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE"]], "tensorrt_llm::runtime::decoder_batch::request::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request9TensorPtrE"]], "tensorrt_llm::runtime::decoder_batch::request::badwordslist (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12badWordsListE"]], "tensorrt_llm::runtime::decoder_batch::request::computecumlogprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request18computeCumLogProbsE"]], "tensorrt_llm::runtime::decoder_batch::request::computelogprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request15computeLogProbsE"]], "tensorrt_llm::runtime::decoder_batch::request::draftlogits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request11draftLogitsE"]], "tensorrt_llm::runtime::decoder_batch::request::drafttokens (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request11draftTokensE"]], "tensorrt_llm::runtime::decoder_batch::request::embeddingbias (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13embeddingBiasE"]], "tensorrt_llm::runtime::decoder_batch::request::endid (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request5endIdE"]], "tensorrt_llm::runtime::decoder_batch::request::generatedtokensperstep (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13decoder_batch7Request22generatedTokensPerStepEv"]], "tensorrt_llm::runtime::decoder_batch::request::ids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request3idsE"]], 
"tensorrt_llm::runtime::decoder_batch::request::inputlen (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request8inputLenE"]], "tensorrt_llm::runtime::decoder_batch::request::maxnewtokens (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12maxNewTokensE"]], "tensorrt_llm::runtime::decoder_batch::request::stopwordslist (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13stopWordsListE"]], "tensorrt_llm::runtime::decoder_batch::token (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5TokenE"]], "tensorrt_llm::runtime::decoder_batch::token::token (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5TokenERR9CudaEventRKNSt6vectorIbEE"]], "tensorrt_llm::runtime::decoder_batch::token::active (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token6activeE"]], "tensorrt_llm::runtime::decoder_batch::token::event (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5eventE"]], "tensorrt_llm::runtime::operator<< (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer"], [2, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor"], [2, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE"]], "tensorrt_llm::runtime::setpeeraccess (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13setPeerAccessERK11WorldConfigb"]], "tensorrt_llm::runtime::utils (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime5utilsE"]], "tensorrt_llm::runtime::utils::loadengine (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime5utils10loadEngineERKNSt6stringE"]], "auto (tensorrt_llm.functional.allreducestrategy attribute)": [[23, "tensorrt_llm.functional.AllReduceStrategy.AUTO"]], "allreducestrategy (class in tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.AllReduceStrategy"]], "attentionmasktype (class in tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.AttentionMaskType"]], "dimrange (class in tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.DimRange"]], "fusedgatedmlp (tensorrt_llm.functional.mlptype attribute)": [[23, "tensorrt_llm.functional.MLPType.FusedGatedMLP"]], "gatedmlp (tensorrt_llm.functional.mlptype attribute)": [[23, "tensorrt_llm.functional.MLPType.GatedMLP"]], "groupnorm (tensorrt_llm.functional.layernormtype attribute)": [[23, "tensorrt_llm.functional.LayerNormType.GroupNorm"]], "layernorm (tensorrt_llm.functional.layernormtype attribute)": [[23, "tensorrt_llm.functional.LayerNormType.LayerNorm"]], "layernormpositiontype (class in tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.LayerNormPositionType"]], "layernormtype (class in tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.LayerNormType"]], "mlp (tensorrt_llm.functional.mlptype attribute)": [[23, "tensorrt_llm.functional.MLPType.MLP"]], "mlptype (class in tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.MLPType"]], "oneshot (tensorrt_llm.functional.allreducestrategy attribute)": [[23, "tensorrt_llm.functional.AllReduceStrategy.ONESHOT"]], "positionembeddingtype (class in tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.PositionEmbeddingType"]], "ring (tensorrt_llm.functional.allreducestrategy attribute)": [[23, "tensorrt_llm.functional.AllReduceStrategy.RING"]], "rmsnorm (tensorrt_llm.functional.layernormtype attribute)": [[23, "tensorrt_llm.functional.LayerNormType.RmsNorm"]], "rotaryscalingtype (class in tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.RotaryScalingType"]], "twoshot 
(tensorrt_llm.functional.allreducestrategy attribute)": [[23, "tensorrt_llm.functional.AllReduceStrategy.TWOSHOT"]], "tensor (class in tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.Tensor"]], "abs() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.abs"]], "abs() (tensorrt_llm.functional.tensor method)": [[23, "tensorrt_llm.functional.Tensor.abs"]], "activation() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.activation"]], "add() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.add"]], "alibi (tensorrt_llm.functional.positionembeddingtype attribute)": [[23, "tensorrt_llm.functional.PositionEmbeddingType.alibi"]], "alibi_with_scale (tensorrt_llm.functional.positionembeddingtype attribute)": [[23, "tensorrt_llm.functional.PositionEmbeddingType.alibi_with_scale"]], "allgather() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.allgather"]], "allreduce() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.allreduce"]], "arange() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.arange"]], "argmax() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.argmax"]], "assertion() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.assertion"]], "avg_pool2d() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.avg_pool2d"]], "bert_attention() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.bert_attention"]], "bidirectional (tensorrt_llm.functional.attentionmasktype attribute)": [[23, "tensorrt_llm.functional.AttentionMaskType.bidirectional"]], "bidirectionalglm (tensorrt_llm.functional.attentionmasktype attribute)": [[23, "tensorrt_llm.functional.AttentionMaskType.bidirectionalglm"]], "broadcast_helper() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.broadcast_helper"]], "cast() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.cast"]], "cast() (tensorrt_llm.functional.tensor method)": [[23, "tensorrt_llm.functional.Tensor.cast"]], "causal (tensorrt_llm.functional.attentionmasktype attribute)": [[23, "tensorrt_llm.functional.AttentionMaskType.causal"]], "chatglm (tensorrt_llm.functional.positionembeddingtype attribute)": [[23, "tensorrt_llm.functional.PositionEmbeddingType.chatglm"]], "choices() (tensorrt_llm.functional.positionembeddingtype static method)": [[23, "tensorrt_llm.functional.PositionEmbeddingType.choices"]], "chunk() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.chunk"]], "clip() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.clip"]], "concat() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.concat"]], "constant() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.constant"]], "constant_to_tensor_() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.constant_to_tensor_"]], "conv1d() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.conv1d"]], "conv2d() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.conv2d"]], "conv_transpose2d() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.conv_transpose2d"]], "cos() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.cos"]], "cumsum() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.cumsum"]], "div() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.div"]], "dtype (tensorrt_llm.functional.tensor 
property)": [[23, "tensorrt_llm.functional.Tensor.dtype"]], "dynamic (tensorrt_llm.functional.rotaryscalingtype attribute)": [[23, "tensorrt_llm.functional.RotaryScalingType.dynamic"]], "einsum() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.einsum"]], "elementwise_binary() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.elementwise_binary"]], "embedding() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.embedding"]], "eq() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.eq"]], "exp() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.exp"]], "expand() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.expand"]], "expand_dims() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.expand_dims"]], "expand_dims_like() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.expand_dims_like"]], "expand_mask() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.expand_mask"]], "flip() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.flip"]], "from_string() (tensorrt_llm.functional.positionembeddingtype static method)": [[23, "tensorrt_llm.functional.PositionEmbeddingType.from_string"]], "gather() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.gather"]], "gather_last_token_logits() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.gather_last_token_logits"]], "geglu() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.geglu"]], "gelu() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.gelu"]], "generate_alibi_biases() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.generate_alibi_biases"]], "generate_alibi_slopes() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.generate_alibi_slopes"]], "get_parent() (tensorrt_llm.functional.tensor method)": [[23, "tensorrt_llm.functional.Tensor.get_parent"]], "get_users() (tensorrt_llm.functional.tensor method)": [[23, "tensorrt_llm.functional.Tensor.get_users"]], "gpt_attention() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.gpt_attention"]], "group_norm() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.group_norm"]], "gt() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.gt"]], "identity() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.identity"]], "index_select() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.index_select"]], "interpolate() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.interpolate"]], "is_alibi() (tensorrt_llm.functional.positionembeddingtype method)": [[23, "tensorrt_llm.functional.PositionEmbeddingType.is_alibi"]], "is_dynamic() (tensorrt_llm.functional.tensor method)": [[23, "tensorrt_llm.functional.Tensor.is_dynamic"]], "is_gated_activation() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.is_gated_activation"]], "is_rope() (tensorrt_llm.functional.positionembeddingtype method)": [[23, "tensorrt_llm.functional.PositionEmbeddingType.is_rope"]], "is_trt_wrapper() (tensorrt_llm.functional.tensor method)": [[23, "tensorrt_llm.functional.Tensor.is_trt_wrapper"]], "layer_norm() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.layer_norm"]], "learned_absolute (tensorrt_llm.functional.positionembeddingtype attribute)": [[23, 
"tensorrt_llm.functional.PositionEmbeddingType.learned_absolute"]], "linear (tensorrt_llm.functional.rotaryscalingtype attribute)": [[23, "tensorrt_llm.functional.RotaryScalingType.linear"]], "location (tensorrt_llm.functional.tensor property)": [[23, "tensorrt_llm.functional.Tensor.location"]], "lora_plugin() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.lora_plugin"]], "lt() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.lt"]], "mark_output() (tensorrt_llm.functional.tensor method)": [[23, "tensorrt_llm.functional.Tensor.mark_output"]], "masked_select() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.masked_select"]], "matmul() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.matmul"]], "max() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.max"]], "max() (tensorrt_llm.functional.tensor method)": [[23, "tensorrt_llm.functional.Tensor.max"]], "maximum() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.maximum"]], "mean() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.mean"]], "mean() (tensorrt_llm.functional.tensor method)": [[23, "tensorrt_llm.functional.Tensor.mean"]], "minimum() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.minimum"]], "module": [[23, "module-tensorrt_llm.functional"], [24, "module-tensorrt_llm.layers.activation"], [24, "module-tensorrt_llm.layers.attention"], [24, "module-tensorrt_llm.layers.cast"], [24, "module-tensorrt_llm.layers.conv"], [24, "module-tensorrt_llm.layers.embedding"], [24, "module-tensorrt_llm.layers.linear"], [24, "module-tensorrt_llm.layers.mlp"], [24, "module-tensorrt_llm.layers.normalization"], [24, "module-tensorrt_llm.layers.pooling"], [25, "module-tensorrt_llm.models"], [26, "module-tensorrt_llm.plugin"], [27, "module-tensorrt_llm.quantization"], [28, "module-tensorrt_llm.runtime"]], "mul() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.mul"]], "name (tensorrt_llm.functional.tensor property)": [[23, "tensorrt_llm.functional.Tensor.name"]], "ndim() (tensorrt_llm.functional.tensor method)": [[23, "tensorrt_llm.functional.Tensor.ndim"]], "network (tensorrt_llm.functional.tensor property)": [[23, "tensorrt_llm.functional.Tensor.network"]], "non_gated_version() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.non_gated_version"]], "none (tensorrt_llm.functional.rotaryscalingtype attribute)": [[23, "tensorrt_llm.functional.RotaryScalingType.none"]], "op_and() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.op_and"]], "op_or() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.op_or"]], "outer() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.outer"]], "padding (tensorrt_llm.functional.attentionmasktype attribute)": [[23, "tensorrt_llm.functional.AttentionMaskType.padding"]], "permute() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.permute"]], "permute() (tensorrt_llm.functional.tensor method)": [[23, "tensorrt_llm.functional.Tensor.permute"]], "post_layernorm (tensorrt_llm.functional.layernormpositiontype attribute)": [[23, "tensorrt_llm.functional.LayerNormPositionType.post_layernorm"]], "pow() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.pow"]], "pre_layernorm (tensorrt_llm.functional.layernormpositiontype attribute)": [[23, "tensorrt_llm.functional.LayerNormPositionType.pre_layernorm"]], "rank() (tensorrt_llm.functional.tensor 
method)": [[23, "tensorrt_llm.functional.Tensor.rank"]], "recv() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.recv"]], "relative (tensorrt_llm.functional.positionembeddingtype attribute)": [[23, "tensorrt_llm.functional.PositionEmbeddingType.relative"]], "relu() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.relu"]], "repeat_interleave() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.repeat_interleave"]], "replace_all_uses_with() (tensorrt_llm.functional.tensor method)": [[23, "tensorrt_llm.functional.Tensor.replace_all_uses_with"]], "rms_norm() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.rms_norm"]], "rope_gpt_neox (tensorrt_llm.functional.positionembeddingtype attribute)": [[23, "tensorrt_llm.functional.PositionEmbeddingType.rope_gpt_neox"]], "rope_gptj (tensorrt_llm.functional.positionembeddingtype attribute)": [[23, "tensorrt_llm.functional.PositionEmbeddingType.rope_gptj"]], "round() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.round"]], "select() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.select"]], "selective_scan() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.selective_scan"]], "send() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.send"]], "shape (tensorrt_llm.functional.tensor property)": [[23, "tensorrt_llm.functional.Tensor.shape"]], "shape() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.shape"]], "sigmoid() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.sigmoid"]], "silu() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.silu"]], "sin() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.sin"]], "size() (tensorrt_llm.functional.tensor method)": [[23, "tensorrt_llm.functional.Tensor.size"]], "slice() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.slice"]], "softmax() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.softmax"]], "softplus() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.softplus"]], "split() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.split"]], "split() (tensorrt_llm.functional.tensor method)": [[23, "tensorrt_llm.functional.Tensor.split"]], "sqrt() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.sqrt"]], "sqrt() (tensorrt_llm.functional.tensor method)": [[23, "tensorrt_llm.functional.Tensor.sqrt"]], "squared_relu() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.squared_relu"]], "stack() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.stack"]], "sub() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.sub"]], "swiglu() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.swiglu"]], "tanh() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.tanh"]], "tensorrt_llm.functional": [[23, "module-tensorrt_llm.functional"]], "transpose() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.transpose"]], "transpose() (tensorrt_llm.functional.tensor method)": [[23, "tensorrt_llm.functional.Tensor.transpose"]], "unary() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.unary"]], "unsqueeze() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.unsqueeze"]], "view() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.view"]], "view() 
(tensorrt_llm.functional.tensor method)": [[23, "tensorrt_llm.functional.Tensor.view"]], "where() (in module tensorrt_llm.functional)": [[23, "tensorrt_llm.functional.where"]], "attention (class in tensorrt_llm.layers.attention)": [[24, "tensorrt_llm.layers.attention.Attention"]], "attentionparams (class in tensorrt_llm.layers.attention)": [[24, "tensorrt_llm.layers.attention.AttentionParams"]], "avgpool2d (class in tensorrt_llm.layers.pooling)": [[24, "tensorrt_llm.layers.pooling.AvgPool2d"]], "bertattention (class in tensorrt_llm.layers.attention)": [[24, "tensorrt_llm.layers.attention.BertAttention"]], "cast (class in tensorrt_llm.layers.cast)": [[24, "tensorrt_llm.layers.cast.Cast"]], "columnlinear (in module tensorrt_llm.layers.linear)": [[24, "tensorrt_llm.layers.linear.ColumnLinear"]], "conv1d (class in tensorrt_llm.layers.conv)": [[24, "tensorrt_llm.layers.conv.Conv1d"]], "conv2d (class in tensorrt_llm.layers.conv)": [[24, "tensorrt_llm.layers.conv.Conv2d"]], "convtranspose2d (class in tensorrt_llm.layers.conv)": [[24, "tensorrt_llm.layers.conv.ConvTranspose2d"]], "embedding (class in tensorrt_llm.layers.embedding)": [[24, "tensorrt_llm.layers.embedding.Embedding"]], "fusedgatedmlp (class in tensorrt_llm.layers.mlp)": [[24, "tensorrt_llm.layers.mlp.FusedGatedMLP"]], "gatedmlp (class in tensorrt_llm.layers.mlp)": [[24, "tensorrt_llm.layers.mlp.GatedMLP"]], "groupnorm (class in tensorrt_llm.layers.normalization)": [[24, "tensorrt_llm.layers.normalization.GroupNorm"]], "keyvaluecacheparams (class in tensorrt_llm.layers.attention)": [[24, "tensorrt_llm.layers.attention.KeyValueCacheParams"]], "layernorm (class in tensorrt_llm.layers.normalization)": [[24, "tensorrt_llm.layers.normalization.LayerNorm"]], "linear (class in tensorrt_llm.layers.linear)": [[24, "tensorrt_llm.layers.linear.Linear"]], "mlp (class in tensorrt_llm.layers.mlp)": [[24, "tensorrt_llm.layers.mlp.MLP"]], "mish (class in tensorrt_llm.layers.activation)": [[24, "tensorrt_llm.layers.activation.Mish"]], "prompttuningembedding (class in tensorrt_llm.layers.embedding)": [[24, "tensorrt_llm.layers.embedding.PromptTuningEmbedding"]], "rmsnorm (class in tensorrt_llm.layers.normalization)": [[24, "tensorrt_llm.layers.normalization.RmsNorm"]], "ropeembeddingutils (class in tensorrt_llm.layers.attention)": [[24, "tensorrt_llm.layers.attention.RopeEmbeddingUtils"]], "rowlinear (class in tensorrt_llm.layers.linear)": [[24, "tensorrt_llm.layers.linear.RowLinear"]], "apply_rotary_pos_emb() (tensorrt_llm.layers.attention.ropeembeddingutils static method)": [[24, "tensorrt_llm.layers.attention.RopeEmbeddingUtils.apply_rotary_pos_emb"]], "apply_rotary_pos_emb_chatglm() (tensorrt_llm.layers.attention.ropeembeddingutils static method)": [[24, "tensorrt_llm.layers.attention.RopeEmbeddingUtils.apply_rotary_pos_emb_chatglm"]], "create_sinusoidal_positions() (tensorrt_llm.layers.attention.ropeembeddingutils static method)": [[24, "tensorrt_llm.layers.attention.RopeEmbeddingUtils.create_sinusoidal_positions"]], "fill_none_tensor_list() (tensorrt_llm.layers.attention.keyvaluecacheparams method)": [[24, "tensorrt_llm.layers.attention.KeyValueCacheParams.fill_none_tensor_list"]], "forward() (tensorrt_llm.layers.activation.mish method)": [[24, "tensorrt_llm.layers.activation.Mish.forward"]], "forward() (tensorrt_llm.layers.attention.attention method)": [[24, "tensorrt_llm.layers.attention.Attention.forward"]], "forward() (tensorrt_llm.layers.attention.bertattention method)": [[24, "tensorrt_llm.layers.attention.BertAttention.forward"]], 
"forward() (tensorrt_llm.layers.cast.cast method)": [[24, "tensorrt_llm.layers.cast.Cast.forward"]], "forward() (tensorrt_llm.layers.conv.conv1d method)": [[24, "tensorrt_llm.layers.conv.Conv1d.forward"]], "forward() (tensorrt_llm.layers.conv.conv2d method)": [[24, "tensorrt_llm.layers.conv.Conv2d.forward"]], "forward() (tensorrt_llm.layers.conv.convtranspose2d method)": [[24, "tensorrt_llm.layers.conv.ConvTranspose2d.forward"]], "forward() (tensorrt_llm.layers.embedding.embedding method)": [[24, "tensorrt_llm.layers.embedding.Embedding.forward"]], "forward() (tensorrt_llm.layers.embedding.prompttuningembedding method)": [[24, "tensorrt_llm.layers.embedding.PromptTuningEmbedding.forward"]], "forward() (tensorrt_llm.layers.linear.linear method)": [[24, "tensorrt_llm.layers.linear.Linear.forward"]], "forward() (tensorrt_llm.layers.linear.rowlinear method)": [[24, "tensorrt_llm.layers.linear.RowLinear.forward"]], "forward() (tensorrt_llm.layers.mlp.fusedgatedmlp method)": [[24, "tensorrt_llm.layers.mlp.FusedGatedMLP.forward"]], "forward() (tensorrt_llm.layers.mlp.gatedmlp method)": [[24, "tensorrt_llm.layers.mlp.GatedMLP.forward"]], "forward() (tensorrt_llm.layers.mlp.mlp method)": [[24, "tensorrt_llm.layers.mlp.MLP.forward"]], "forward() (tensorrt_llm.layers.normalization.groupnorm method)": [[24, "tensorrt_llm.layers.normalization.GroupNorm.forward"]], "forward() (tensorrt_llm.layers.normalization.layernorm method)": [[24, "tensorrt_llm.layers.normalization.LayerNorm.forward"]], "forward() (tensorrt_llm.layers.normalization.rmsnorm method)": [[24, "tensorrt_llm.layers.normalization.RmsNorm.forward"]], "forward() (tensorrt_llm.layers.pooling.avgpool2d method)": [[24, "tensorrt_llm.layers.pooling.AvgPool2d.forward"]], "get_first_host_kv_cache_block_pointers() (tensorrt_llm.layers.attention.keyvaluecacheparams method)": [[24, "tensorrt_llm.layers.attention.KeyValueCacheParams.get_first_host_kv_cache_block_pointers"]], "get_first_kv_cache_block_pointers() (tensorrt_llm.layers.attention.keyvaluecacheparams method)": [[24, "tensorrt_llm.layers.attention.KeyValueCacheParams.get_first_kv_cache_block_pointers"]], "get_first_past_key_value() (tensorrt_llm.layers.attention.keyvaluecacheparams method)": [[24, "tensorrt_llm.layers.attention.KeyValueCacheParams.get_first_past_key_value"]], "is_valid() (tensorrt_llm.layers.attention.attentionparams method)": [[24, "tensorrt_llm.layers.attention.AttentionParams.is_valid"]], "is_valid() (tensorrt_llm.layers.attention.keyvaluecacheparams method)": [[24, "tensorrt_llm.layers.attention.KeyValueCacheParams.is_valid"]], "is_valid_cross_attn() (tensorrt_llm.layers.attention.attentionparams method)": [[24, "tensorrt_llm.layers.attention.AttentionParams.is_valid_cross_attn"]], "make_causal_mask() (in module tensorrt_llm.layers.attention)": [[24, "tensorrt_llm.layers.attention.make_causal_mask"]], "multiply_gather() (tensorrt_llm.layers.linear.linear method)": [[24, "tensorrt_llm.layers.linear.Linear.multiply_gather"]], "multiply_reduce() (tensorrt_llm.layers.linear.rowlinear method)": [[24, "tensorrt_llm.layers.linear.RowLinear.multiply_reduce"]], "rotate_every_two() (tensorrt_llm.layers.attention.ropeembeddingutils static method)": [[24, "tensorrt_llm.layers.attention.RopeEmbeddingUtils.rotate_every_two"]], "rotate_half() (tensorrt_llm.layers.attention.ropeembeddingutils static method)": [[24, "tensorrt_llm.layers.attention.RopeEmbeddingUtils.rotate_half"]], "tensorrt_llm.layers.activation": [[24, "module-tensorrt_llm.layers.activation"]], 
"tensorrt_llm.layers.attention": [[24, "module-tensorrt_llm.layers.attention"]], "tensorrt_llm.layers.cast": [[24, "module-tensorrt_llm.layers.cast"]], "tensorrt_llm.layers.conv": [[24, "module-tensorrt_llm.layers.conv"]], "tensorrt_llm.layers.embedding": [[24, "module-tensorrt_llm.layers.embedding"]], "tensorrt_llm.layers.linear": [[24, "module-tensorrt_llm.layers.linear"]], "tensorrt_llm.layers.mlp": [[24, "module-tensorrt_llm.layers.mlp"]], "tensorrt_llm.layers.normalization": [[24, "module-tensorrt_llm.layers.normalization"]], "tensorrt_llm.layers.pooling": [[24, "module-tensorrt_llm.layers.pooling"]], "baichuanforcausallm (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.BaichuanForCausalLM"]], "bertforquestionanswering (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.BertForQuestionAnswering"]], "bertforsequenceclassification (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.BertForSequenceClassification"]], "bertmodel (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.BertModel"]], "bloomforcausallm (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.BloomForCausalLM"]], "bloommodel (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.BloomModel"]], "chatglmforcausallm (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.ChatGLMForCausalLM"]], "chatglmmodel (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.ChatGLMModel"]], "decodermodel (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.DecoderModel"]], "encodermodel (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.EncoderModel"]], "falconforcausallm (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.FalconForCausalLM"]], "falconmodel (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.FalconModel"]], "gptjforcausallm (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.GPTJForCausalLM"]], "gptjmodel (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.GPTJModel"]], "gptlmheadmodel (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.GPTLMHeadModel"]], "gptmodel (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.GPTModel"]], "gptneoxforcausallm (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.GPTNeoXForCausalLM"]], "gptneoxmodel (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.GPTNeoXModel"]], "llamaforcausallm (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.LLaMAForCausalLM"]], "llamamodel (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.LLaMAModel"]], "mptforcausallm (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.MPTForCausalLM"]], "mptmodel (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.MPTModel"]], "mambalmheadmodel (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.MambaLMHeadModel"]], "medusaforcausallm (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.MedusaForCausalLm"]], "optforcausallm (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.OPTForCausalLM"]], "optmodel (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.OPTModel"]], "phiforcausallm (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.PhiForCausalLM"]], "phimodel (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.PhiModel"]], "pretrainedconfig (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.PretrainedConfig"]], "pretrainedmodel (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.PretrainedModel"]], "qwenforcausallm (class in tensorrt_llm.models)": [[25, 
"tensorrt_llm.models.QWenForCausalLM"]], "skyworkforcausallm (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.SkyworkForCausalLM"]], "whisperencoder (class in tensorrt_llm.models)": [[25, "tensorrt_llm.models.WhisperEncoder"]], "check_config() (tensorrt_llm.models.chatglmforcausallm method)": [[25, "tensorrt_llm.models.ChatGLMForCausalLM.check_config"]], "check_config() (tensorrt_llm.models.falconforcausallm method)": [[25, "tensorrt_llm.models.FalconForCausalLM.check_config"]], "check_config() (tensorrt_llm.models.gptjforcausallm method)": [[25, "tensorrt_llm.models.GPTJForCausalLM.check_config"]], "check_config() (tensorrt_llm.models.llamaforcausallm method)": [[25, "tensorrt_llm.models.LLaMAForCausalLM.check_config"]], "check_config() (tensorrt_llm.models.mptforcausallm method)": [[25, "tensorrt_llm.models.MPTForCausalLM.check_config"]], "check_config() (tensorrt_llm.models.optforcausallm method)": [[25, "tensorrt_llm.models.OPTForCausalLM.check_config"]], "check_config() (tensorrt_llm.models.phiforcausallm method)": [[25, "tensorrt_llm.models.PhiForCausalLM.check_config"]], "check_config() (tensorrt_llm.models.pretrainedmodel method)": [[25, "tensorrt_llm.models.PretrainedModel.check_config"]], "check_config() (tensorrt_llm.models.skyworkforcausallm method)": [[25, "tensorrt_llm.models.SkyworkForCausalLM.check_config"]], "default_plugin_config() (tensorrt_llm.models.llamaforcausallm method)": [[25, "tensorrt_llm.models.LLaMAForCausalLM.default_plugin_config"]], "forward() (tensorrt_llm.models.bertforquestionanswering method)": [[25, "tensorrt_llm.models.BertForQuestionAnswering.forward"]], "forward() (tensorrt_llm.models.bertforsequenceclassification method)": [[25, "tensorrt_llm.models.BertForSequenceClassification.forward"]], "forward() (tensorrt_llm.models.bertmodel method)": [[25, "tensorrt_llm.models.BertModel.forward"]], "forward() (tensorrt_llm.models.bloommodel method)": [[25, "tensorrt_llm.models.BloomModel.forward"]], "forward() (tensorrt_llm.models.chatglmmodel method)": [[25, "tensorrt_llm.models.ChatGLMModel.forward"]], "forward() (tensorrt_llm.models.decodermodel method)": [[25, "tensorrt_llm.models.DecoderModel.forward"]], "forward() (tensorrt_llm.models.encodermodel method)": [[25, "tensorrt_llm.models.EncoderModel.forward"]], "forward() (tensorrt_llm.models.falconmodel method)": [[25, "tensorrt_llm.models.FalconModel.forward"]], "forward() (tensorrt_llm.models.gptjmodel method)": [[25, "tensorrt_llm.models.GPTJModel.forward"]], "forward() (tensorrt_llm.models.gptlmheadmodel method)": [[25, "tensorrt_llm.models.GPTLMHeadModel.forward"]], "forward() (tensorrt_llm.models.gptmodel method)": [[25, "tensorrt_llm.models.GPTModel.forward"]], "forward() (tensorrt_llm.models.gptneoxmodel method)": [[25, "tensorrt_llm.models.GPTNeoXModel.forward"]], "forward() (tensorrt_llm.models.llamamodel method)": [[25, "tensorrt_llm.models.LLaMAModel.forward"]], "forward() (tensorrt_llm.models.mptmodel method)": [[25, "tensorrt_llm.models.MPTModel.forward"]], "forward() (tensorrt_llm.models.mambalmheadmodel method)": [[25, "tensorrt_llm.models.MambaLMHeadModel.forward"]], "forward() (tensorrt_llm.models.medusaforcausallm method)": [[25, "tensorrt_llm.models.MedusaForCausalLm.forward"]], "forward() (tensorrt_llm.models.optmodel method)": [[25, "tensorrt_llm.models.OPTModel.forward"]], "forward() (tensorrt_llm.models.phimodel method)": [[25, "tensorrt_llm.models.PhiModel.forward"]], "forward() (tensorrt_llm.models.qwenforcausallm method)": [[25, 
"tensorrt_llm.models.QWenForCausalLM.forward"]], "forward() (tensorrt_llm.models.whisperencoder method)": [[25, "tensorrt_llm.models.WhisperEncoder.forward"]], "from_checkpoint() (tensorrt_llm.models.pretrainedmodel class method)": [[25, "tensorrt_llm.models.PretrainedModel.from_checkpoint"]], "from_config() (tensorrt_llm.models.pretrainedmodel class method)": [[25, "tensorrt_llm.models.PretrainedModel.from_config"]], "from_dict() (tensorrt_llm.models.pretrainedconfig class method)": [[25, "tensorrt_llm.models.PretrainedConfig.from_dict"]], "from_hugging_face() (tensorrt_llm.models.llamaforcausallm class method)": [[25, "tensorrt_llm.models.LLaMAForCausalLM.from_hugging_face"]], "from_json_file() (tensorrt_llm.models.pretrainedconfig class method)": [[25, "tensorrt_llm.models.PretrainedConfig.from_json_file"]], "load() (tensorrt_llm.models.pretrainedmodel method)": [[25, "tensorrt_llm.models.PretrainedModel.load"]], "prepare_inputs() (tensorrt_llm.models.chatglmforcausallm method)": [[25, "tensorrt_llm.models.ChatGLMForCausalLM.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.decodermodel method)": [[25, "tensorrt_llm.models.DecoderModel.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.encodermodel method)": [[25, "tensorrt_llm.models.EncoderModel.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.gptlmheadmodel method)": [[25, "tensorrt_llm.models.GPTLMHeadModel.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.mambalmheadmodel method)": [[25, "tensorrt_llm.models.MambaLMHeadModel.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.medusaforcausallm method)": [[25, "tensorrt_llm.models.MedusaForCausalLm.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.pretrainedmodel method)": [[25, "tensorrt_llm.models.PretrainedModel.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.qwenforcausallm method)": [[25, "tensorrt_llm.models.QWenForCausalLM.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.whisperencoder method)": [[25, "tensorrt_llm.models.WhisperEncoder.prepare_inputs"]], "quantize_model() (in module tensorrt_llm.models)": [[25, "tensorrt_llm.models.quantize_model"]], "rotary_base() (tensorrt_llm.models.llamaforcausallm method)": [[25, "tensorrt_llm.models.LLaMAForCausalLM.rotary_base"]], "rotary_scaling() (tensorrt_llm.models.llamaforcausallm method)": [[25, "tensorrt_llm.models.LLaMAForCausalLM.rotary_scaling"]], "set_if_not_exist() (tensorrt_llm.models.pretrainedconfig method)": [[25, "tensorrt_llm.models.PretrainedConfig.set_if_not_exist"]], "set_rank() (tensorrt_llm.models.pretrainedconfig method)": [[25, "tensorrt_llm.models.PretrainedConfig.set_rank"]], "tensorrt_llm.models": [[25, "module-tensorrt_llm.models"]], "to_dict() (tensorrt_llm.models.pretrainedconfig method)": [[25, "tensorrt_llm.models.PretrainedConfig.to_dict"]], "pluginconfig (class in tensorrt_llm.plugin)": [[26, "tensorrt_llm.plugin.PluginConfig"]], "tensorrt_llm.plugin": [[26, "module-tensorrt_llm.plugin"]], "to_legacy_setting() (tensorrt_llm.plugin.pluginconfig method)": [[26, "tensorrt_llm.plugin.PluginConfig.to_legacy_setting"]], "quantmode (class in tensorrt_llm.quantization)": [[27, "tensorrt_llm.quantization.QuantMode"]], "tensorrt_llm.quantization": [[27, "module-tensorrt_llm.quantization"]], "chatglmgenerationsession (class in tensorrt_llm.runtime)": [[28, "tensorrt_llm.runtime.ChatGLMGenerationSession"]], "generationsequence (class in tensorrt_llm.runtime)": [[28, "tensorrt_llm.runtime.GenerationSequence"]], "generationsession (class in 
tensorrt_llm.runtime)": [[28, "tensorrt_llm.runtime.GenerationSession"]], "kvcachemanager (class in tensorrt_llm.runtime)": [[28, "tensorrt_llm.runtime.KVCacheManager"]], "logitsprocessor (class in tensorrt_llm.runtime)": [[28, "tensorrt_llm.runtime.LogitsProcessor"]], "logitsprocessorlist (class in tensorrt_llm.runtime)": [[28, "tensorrt_llm.runtime.LogitsProcessorList"]], "mambalmheadmodelgenerationsession (class in tensorrt_llm.runtime)": [[28, "tensorrt_llm.runtime.MambaLMHeadModelGenerationSession"]], "modelconfig (class in tensorrt_llm.runtime)": [[28, "tensorrt_llm.runtime.ModelConfig"]], "modelrunner (class in tensorrt_llm.runtime)": [[28, "tensorrt_llm.runtime.ModelRunner"]], "qwenforcausallmgenerationsession (class in tensorrt_llm.runtime)": [[28, "tensorrt_llm.runtime.QWenForCausalLMGenerationSession"]], "session (class in tensorrt_llm.runtime)": [[28, "tensorrt_llm.runtime.Session"]], "stoppingcriteria (class in tensorrt_llm.runtime)": [[28, "tensorrt_llm.runtime.StoppingCriteria"]], "stoppingcriterialist (class in tensorrt_llm.runtime)": [[28, "tensorrt_llm.runtime.StoppingCriteriaList"]], "tensorinfo (class in tensorrt_llm.runtime)": [[28, "tensorrt_llm.runtime.TensorInfo"]], "add_sequence() (tensorrt_llm.runtime.kvcachemanager method)": [[28, "tensorrt_llm.runtime.KVCacheManager.add_sequence"]], "batch_size (tensorrt_llm.runtime.generationsession attribute)": [[28, "tensorrt_llm.runtime.GenerationSession.batch_size"]], "buffer_allocated (tensorrt_llm.runtime.generationsession attribute)": [[28, "tensorrt_llm.runtime.GenerationSession.buffer_allocated"]], "context (tensorrt_llm.runtime.session property)": [[28, "tensorrt_llm.runtime.Session.context"]], "cross_attention (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.cross_attention"]], "cross_attention (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.cross_attention"]], "cuda_graph_mode (tensorrt_llm.runtime.generationsession attribute)": [[28, "tensorrt_llm.runtime.GenerationSession.cuda_graph_mode"]], "cuda_stream_guard() (tensorrt_llm.runtime.generationsession method)": [[28, "tensorrt_llm.runtime.GenerationSession.cuda_stream_guard"]], "debug_mode (tensorrt_llm.runtime.generationsession attribute)": [[28, "tensorrt_llm.runtime.GenerationSession.debug_mode"]], "debug_tensors_to_save (tensorrt_llm.runtime.generationsession attribute)": [[28, "tensorrt_llm.runtime.GenerationSession.debug_tensors_to_save"]], "decode() (tensorrt_llm.runtime.generationsession method)": [[28, "tensorrt_llm.runtime.GenerationSession.decode"]], "decode_batch() (tensorrt_llm.runtime.generationsession method)": [[28, "tensorrt_llm.runtime.GenerationSession.decode_batch"]], "decode_regular() (tensorrt_llm.runtime.generationsession method)": [[28, "tensorrt_llm.runtime.GenerationSession.decode_regular"]], "decode_stream() (tensorrt_llm.runtime.generationsession method)": [[28, "tensorrt_llm.runtime.GenerationSession.decode_stream"]], "device (tensorrt_llm.runtime.generationsession attribute)": [[28, "tensorrt_llm.runtime.GenerationSession.device"]], "dtype (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.dtype"]], "dtype (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.dtype"]], "dtype (tensorrt_llm.runtime.modelrunner property)": [[28, "tensorrt_llm.runtime.ModelRunner.dtype"]], "dtype (tensorrt_llm.runtime.tensorinfo attribute)": [[28, "tensorrt_llm.runtime.TensorInfo.dtype"]], 
"dump_debug_buffers() (tensorrt_llm.runtime.generationsession method)": [[28, "tensorrt_llm.runtime.GenerationSession.dump_debug_buffers"]], "early_stop_criteria() (tensorrt_llm.runtime.generationsession method)": [[28, "tensorrt_llm.runtime.GenerationSession.early_stop_criteria"]], "engine (tensorrt_llm.runtime.session property)": [[28, "tensorrt_llm.runtime.Session.engine"]], "filter_medusa_logits() (tensorrt_llm.runtime.generationsession method)": [[28, "tensorrt_llm.runtime.GenerationSession.filter_medusa_logits"]], "finalize_decoder() (tensorrt_llm.runtime.generationsession method)": [[28, "tensorrt_llm.runtime.GenerationSession.finalize_decoder"]], "find_best_medusa_path() (tensorrt_llm.runtime.generationsession method)": [[28, "tensorrt_llm.runtime.GenerationSession.find_best_medusa_path"]], "first_layer (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.first_layer"]], "from_dir() (tensorrt_llm.runtime.modelrunner class method)": [[28, "tensorrt_llm.runtime.ModelRunner.from_dir"]], "from_engine() (tensorrt_llm.runtime.session static method)": [[28, "tensorrt_llm.runtime.Session.from_engine"]], "from_serialized_engine() (tensorrt_llm.runtime.session static method)": [[28, "tensorrt_llm.runtime.Session.from_serialized_engine"]], "gather_context_logits (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.gather_context_logits"]], "gather_context_logits (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.gather_context_logits"]], "gather_context_logits (tensorrt_llm.runtime.modelrunner property)": [[28, "tensorrt_llm.runtime.ModelRunner.gather_context_logits"]], "gather_generation_logits (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.gather_generation_logits"]], "gather_generation_logits (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.gather_generation_logits"]], "gather_generation_logits (tensorrt_llm.runtime.modelrunner property)": [[28, "tensorrt_llm.runtime.ModelRunner.gather_generation_logits"]], "generate() (tensorrt_llm.runtime.modelrunner method)": [[28, "tensorrt_llm.runtime.ModelRunner.generate"]], "generate() (tensorrt_llm.runtime.qwenforcausallmgenerationsession method)": [[28, "tensorrt_llm.runtime.QWenForCausalLMGenerationSession.generate"]], "get_batch_idx() (tensorrt_llm.runtime.generationsequence method)": [[28, "tensorrt_llm.runtime.GenerationSequence.get_batch_idx"]], "get_next_medusa_tokens() (tensorrt_llm.runtime.generationsession method)": [[28, "tensorrt_llm.runtime.GenerationSession.get_next_medusa_tokens"]], "get_pointer_arrays() (tensorrt_llm.runtime.kvcachemanager method)": [[28, "tensorrt_llm.runtime.KVCacheManager.get_pointer_arrays"]], "get_seq_idx() (tensorrt_llm.runtime.generationsequence method)": [[28, "tensorrt_llm.runtime.GenerationSequence.get_seq_idx"]], "gpt_attention_plugin (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.gpt_attention_plugin"]], "handle_per_step() (tensorrt_llm.runtime.generationsession method)": [[28, "tensorrt_llm.runtime.GenerationSession.handle_per_step"]], "has_position_embedding (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.has_position_embedding"]], "has_position_embedding (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.has_position_embedding"]], "has_token_type_embedding 
(tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.has_token_type_embedding"]], "has_token_type_embedding (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.has_token_type_embedding"]], "head_size (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.head_size"]], "head_size (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.head_size"]], "hf_modules_to_trtllm_modules (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.hf_modules_to_trtllm_modules"]], "hidden_size (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.hidden_size"]], "hidden_size (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.hidden_size"]], "hidden_size (tensorrt_llm.runtime.modelrunner property)": [[28, "tensorrt_llm.runtime.ModelRunner.hidden_size"]], "infer_shapes() (tensorrt_llm.runtime.session method)": [[28, "tensorrt_llm.runtime.Session.infer_shapes"]], "is_medusa_mode (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.is_medusa_mode"]], "last_layer (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.last_layer"]], "lora_plugin (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.lora_plugin"]], "lora_target_modules (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.lora_target_modules"]], "mamba_d_conv (tensorrt_llm.runtime.mambalmheadmodelgenerationsession property)": [[28, "tensorrt_llm.runtime.MambaLMHeadModelGenerationSession.mamba_d_conv"]], "mamba_d_conv (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.mamba_d_conv"]], "mamba_d_state (tensorrt_llm.runtime.mambalmheadmodelgenerationsession property)": [[28, "tensorrt_llm.runtime.MambaLMHeadModelGenerationSession.mamba_d_state"]], "mamba_d_state (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.mamba_d_state"]], "mamba_expand (tensorrt_llm.runtime.mambalmheadmodelgenerationsession property)": [[28, "tensorrt_llm.runtime.MambaLMHeadModelGenerationSession.mamba_expand"]], "mamba_expand (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.mamba_expand"]], "mapping (tensorrt_llm.runtime.generationsession attribute)": [[28, "tensorrt_llm.runtime.GenerationSession.mapping"]], "max_batch_size (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.max_batch_size"]], "max_medusa_tokens (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.max_medusa_tokens"]], "max_medusa_tokens (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.max_medusa_tokens"]], "max_prompt_embedding_table_size (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.max_prompt_embedding_table_size"]], "max_prompt_embedding_table_size (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.max_prompt_embedding_table_size"]], "max_prompt_embedding_table_size (tensorrt_llm.runtime.modelrunner property)": [[28, "tensorrt_llm.runtime.ModelRunner.max_prompt_embedding_table_size"]], "max_sequence_length (tensorrt_llm.runtime.modelrunner property)": [[28, 
"tensorrt_llm.runtime.ModelRunner.max_sequence_length"]], "medusa_paths (tensorrt_llm.runtime.generationsession attribute)": [[28, "tensorrt_llm.runtime.GenerationSession.medusa_paths"]], "medusa_position_offsets (tensorrt_llm.runtime.generationsession attribute)": [[28, "tensorrt_llm.runtime.GenerationSession.medusa_position_offsets"]], "medusa_temperature (tensorrt_llm.runtime.generationsession attribute)": [[28, "tensorrt_llm.runtime.GenerationSession.medusa_temperature"]], "medusa_topks (tensorrt_llm.runtime.generationsession attribute)": [[28, "tensorrt_llm.runtime.GenerationSession.medusa_topks"]], "medusa_tree_ids (tensorrt_llm.runtime.generationsession attribute)": [[28, "tensorrt_llm.runtime.GenerationSession.medusa_tree_ids"]], "model_name (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.model_name"]], "name (tensorrt_llm.runtime.tensorinfo attribute)": [[28, "tensorrt_llm.runtime.TensorInfo.name"]], "next_medusa_input_ids() (tensorrt_llm.runtime.generationsession method)": [[28, "tensorrt_llm.runtime.GenerationSession.next_medusa_input_ids"]], "num_heads (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.num_heads"]], "num_heads (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.num_heads"]], "num_heads (tensorrt_llm.runtime.modelrunner property)": [[28, "tensorrt_llm.runtime.ModelRunner.num_heads"]], "num_heads_kv (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.num_heads_kv"]], "num_kv_heads (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.num_kv_heads"]], "num_layers (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.num_layers"]], "num_layers (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.num_layers"]], "num_layers (tensorrt_llm.runtime.modelrunner property)": [[28, "tensorrt_llm.runtime.ModelRunner.num_layers"]], "num_medusa_heads (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.num_medusa_heads"]], "num_medusa_heads (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.num_medusa_heads"]], "num_medusa_tokens (tensorrt_llm.runtime.generationsession attribute)": [[28, "tensorrt_llm.runtime.GenerationSession.num_medusa_tokens"]], "paged_kv_cache (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.paged_kv_cache"]], "paged_kv_cache (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.paged_kv_cache"]], "pp_communicate_final_output_ids() (tensorrt_llm.runtime.generationsession method)": [[28, "tensorrt_llm.runtime.GenerationSession.pp_communicate_final_output_ids"]], "pp_communicate_new_tokens() (tensorrt_llm.runtime.generationsession method)": [[28, "tensorrt_llm.runtime.GenerationSession.pp_communicate_new_tokens"]], "process_logits_for_medusa_mode() (tensorrt_llm.runtime.generationsession method)": [[28, "tensorrt_llm.runtime.GenerationSession.process_logits_for_medusa_mode"]], "quant_mode (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.quant_mode"]], "quant_mode (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.quant_mode"]], "remove_input_padding (tensorrt_llm.runtime.generationsession property)": [[28, 
"tensorrt_llm.runtime.GenerationSession.remove_input_padding"]], "remove_input_padding (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.remove_input_padding"]], "remove_input_padding (tensorrt_llm.runtime.modelrunner property)": [[28, "tensorrt_llm.runtime.ModelRunner.remove_input_padding"]], "run() (tensorrt_llm.runtime.session method)": [[28, "tensorrt_llm.runtime.Session.run"]], "runtime (tensorrt_llm.runtime.generationsession attribute)": [[28, "tensorrt_llm.runtime.GenerationSession.runtime"]], "runtime (tensorrt_llm.runtime.session property)": [[28, "tensorrt_llm.runtime.Session.runtime"]], "serialize_engine() (tensorrt_llm.runtime.modelrunner method)": [[28, "tensorrt_llm.runtime.ModelRunner.serialize_engine"]], "set_shapes() (tensorrt_llm.runtime.session method)": [[28, "tensorrt_llm.runtime.Session.set_shapes"]], "setup() (tensorrt_llm.runtime.generationsession method)": [[28, "tensorrt_llm.runtime.GenerationSession.setup"]], "setup() (tensorrt_llm.runtime.mambalmheadmodelgenerationsession method)": [[28, "tensorrt_llm.runtime.MambaLMHeadModelGenerationSession.setup"]], "shape (tensorrt_llm.runtime.tensorinfo attribute)": [[28, "tensorrt_llm.runtime.TensorInfo.shape"]], "step() (tensorrt_llm.runtime.kvcachemanager method)": [[28, "tensorrt_llm.runtime.KVCacheManager.step"]], "tensorrt_llm.runtime": [[28, "module-tensorrt_llm.runtime"]], "to_word_list_format() (in module tensorrt_llm.runtime)": [[28, "tensorrt_llm.runtime.to_word_list_format"]], "tokens_per_block (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.tokens_per_block"]], "tokens_per_block (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.tokens_per_block"]], "trtllm_modules_to_hf_modules (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.trtllm_modules_to_hf_modules"]], "update_kv_cache_draft_token_location() (tensorrt_llm.runtime.generationsession method)": [[28, "tensorrt_llm.runtime.GenerationSession.update_kv_cache_draft_token_location"]], "update_output_ids_by_offset() (tensorrt_llm.runtime.generationsession method)": [[28, "tensorrt_llm.runtime.GenerationSession.update_output_ids_by_offset"]], "use_context_fmha_for_generation (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.use_context_fmha_for_generation"]], "use_context_fmha_for_generation (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.use_context_fmha_for_generation"]], "use_custom_all_reduce (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.use_custom_all_reduce"]], "use_custom_all_reduce (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.use_custom_all_reduce"]], "use_gpt_attention_plugin (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.use_gpt_attention_plugin"]], "use_lora_plugin (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.use_lora_plugin"]], "use_lora_plugin (tensorrt_llm.runtime.modelrunner property)": [[28, "tensorrt_llm.runtime.ModelRunner.use_lora_plugin"]], "vocab_size (tensorrt_llm.runtime.generationsession property)": [[28, "tensorrt_llm.runtime.GenerationSession.vocab_size"]], "vocab_size (tensorrt_llm.runtime.modelconfig attribute)": [[28, "tensorrt_llm.runtime.ModelConfig.vocab_size"]], "vocab_size 
(tensorrt_llm.runtime.modelrunner property)": [[28, "tensorrt_llm.runtime.ModelRunner.vocab_size"]], "vocab_size_padded (tensorrt_llm.runtime.modelrunner property)": [[28, "tensorrt_llm.runtime.ModelRunner.vocab_size_padded"]]}})