mirror of https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
Search.setIndex({"docnames": ["2023-05-17-how-to-add-a-new-model", "2023-05-19-how-to-debug", "_cpp_gen/runtime", "architecture", "batch_manager", "blogs/H100vsA100", "blogs/H200launch", "gpt_attention", "gpt_runtime", "graph-rewriting", "index", "installation", "memory", "performance", "precision", "python-api/tensorrt_llm.functional", "python-api/tensorrt_llm.layers", "python-api/tensorrt_llm.models", "python-api/tensorrt_llm.plugin", "python-api/tensorrt_llm.quantization", "python-api/tensorrt_llm.runtime"], "filenames": ["2023-05-17-how-to-add-a-new-model.md", "2023-05-19-how-to-debug.md", "_cpp_gen/runtime.rst", "architecture.md", "batch_manager.md", "blogs/H100vsA100.md", "blogs/H200launch.md", "gpt_attention.md", "gpt_runtime.md", "graph-rewriting.md", "index.rst", "installation.md", "memory.md", "performance.md", "precision.md", "python-api/tensorrt_llm.functional.rst", "python-api/tensorrt_llm.layers.rst", "python-api/tensorrt_llm.models.rst", "python-api/tensorrt_llm.plugin.rst", "python-api/tensorrt_llm.quantization.rst", "python-api/tensorrt_llm.runtime.rst"], "titles": ["How to add a new model", "How to debug", "Runtime", "TensorRT-LLM Architecture", "The Batch Manager in TensorRT-LLM", "H100 has 4.6x A100 Performance in TensorRT-LLM, achieving 10,000 tok/s at 100ms to first token", "H200 achieves nearly 12,000 tokens/sec on Llama2-13B with TensorRT-LLM", "Multi-head, Multi-query and Group-query Attention", "C++ GPT Runtime", "Graph Rewriting Module", "Welcome to TensorRT-LLM\u2019s documentation!", "Build TensorRT-LLM", "Memory Usage of TensorRT-LLM", "Performance of TensorRT-LLM", "Numerical Precision", "Functionals", "Layers", "Models", "Plugin", "Quantization", "Runtime"], "terms": {"thi": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 20], "document": [0, 1, 3, 5, 6, 7, 8, 11, 12, 13, 14, 15], "describ": [0, 1, 3, 4, 7, 8, 11, 13, 14, 15], "tensorrt": [0, 1, 2, 7, 8, 9, 14, 15, 17, 20], "llm": [0, 1, 7, 8, 9, 14, 15], "what": 0, "provid": [0, 2, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15, 20], "low": [0, 7], "level": [0, 2, 3, 4, 12], "function": [0, 1, 2, 3, 4, 7, 8, 10, 12, 14, 20], "concat": [0, 15], "sum": [0, 8, 9, 15], "etc": [0, 2, 12], "basic": 0, "layer": [0, 1, 2, 3, 7, 8, 9, 10, 12, 14, 15], "linear": [0, 3, 12, 14, 15], "layernorm": [0, 15, 16, 17], "high": [0, 3, 12], "mlp": [0, 1, 3, 15, 17], "attent": [0, 3, 8, 10, 12, 15], "develop": [0, 3, 8, 11, 13, 15], "need": [0, 3, 4, 7, 8, 9, 11, 12, 13, 15, 20], "implement": [0, 3, 4, 7, 8, 13, 14, 15], "creat": [0, 2, 3, 4, 8, 9, 12, 13, 15, 20], "directori": [0, 3, 11, 13, 20], "tensorrt_llm": [0, 1, 2, 3, 4, 7, 8, 9, 10, 11, 13, 15, 16, 17, 19, 20], "e": [0, 2, 7, 8, 9, 13, 14, 15], "g": 0, "bloom": [0, 8, 14], "write": 0, "py": [0, 1, 3, 7, 9, 11, 12, 13, 15], "It": [0, 2, 3, 4, 7, 8, 9, 11, 13, 14, 15], "": [0, 3, 4, 6, 8, 9, 11, 12, 13, 14, 15, 16, 20], "option": [0, 2, 4, 5, 8, 9, 11, 12, 13, 15, 20], "us": [0, 1, 2, 3, 4, 5, 7, 8, 11, 12, 13, 14, 15, 16, 17, 20], "usual": [1, 3, 15], "we": [1, 8, 9, 11, 13, 15], "want": [1, 15], "print": [1, 2, 12, 13], "intermedi": [1, 3, 7], "tensor": [1, 2, 3, 4, 5, 6, 8, 13, 14, 15, 16, 17, 20], "valu": [1, 2, 3, 4, 5, 7, 8, 12, 14, 15, 17, 19, 20], "when": [1, 3, 4, 7, 8, 11, 12, 13, 14, 15, 16, 17, 20], "obei": 1, "defin": [1, 3, 4, 6, 7, 8, 9, 14, 15, 16], "run": [1, 2, 3, 4, 7, 8, 11, 12, 14, 15, 20], "paradigm": 1, "should": [1, 2, 9, 11, 12, 13, 15, 16, 20], "mark": [1, 2, 4, 8, 9, 15], "interest": 1, "network": [1, 3, 7, 9, 12, 13, 14, 15], "output": [1, 2, 4, 5, 6, 7, 
9, 13, 15, 16, 20], "Then": [1, 15], "runtim": [1, 7, 10, 15, 16], "regist": [1, 4], "register_network_output": 1, "api": [1, 3, 8, 11, 12, 15], "class": [1, 2, 3, 8, 9, 11, 15, 16, 17, 19, 20], "modul": [1, 3, 7, 8, 10, 11, 16, 17], "def": [1, 3, 9], "__init__": [1, 3, 9], "self": [1, 3, 7, 9, 15, 17, 20], "hidden_s": [1, 9, 15, 16, 17, 20], "ffn_hidden_s": [1, 16, 17], "bia": [1, 3, 8, 15, 16, 17], "true": [1, 2, 4, 8, 9, 15, 16, 17, 20], "tp_group": [1, 15, 16], "none": [1, 9, 15, 16, 17, 20], "tp_size": [1, 13, 15, 16], "1": [1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 16, 17, 20], "super": [1, 9], "fc": [1, 3], "columnlinear": [1, 16], "gather_output": [1, 16], "fals": [1, 2, 7, 8, 9, 15, 16, 17, 20], "proj": 1, "rowlinear": [1, 16], "forward": [1, 2, 9, 16, 17], "hidden_st": [1, 15, 16, 17, 20], "inter": 1, "relu": [1, 3, 15, 17], "here": [1, 3, 9, 11, 12, 14], "after": [1, 2, 3, 4, 7, 8, 9, 11, 12, 13, 15, 16], "return": [1, 2, 3, 4, 9, 12, 15, 16, 17, 20], "k": [1, 3, 7, 8, 14, 15], "v": [1, 2, 5, 7, 8, 13, 14, 15], "gm": 1, "named_network_output": 1, "net": 1, "_mark_output": 1, "dtype": [1, 2, 3, 9, 13, 15, 16, 17, 20], "kei": [1, 3, 13, 20], "i": [1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 20], "full": [1, 4, 5, 6, 7, 8, 12], "exampl": [1, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 15, 20], "an": [1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 15, 19, 20], "gpt": [1, 2, 3, 4, 5, 7, 10, 12, 14, 15], "In": [1, 5, 7, 9, 11, 12, 13, 14, 15], "residu": 1, "attention_output": 1, "data": [1, 2, 3, 5, 6, 13, 15], "post_layernorm": [1, 15], "mlp_output": 1, "build": [1, 3, 8, 9, 10, 12], "net_guard": [1, 9], "set_named_paramet": 1, "tensorrt_llm_gpt": [1, 3], "named_paramet": 1, "input": [1, 2, 3, 4, 5, 6, 9, 12, 13, 15, 16, 17, 20], "prepare_input": [1, 12, 17], "arg": [1, 9], "max_batch_s": [1, 7, 12, 13, 15, 17, 20], "max_input_len": [1, 12, 13, 17, 20], "max_output_len": [1, 13, 17], "max_beam_width": [1, 7, 12, 15, 17], "trt": [1, 3, 5, 9, 12, 15, 17, 20], "str_dtype_to_trt": 1, "engin": [1, 3, 4, 8, 9, 12, 15, 20], "rm": [1, 11, 13, 15], "rf": 1, "gpt2": [1, 8], "git": [1, 11, 13], "clone": [1, 11, 13], "http": [1, 7, 11, 13, 14, 15], "huggingfac": [1, 3], "co": [1, 15], "medium": 1, "pushd": 1, "pytorch_model": 1, "bin": 1, "safetensor": 1, "wget": 1, "q": [1, 7, 8, 15], "resolv": 1, "main": [1, 8, 15], "popd": 1, "python3": [1, 11, 13], "hf_gpt_convert": 1, "o": [1, 9, 12], "c": [1, 2, 3, 4, 7, 9, 13], "parallel": [1, 3, 4, 6, 8, 12, 13, 15, 16], "storag": [1, 13], "type": [1, 2, 3, 4, 5, 7, 8, 9, 14, 15, 20], "float16": [1, 9, 13], "model_dir": 1, "gpu": [1, 2, 5, 6, 7, 8, 11, 15, 20], "use_gpt_attention_plugin": [1, 13, 20], "open": [1, 8], "mode": [1, 3, 4, 7, 9, 12, 14, 15, 16, 17, 20], "decod": [1, 2, 7, 8, 12, 20], "generationsess": [1, 7, 12, 20], "model_config": [1, 20], "engine_buff": [1, 20], "runtime_map": 1, "debug_mod": [1, 20], "gener": [1, 2, 3, 4, 5, 12, 15, 20], "info": 1, "step": [1, 2, 3, 4, 7, 8, 9, 13, 15, 20], "0": [1, 2, 3, 5, 6, 7, 8, 9, 13, 15, 16, 17, 20], "ctx_shape": 1, "ctx_buffer": 1, "_get_context_shape_buff": 1, "input_id": [1, 17, 20], "max_input_length": [1, 15, 16, 17], "input_length": [1, 15, 16, 17], "position_id": [1, 17], "last_token_id": [1, 15, 17], "attention_mask": [1, 16, 17, 20], "this_src_cache_indirect": 1, "_set_shap": 1, "context": [1, 2, 4, 8, 12, 15, 20], "_set_buff": 1, "debug_buff": 1, "stream": [1, 2, 3, 4, 8, 12, 13, 20], "torch": [1, 7, 11, 15, 20], "cuda": [1, 2, 3, 7, 8, 11, 12, 20], "current_stream": 1, "cuda_stream": 1, "ok": 1, 
"_run": 1, "rais": 1, "runtimeerror": 1, "fail": [1, 12, 20], "synchron": [1, 2, 3, 15], "6": [1, 8, 13, 15], "max_new_token": [1, 12, 17, 20], "next_step_shap": 1, "next_step_buff": [1, 20], "_get_next_step_shape_buff": 1, "batch_siz": [1, 7, 9, 12, 13, 15, 16, 20], "scfg": [1, 20], "num_beam": 1, "next_src_cache_indirect": 1, "next_context": 1, "see": [1, 2, 3, 4, 7, 8, 11, 12, 14, 15, 16], "python": [1, 3, 8, 9, 13, 14], "8": [1, 2, 6, 7, 12, 13, 14, 15], "dict_kei": 1, "logit": [1, 2, 8, 15], "cache_indirect": [1, 7, 15, 16, 20], "past_key_0": 1, "past_value_0": 1, "present_key_0": 1, "present_value_0": 1, "past_key_1": 1, "past_value_1": 1, "present_key_1": 1, "present_value_1": 1, "past_key_2": 1, "past_value_2": 1, "present_key_2": 1, "present_value_2": 1, "past_key_3": 1, "past_value_3": 1, "present_key_3": 1, "present_value_3": 1, "past_key_4": 1, "past_value_4": 1, "present_key_4": 1, "present_value_4": 1, "past_key_5": 1, "past_value_5": 1, "present_key_5": 1, "present_value_5": 1, "past_key_6": 1, "past_value_6": 1, "present_key_6": 1, "present_value_6": 1, "past_key_7": 1, "past_value_7": 1, "present_key_7": 1, "present_value_7": 1, "past_key_8": 1, "past_value_8": 1, "present_key_8": 1, "present_value_8": 1, "past_key_9": 1, "past_value_9": 1, "present_key_9": 1, "present_value_9": 1, "past_key_10": 1, "past_value_10": 1, "present_key_10": 1, "present_value_10": 1, "past_key_11": 1, "past_value_11": 1, "present_key_11": 1, "present_value_11": 1, "past_key_12": 1, "past_value_12": 1, "present_key_12": 1, "present_value_12": 1, "past_key_13": 1, "past_value_13": 1, "present_key_13": 1, "present_value_13": 1, "past_key_14": 1, "past_value_14": 1, "present_key_14": 1, "present_value_14": 1, "past_key_15": 1, "past_value_15": 1, "present_key_15": 1, "present_value_15": 1, "past_key_16": 1, "past_value_16": 1, "present_key_16": 1, "present_value_16": 1, "past_key_17": 1, "past_value_17": 1, "present_key_17": 1, "present_value_17": 1, "past_key_18": 1, "past_value_18": 1, "present_key_18": 1, "present_value_18": 1, "past_key_19": 1, "past_value_19": 1, "present_key_19": 1, "present_value_19": 1, "past_key_20": 1, "past_value_20": 1, "present_key_20": 1, "present_value_20": 1, "past_key_21": 1, "past_value_21": 1, "present_key_21": 1, "present_value_21": 1, "past_key_22": 1, "past_value_22": 1, "present_key_22": 1, "present_value_22": 1, "past_key_23": 1, "past_value_23": 1, "present_key_23": 1, "present_value_23": 1, "sequence_length": [1, 15, 16, 20], "past_key_value_length": [1, 15, 16], "2": [1, 2, 5, 6, 7, 8, 9, 12, 13, 14, 15, 17, 20], "3": [1, 5, 6, 7, 8, 9, 12, 13, 15], "4": [1, 6, 8, 9, 12, 13, 14, 15], "5": [1, 5, 6, 8, 13, 15], "7": [1, 5, 8, 13, 15], "9": [1, 5, 8, 13, 15], "10": [1, 13], "11": [1, 6, 15], "12": [1, 5, 13, 15], "13": [1, 15], "14": [1, 13], "15": 1, "16": [1, 5, 12, 13, 14], "17": 1, "18": 1, "19": 1, "20": 1, "21": 1, "22": [1, 15], "23": 1, "0295": 1, "0256": 1, "0780": 1, "0562": 1, "0241": 1, "0273": 1, "0089": 1, "5882": 1, "1989": 1, "0464": 1, "6305": 1, "5967": 1, "8793": 1, "1056": 1, "7083": 1, "0889": 1, "0714": 1, "2931": 1, "1209": 1, "0886": 1, "5927": 1, "1048": 1, "3437": 1, "1085": 1, "0752": 1, "0739": 1, "6156": 1, "3454": 1, "3014": 1, "2653": 1, "7126": 1, "9685": 1, "1145": 1, "0084": 1, "9521": 1, "1425": 1, "devic": [1, 2, 13, 15, 20], "2129": 1, "5879": 1, "8172": 1, "7892": 1, "6887": 1, "6063": 1, "4184": 1, "0066": 1, "3895": 1, "9023": 1, "0686": 1, "2831": 1, "7935": 1, "5085": 1, "1696": 1, "5839": 1, "1375": 1, "0078": 1, 
"0810": 1, "1262": 1, "6260": 1, "1065": 1, "0529": 1, "7143": 1, "3322": 1, "8835": 1, "3427": 1, "8159": 1, "0622": 1, "2327": 1, "2217": 1, "2057": 1, "1475": 1, "3545": 1, "1673": 1, "1131": 1, "1268": 1, "1570": 1, "3972": 1, "8213": 1, "3282": 1, "8672": 1, "born": 1, "north": 1, "east": 1, "franc": 1, "soyer": 1, "train": [1, 3, 5], "chef": 1, "befor": [1, 2, 3, 4, 7, 9, 11, 12, 13, 15, 20], "move": 1, "london": 1, "earli": 1, "If": [1, 3, 7, 8, 9, 11, 12, 13, 15, 20], "you": [1, 3, 7, 8, 9, 11, 12, 13, 15, 20], "plugin": [1, 7, 8, 9, 10, 11, 12, 13, 14, 15], "can": [1, 2, 3, 4, 5, 7, 8, 9, 11, 12, 13, 14, 15, 17, 20], "set": [1, 2, 4, 7, 8, 9, 12, 13, 15, 16, 20], "environ": [1, 4, 8, 11, 13], "variabl": [1, 4, 6, 8], "cuda_launch_block": 1, "so": [1, 2, 7, 9, 11, 12, 13, 15, 16], "kernel": [1, 2, 3, 7, 8, 12, 13, 15], "ar": [1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 20], "launch": [1, 2, 3, 4, 8], "statu": 1, "check": [1, 8, 12, 15], "immedi": [1, 7], "memori": [1, 2, 3, 4, 5, 7, 8, 10, 13, 15, 20], "make": [1, 3, 7, 9, 13, 15], "sure": [1, 15], "respect": [1, 8, 12, 13, 14, 15], "time": [1, 2, 3, 4, 6, 11, 13, 15, 20], "shape": [1, 2, 3, 7, 8, 9, 12, 14, 15, 17, 20], "thei": [1, 3, 7, 8, 11, 13, 14, 15], "resid": 1, "correct": [1, 7], "cpu": [1, 2, 3, 15], "namespac": [2, 4, 8], "includ": [2, 3, 4, 5, 7, 8, 11, 13, 14], "A": [2, 3, 4, 7, 8, 15, 20], "helper": [2, 15], "manag": [2, 3, 7, 8, 10, 12, 20], "host": [2, 11, 15], "public": [2, 8], "ibufferptr": 2, "uniqueptr": 2, "itensorptr": 2, "cudastreamptr": 2, "std": [2, 4, 8], "shared_ptr": [2, 4], "explicit": [2, 8, 15], "construct": [2, 3], "paramet": [2, 3, 4, 7, 12, 15, 16, 20], "The": [2, 3, 5, 6, 7, 9, 10, 11, 12, 13, 15, 16, 20], "all": [2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 20], "oper": [2, 3, 7, 8, 9, 12, 13, 15], "alloc": [2, 4, 7, 8, 12, 15, 20], "de": 2, "copi": [2, 4, 8, 11, 12, 13, 15, 20], "size_t": [2, 8], "size": [2, 5, 6, 7, 8, 13, 15, 16, 17], "nvinfer1": 2, "datatyp": [2, 3, 8, 15, 17, 20], "kbyte_typ": 2, "const": [2, 4, 8], "given": [2, 4, 6, 8, 12, 14, 15, 16, 17, 20], "dim": [2, 15, 16], "dimens": [2, 7, 8, 12, 15, 16, 17], "memorytyp": 2, "inlin": 2, "emptybuff": 2, "empti": [2, 4], "mai": [2, 3, 4, 7, 8, 11, 12, 13, 15], "resiz": 2, "later": [2, 3, 12], "emptytensor": 2, "reshap": 2, "void": [2, 3, 4, 8], "setzero": 2, "buffer": [2, 8, 12, 15], "content": [2, 12, 15], "zero": [2, 8, 14, 15, 16], "src": [2, 3, 15], "dst": 2, "srctype": 2, "dsttype": 2, "copyfrom": 2, "new": [2, 4, 5, 6, 7, 8, 9, 10, 12, 15, 20], "potenti": [2, 4], "differ": [2, 3, 7, 8, 12, 13, 14, 15], "templat": [2, 3], "typenam": [2, 3], "t": [2, 3, 7, 12, 15], "vector": [2, 8, 15], "getstream": 2, "get": [2, 8, 9, 11, 15, 20], "underli": [2, 12], "memorypoolreserv": [2, 12], "current": [2, 4, 7, 8, 11, 12, 13, 15, 20], "reserv": [2, 8, 12], "pool": [2, 4, 7, 20], "memorypoolus": 2, "memorypoolfre": [2, 12], "free": [2, 3, 4, 8, 12, 13], "memorypooltrimto": 2, "try": [2, 4, 12], "trim": 2, "byte": [2, 8], "implicitli": 2, "static": [2, 11, 15, 16, 20], "pin": 2, "attribut": [2, 9, 20], "constexpr": 2, "auto": [2, 3, 4, 7, 8, 15], "kuint8": 2, "privat": [2, 8], "member": [2, 3, 8, 9, 15], "mstream": 2, "initmemorypool": [2, 12], "int": [2, 3, 8, 15, 16, 17, 20], "typedef": 2, "sizetyp": 2, "int32_t": [2, 4, 15], "tokenidtyp": 2, "stringptrmap": 2, "unordered_map": 2, "string": [2, 4, 8, 15, 20], "pointer": [2, 4, 8, 15, 20], "cudaevent_t": 2, "unsign": [2, 4], "flag": [2, 7, 8, 12, 15], "cudaeventdisabletim": 2, 
"event": 2, "destroi": [2, 12], "destructor": [2, 4], "creation": [2, 15], "By": [2, 8, 11], "default": [2, 8, 11, 12, 13, 15, 20], "disabl": [2, 4, 7, 8, 12, 15], "bool": [2, 4, 8, 9, 15, 16, 17, 20], "ownsev": 2, "pass": [2, 4, 7, 8, 9, 12, 15, 16, 20], "exist": [2, 8, 20], "object": [2, 3, 8, 12, 15, 16, 17, 20], "whether": [2, 8, 15, 16, 20], "own": [2, 3, 4, 8, 11], "associ": [2, 4, 8, 11, 15], "element_typ": 2, "remove_pointer_t": 2, "eventptr": 2, "unique_ptr": 2, "delet": 2, "mevent": 2, "mownsev": 2, "cudastreamnonblock": 2, "prioriti": 2, "cudastreamcreatewithflag": 2, "list": [2, 3, 4, 8, 9, 11, 15, 16, 17, 20], "valid": [2, 13, 15], "lower": [2, 8, 9, 12, 15], "number": [2, 3, 4, 7, 8, 12, 13, 14, 15, 16], "repres": [2, 13, 15, 20], "higher": [2, 3, 4, 5, 8, 12, 13], "cudadevicegetstreampriorityrang": 2, "more": [2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15], "inform": [2, 7, 8], "about": [2, 12], "meaning": 2, "cudastream_t": 2, "ownsstream": 2, "which": [2, 4, 7, 8, 9, 11, 12, 13, 15, 17, 20], "wa": [2, 8, 14, 16], "getdevic": 2, "record": [2, 9], "wait": [2, 4], "streamptr": 2, "mdevic": 2, "mownsstream": 2, "tensorptr": 2, "maxlength": 2, "maxkvcachelength": 2, "batchsiz": [2, 5, 8], "endid": [2, 8], "finish": [2, 8, 20], "sequencelimitlength": 2, "embeddingbia": 2, "length": [2, 5, 6, 7, 8, 12, 13, 15], "badwordslist": [2, 8], "stopwordslist": [2, 8], "norepeatngrams": 2, "cacheindirect": 2, "sharedptr": 2, "id": [2, 4, 8, 15, 16, 20], "newtokensstep": 2, "newtoken": 2, "newtokensvec": 2, "finishedstep": 2, "finishedsum": 2, "logprob": [2, 8], "cumlogprob": 2, "parentid": 2, "beamhypothes": 2, "float": [2, 3, 5, 8, 14, 15, 16, 17], "knegativeinfin": 2, "1e20f": 2, "beamwidth": [2, 8], "maxsequencelength": [2, 8, 12], "releas": [2, 7, 8, 12, 13, 14, 15], "init": [2, 11, 13], "slice": [2, 15], "batchindex": 2, "outputidstgt": 2, "sequencelengthstgt": 2, "normedscor": 2, "minnormedscor": 2, "numbeam": 2, "isdon": 2, "ttensor": 2, "genericgenerationinput": 2, "padid": [2, 8], "pack": [2, 8, 12, 15], "maxnewtoken": [2, 8], "base": [2, 3, 5, 8, 12, 15, 16, 17, 19, 20], "genericgenerationoutput": 2, "callback": [2, 8], "contextlogit": [2, 8], "generationlogit": [2, 8], "ontokengener": [2, 8], "igptdecod": 2, "subclass": 2, "virtual": [2, 16], "setup": [2, 7, 12, 20], "forwardasync": 2, "gathertre": 2, "finaloutputid": 2, "accepttoken": 2, "targettokenid": 2, "drafttokenid": 2, "contextlength": 2, "numdrafttoken": 2, "sequencelength": 2, "finishedvec": 2, "finishedfin": 2, "vocabs": [2, 8], "vocabsizepad": [2, 8], "overrid": [2, 20], "mmanag": 2, "cudaalloc": 2, "malloc": 2, "dynamicdecodelay": 2, "mdynamicdecodelay": 2, "mlogprobstil": 2, "support": [2, 4, 5, 6, 7, 15], "flight": [2, 7, 12], "batch": [2, 5, 6, 10, 12, 13, 15, 20], "maxbatchs": [2, 8], "maxbeamwidth": [2, 4, 8], "maxtokensperstep": 2, "call": [2, 3, 4, 7, 8, 9, 12, 15, 20], "newrequest": 2, "batchidx": 2, "decoder_batch": 2, "request": [2, 3, 5, 7, 8, 12, 15], "initi": [2, 8, 12, 15], "newbatch": 2, "tokenptr": 2, "one": [2, 4, 7, 8, 9, 11, 12, 13, 15, 20], "without": [2, 3, 7, 12, 15], "block": [2, 3, 4, 7, 8, 12, 15, 20], "process": [2, 3, 4, 7, 8, 13, 15], "token": [2, 4, 7, 8, 12, 14, 15, 16, 20], "forwardsync": 2, "complet": [2, 4, 8], "thread": [2, 4, 7], "last": [2, 4, 7, 8, 15], "getfinish": 2, "indic": [2, 4, 7, 8, 12, 13, 15, 16], "getoutputid": 2, "index": [2, 8, 10, 15], "maxinputlength": [2, 8], "contain": [2, 3, 4, 7, 8, 9, 14, 15, 17, 20], "pad": [2, 4, 8, 9, 12, 15, 16], "final": [2, 4, 8, 15], "gather": 
[2, 15], "beam": [2, 3, 4, 8, 12, 15], "search": [2, 3, 8, 10, 15], "result": [2, 3, 4, 5, 6, 7, 8, 11, 15, 16], "onli": [2, 3, 4, 7, 8, 9, 11, 12, 13, 15, 16, 20], "avail": [2, 3, 6, 9, 11, 12, 13, 14], "getparentid": 2, "parent": 2, "collect": [2, 3, 4, 9, 13, 15], "dure": [2, 3, 4, 7, 8, 9, 11, 12, 15], "getcumlogprob": 2, "cumul": 2, "log": [2, 8, 12], "probabl": [2, 8], "per": [2, 4, 6, 7, 8, 12, 14, 15], "getlogprob": 2, "getallnewtoken": 2, "getnewtoken": 2, "iter": [2, 3, 4, 20], "within": [2, 3, 13, 15], "getnbstep": 2, "execut": [2, 3, 8, 11, 12, 15, 20], "each": [2, 3, 4, 7, 8, 9, 12, 13, 14, 15, 16, 20], "getnbfinish": 2, "sequenc": [2, 3, 5, 6, 7, 8, 9, 12, 13, 15, 16, 20], "gptdecoderptr": 2, "decodinginputptr": 2, "decodingoutputptr": 2, "postprocessrequest": 2, "mvocabs": 2, "mvocabsizepad": 2, "mbuffermanag": 2, "mforwardtoken": 2, "mforwardev": 2, "mdecod": 2, "mdecodinginput": 2, "mdecodingoutput": 2, "mjointdecodinginput": 2, "mjointdecodingoutput": 2, "mdrafttokenid": 2, "mnumdrafttoken": 2, "mnbstep": 2, "mfinish": 2, "mfinishedsum": 2, "mmaxnewtoken": 2, "mbeamwidth": 2, "mgeneratedtokensperstep": 2, "mmaxsequencelength": 2, "mmaxkvcachelength": 2, "mactualbatchs": 2, "mmaxtokensperstep": 2, "name": [2, 4, 8, 9, 15, 20], "precis": [2, 8, 10, 12], "tensorparallel": [2, 8], "pipelineparallel": [2, 8], "modelconfig": [2, 8, 20], "getmodelconfig": 2, "getnam": 2, "getprecis": 2, "gettensorparallel": 2, "getpipelineparallel": 2, "getworlds": 2, "enginefilenam": 2, "model": [2, 4, 5, 6, 7, 10, 12, 14, 15], "pars": 2, "json": [2, 4], "istream": 2, "filesystem": 2, "path": [2, 4, 7, 11, 13, 15], "mname": 2, "mprecis": 2, "mtensorparallel": 2, "mpipelineparallel": 2, "mgptmodelconfig": 2, "enum": 2, "modelvari": 2, "enumer": [2, 15, 19], "kgpt": 2, "kglm": 2, "nblayer": 2, "nbhead": 2, "hiddens": [2, 8], "getvocabs": 2, "noexcept": 2, "getvocabsizepad": 2, "worldsiz": [2, 8], "getnblay": 2, "getnbhead": 2, "getnbkvhead": 2, "setnbkvhead": 2, "nbkvhead": 2, "gethiddens": 2, "getsizeperhead": 2, "getdatatyp": 2, "usegptattentionplugin": [2, 8], "usepackedinput": 2, "inputpack": [2, 8], "usepagedkvcach": 2, "pagedkvcach": [2, 8], "gettokensperblock": 2, "settokensperblock": 2, "tokensperblock": [2, 8], "quantmod": [2, 7, 8, 15, 16, 17, 19, 20], "getquantmod": 2, "setquantmod": 2, "supportsinflightbatch": 2, "getmaxbatchs": 2, "setmaxbatchs": 2, "getmaxinputlen": 2, "setmaxinputlen": 2, "maxinputlen": [2, 8], "getmaxoutputlen": 2, "setmaxoutputlen": 2, "maxoutputlen": [2, 8], "getmaxnumtoken": 2, "setmaxnumtoken": 2, "maxnumtoken": 2, "useprompttun": 2, "getmaxpromptembeddingtables": 2, "setmaxpromptembeddingtables": 2, "maxpromptembeddingtables": 2, "computecontextlogit": 2, "computegenerationlogit": 2, "getmodelvari": 2, "setmodelvari": 2, "usecustomallreduc": 2, "customallreduc": 2, "setmaxdraftlen": 2, "maxdraftlen": 2, "getmaxtokensperstep": 2, "mnblayer": 2, "mnbhead": 2, "mnbkvhead": 2, "mhiddens": 2, "mdatatyp": 2, "musegptattentionplugin": 2, "minputpack": 2, "mpagedkvcach": 2, "mtokensperblock": 2, "mquantmod": 2, "mmaxbatchs": 2, "mmaxinputlen": 2, "mmaxoutputlen": 2, "mmaxnumtoken": 2, "mcomputecontextlogit": 2, "mcomputegenerationlogit": 2, "mmodelvari": 2, "musecustomallreduc": 2, "mmaxpromptembeddingtables": 2, "mmaxdraftlen": 2, "batch_manag": [2, 4], "kv_cache_manag": 2, "loggerptr": 2, "ilogg": 2, "config": [2, 8, 12, 17, 20], "sessionconfig": [2, 8], "enginebuff": [2, 8], "engines": [2, 8], "logger": [2, 8], "nullptr": 2, "uint8_t": [2, 8], "enginefil": 2, 
"getlogg": 2, "getbuffermanag": 2, "getworldconfig": 2, "kvcachemanag": [2, 7, 20], "kvcacheconfig": [2, 8, 12], "tokengeneratedcallback": 2, "usecudagraph": 2, "generatebatch": 2, "microbatchesoutput": 2, "microbatchesinput": 2, "createcontext": 2, "numbatchesctx": 2, "numbatchesgen": 2, "createbuff": 2, "nummicrobatch": 2, "createdecod": 2, "logitstyp": 2, "decoderperrequest": [2, 8], "createkvcachemanag": 2, "createcustomallreduceworkspac": 2, "executecontextstep": 2, "microbatch": [2, 4], "microbatchoffset": 2, "executegenerationstep": 2, "microbatchesfinish": 2, "decoderstepasync": 2, "decoderstep": 2, "microbatchid": 2, "pp": [2, 8], "rank": [2, 4, 7, 8, 12, 15, 20], "receiv": [2, 4, 15], "other": [2, 3, 7, 8, 11, 12, 13, 15], "shouldstopsync": 2, "shouldstop": 2, "prob": [2, 8], "send": [2, 3, 15], "them": [2, 9, 12, 20], "first": [2, 3, 6, 7, 8, 9, 11, 15], "asynchron": 2, "requir": [2, 3, 7, 8, 11, 12, 13, 15, 16], "access": [2, 4, 8, 13], "kvcacheaddsequ": 2, "firstbatchidx": 2, "initdecod": 2, "outputid": 2, "popul": [2, 3, 4, 7, 8, 15], "refer": [2, 3, 9, 13, 15], "createontokengeneratedcallback": 2, "mmodelconfig": 2, "mworldconfig": 2, "ncclcommun": 2, "mpipelinecomm": 2, "mcommstream": 2, "mcommev": 2, "mcommptr": 2, "ipcmemori": [2, 15], "mipcmemoryhandl": 2, "mdecodermaxsequencelength": 2, "mdecodermaxkvcachelength": 2, "mlogger": 2, "tllmruntim": [2, 8], "mruntim": 2, "mkvcachemanag": 2, "microbatchconfig": 2, "mmicrobatchconfig": 2, "runtimebuff": 2, "mbuffer": 2, "mreceivedev": 2, "mcudagraphmod": 2, "cudagraphexecutor": 2, "mcudagraphinst": 2, "friend": 2, "trtgptmodelv1": 2, "configur": [2, 3, 6, 7, 12, 13, 20], "session": [2, 20], "width": [2, 4, 7, 8, 12], "smaller": [2, 8, 12, 13, 15], "than": [2, 5, 6, 7, 8, 9, 12, 15], "divid": [2, 15], "micro": [2, 4, 8, 12], "cudagraphmod": [2, 8], "ctxmicrobatchs": [2, 8], "nullopt": 2, "genmicrobatchs": [2, 8], "hasinst": 2, "clear": [2, 20], "preparenextgraph": 2, "nextcontextid": 2, "cudagraph_t": 2, "graph": [2, 3, 8, 10, 12, 15], "updat": [2, 3, 6, 11, 13], "uploadtostream": 2, "cudagraphexec_t": 2, "minstanc": 2, "numctxpergen": 2, "getctxcontextid": 2, "generationbatchid": 2, "contextbatchid": 2, "numgenbatch": 2, "phase": [2, 4, 8, 9, 12, 15], "next": [2, 3, 5, 12, 20], "numctxbatch": 2, "getgencontextid": 2, "flipflopid": 2, "flip": [2, 15], "flop": 2, "between": [2, 3, 7, 8, 12, 15], "ctxbatchsiz": 2, "genbatchs": 2, "util": [2, 3, 4, 7, 8, 12], "loadengin": 2, "enginepath": 2, "struct": 2, "memorytypestr": 2, "kgpu": 2, "kcpu": 2, "kpin": 2, "datatypetrait": 2, "kfloat": [2, 3], "char": 2, "sizeof": 2, "khalf": 2, "half": [2, 3, 15], "kint8": 2, "int8_t": 2, "int8": [2, 12, 15], "kint32": 2, "int32": [2, 7, 15], "kint64": 2, "int64_t": 2, "int64": [2, 15], "uint32_t": 2, "uint32": 2, "uint64_t": [2, 4], "uint64": 2, "kunsign": 2, "kbool": 2, "uint8": 2, "trtdatatyp": 2, "bufferdatatyp": 2, "pointerelementtyp": 2, "remove_reference_t": 2, "remove_const_t": 2, "constpointercast": 2, "ptr": [2, 15], "d": [2, 4, 13, 15], "buffercast": 2, "ostream": 2, "kdatatyp": 2, "kisunsign": 2, "kispoint": 2, "For": [2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20], "convert": [2, 3], "wrapper": [2, 9], "around": 2, "_unsign": 2, "ispoint": 2, "isunsign": 2, "getsiz": 2, "ktrtpointertyp": 2, "munsign": 2, "mpointer": 2, "kunderlyingtyp": 2, "uniqueconstptr": 2, "sharedconstptr": 2, "arrai": [2, 15, 20], "element": [2, 7, 8, 14, 15], "getsizeinbyt": 2, "getcapac": 2, "capac": [2, 6], "getdatatypenam": 2, "getmemorytyp": 2, 
"getmemorytypenam": 2, "newsiz": 2, "op": [2, 9, 15], "equal": [2, 8, 12, 15, 16], "reset": [2, 8, 20], "Not": 2, "allow": [2, 4, 7, 8, 13, 15], "offset": [2, 14, 15], "view": [2, 15], "have": [2, 4, 7, 8, 12, 13, 15], "same": [2, 3, 4, 5, 7, 8, 9, 12, 13, 15, 16, 20], "tconstptr": 2, "enable_if_t": 2, "is_const_v": 2, "independ": 2, "wrap": [2, 3, 15, 20], "cannot": [2, 3, 8, 13, 15], "beyond": [2, 5], "determin": [2, 7, 14, 15, 17], "protect": 2, "tobyt": 2, "bufferrang": 2, "value_typ": 2, "size_typ": 2, "const_refer": 2, "const_point": 2, "const_iter": 2, "begin": 2, "end": [2, 3, 4, 7, 8, 13, 15], "cbegin": 2, "cend": 2, "mdata": 2, "msize": 2, "actual": [2, 9, 15], "maxseqlen": 2, "consttensorptr": 2, "bufferptr": 2, "inputlen": 2, "generatedtokensperstep": 2, "drafttoken": 2, "computecumlogprob": 2, "computelogprob": 2, "tensorconstptr": 2, "activ": [2, 3, 4, 5, 7, 8, 9, 11, 14, 15], "also": [2, 3, 4, 6, 7, 8, 9, 11, 12, 14, 15], "reshapebuff": 2, "its": [2, 3, 4, 6, 7, 8, 9, 12, 15], "dimtyp": 2, "decltyp": 2, "getshap": 2, "volum": [2, 11], "squeez": 2, "remov": [2, 3, 7, 8, 9, 11, 12, 13, 15], "unit": [2, 11], "from": [2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 15, 16, 20], "unsqueez": [2, 15], "add": [2, 3, 7, 8, 9, 10, 11, 15, 20], "specifi": [2, 8, 9, 11, 12, 13, 15], "posit": [2, 15], "nbdim": 2, "volumenonneg": 2, "throw": 2, "where": [2, 3, 7, 8, 12, 13, 14, 15, 20], "ad": [2, 7, 8, 9, 11, 15, 20], "w": [2, 14], "r": [2, 13], "makeshap": 2, "initializer_list": 2, "conveni": 2, "tostr": 2, "castsiz": 2, "setpeeraccess": 2, "enabl": [2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16], "buffers": 2, "getcommptrstensor": 2, "flags_siz": 2, "max_all_reduce_block": 2, "allocateipcmemori": 2, "destroyipcmemori": 2, "mbuffers": 2, "mbufferptr": 2, "difftyp": 2, "ptrdiff_t": 2, "getgpu": 2, "getcpu": 2, "getpin": 2, "getgpudiff": 2, "getcpudiff": 2, "getpinneddiff": 2, "dealloc": 2, "getinst": 2, "bytestostr": 2, "mgpu": 2, "mcpu": 2, "mpin": 2, "mgpudiff": 2, "mcpudiff": 2, "mpinneddiff": 2, "thread_loc": 2, "genericprompttuningparam": 2, "embeddingt": 2, "task": [2, 13, 14, 16, 20], "prompttuningen": 2, "filltaskstensor": 2, "taskshost": 2, "numcontextrequest": 2, "reqbeamwidth": 2, "reqpromptlength": 2, "packedinput": 2, "optvec": 2, "floattyp": 2, "temperatur": [2, 8], "minlength": [2, 8], "repetitionpenalti": [2, 8], "presencepenalti": [2, 8], "topk": [2, 8], "topp": [2, 8], "long": [2, 7, 12], "randomse": [2, 8], "toppdecai": [2, 8], "toppmin": [2, 8], "toppresetid": [2, 8], "beamsearchdiversityr": [2, 8], "lengthpenalti": [2, 8], "sever": [2, 4, 9, 12, 15, 20], "asciichar": 2, "msg": 2, "getlevel": 2, "setlevel": 2, "gpuspernod": [2, 8], "kdefaultgpuspernod": 2, "istensorparallel": 2, "ispipelineparallel": 2, "getrank": 2, "getgpuspernod": 2, "getpipelineparallelrank": 2, "gettensorparallelrank": 2, "isfirstpipelineparallelrank": 2, "islastpipelineparallelrank": 2, "my": 2, "pipelin": [2, 3, 4, 8, 12], "getlastrank": 2, "getpipelineparallelgroup": 2, "validconfig": 2, "mpi": [2, 3, 4, 8], "mrank": 2, "mgpuspernod": 2, "toolkit": 3, "assembl": 3, "optim": [3, 6, 7, 8, 12, 15], "solut": 3, "perform": [3, 4, 6, 7, 8, 9, 10, 15], "larg": [3, 7, 12, 13, 15], "languag": [3, 8, 15], "infer": [3, 5, 6, 8, 14, 15, 20], "offer": 3, "effici": [3, 7, 8, 12, 13], "nvidia": [3, 5, 6, 11, 12, 13, 15], "compon": [3, 4, 7], "those": [3, 7, 8, 13, 14, 15, 16], "well": [3, 5, 7, 8, 14], "backend": [3, 4], "triton": 3, "server": [3, 5], "easili": [3, 15], "web": 3, "servic": 3, "through": [3, 4, 7, 8, 9, 
16], "As": [3, 7, 9, 12, 14, 15], "user": [3, 4, 7, 8, 9, 11, 12, 13, 14, 15], "veri": [3, 7, 8], "either": [3, 4, 12, 15], "your": [3, 8, 11, 13], "select": [3, 12, 15], "pre": [3, 7, 12, 13, 15], "onc": [3, 8, 9, 11, 12, 13, 15], "must": [3, 4, 7, 8, 11, 14, 15, 16, 20], "framework": [3, 15], "outsid": 3, "scope": 3, "checkpoint": [3, 14], "download": 3, "variou": [3, 7, 13], "To": [3, 6, 7, 11, 12, 13, 14, 15], "illustr": [3, 9], "point": [3, 4, 5, 7, 8, 13, 14, 15], "lot": [3, 7], "obtain": [3, 4, 15], "hub": 3, "nemo": [3, 14], "pytorch": [3, 9, 15], "equip": 3, "recreat": 3, "wai": [3, 9, 11, 12], "eas": 3, "alreadi": [3, 7, 8, 9], "hand": 3, "standard": [3, 11, 15], "togeth": [3, 7, 8, 14], "along": [3, 7, 15], "extens": 3, "sampl": [3, 7, 20], "top": [3, 7, 8], "p": [3, 8, 15], "exhaust": 3, "found": [3, 5, 8, 9, 11, 14], "recommend": [3, 5, 7, 8, 11, 12, 13], "onlin": [3, 6, 13], "serv": [3, 4, 7], "explain": [3, 8, 12, 14, 15], "mention": [3, 8], "abov": [3, 4, 8, 11, 12, 13], "ha": [3, 4, 7, 8, 11, 12, 14, 15], "built": [3, 8, 11, 12, 13, 15], "power": [3, 6], "represent": [3, 8, 9], "deep": [3, 5, 6, 15], "neural": [3, 9], "becom": [3, 7, 9], "familiar": [3, 8], "core": [3, 5, 9], "concept": 3, "section": [3, 13, 15], "proceed": 3, "further": 3, "builder": 3, "That": [3, 4, 7, 8, 15], "instanc": [3, 4, 8, 9, 12, 15, 20], "create_network": 3, "method": [3, 7, 8, 13, 14, 20], "inetworkdefinit": [3, 9], "simpl": [3, 8, 9, 11], "insert": [3, 9, 15], "iactivationlay": 3, "act_typ": [3, 15], "activationtyp": [3, 15], "default_trtnet": 3, "add_activ": 3, "trt_tensor": [3, 15], "_create_tensor": 3, "get_output": [3, 9], "even": [3, 4, 7, 8, 12, 15], "easier": 3, "few": [3, 13], "most": [3, 5, 6, 8, 12, 15], "deriv": [3, 12], "partial": 3, "sigmoid": [3, 15], "special": [3, 7], "advanc": [3, 11], "silu": [3, 15], "travers": 3, "transform": [3, 7, 12], "expos": [3, 4, 8, 11], "ilay": [3, 9], "done": [3, 13, 15], "build_engin": 3, "build_serialized_network": 3, "everyth": 3, "work": [3, 4, 7, 8, 9, 11, 14, 15], "expect": [3, 6, 8, 13, 15], "produc": [3, 8, 9, 15], "ihostmemori": 3, "store": [3, 5, 7, 8, 12, 14, 15], "binari": [3, 15], "file": [3, 4, 7, 9, 13, 20], "emb": [3, 16], "known": [3, 4, 7, 15], "reason": [3, 7, 8], "bound": [3, 6, 8, 12, 15, 20], "lead": [3, 9], "code": [3, 4, 7, 8, 9, 11, 13, 14, 15], "like": [3, 4, 7, 8, 9, 12, 13, 14, 15], "two": [3, 4, 5, 7, 8, 9, 13, 15, 16, 20], "out_featur": [3, 16], "in_featur": [3, 16], "fromfil": 3, "note": [3, 4, 8, 9, 12, 14, 15, 20], "refit": 3, "featur": [3, 4, 8, 9, 11, 13], "refit_engin": 3, "One": [3, 15], "techniqu": [3, 4, 7, 14], "improv": [3, 5, 6, 7, 8, 13], "help": [3, 4, 7, 9, 11, 13], "reduc": [3, 4, 7, 11, 12, 15], "amount": [3, 12, 20], "transfer": 3, "dram": 3, "comput": [3, 5, 6, 7, 8, 12, 13, 15], "locat": [3, 8, 9, 11, 15], "multiprocessor": 3, "overhead": 3, "small": [3, 7, 12], "addit": [3, 4, 6, 11, 14, 15, 16], "cost": [3, 12], "classic": 3, "matrix": [3, 7, 8, 15], "multipl": [3, 4, 7, 8, 9, 13, 15, 16], "matmul": [3, 7, 14, 15], "preced": 3, "written": 3, "b": [3, 5, 6, 9, 13, 15], "global": [3, 4, 7], "read": [3, 4, 7, 8], "again": 3, "suboptim": 3, "why": [3, 12, 15], "identifi": [3, 4, 8, 15], "automat": [3, 4, 8, 9, 12, 14, 15], "appli": [3, 7, 9, 14, 15], "With": [3, 7, 8], "instead": [3, 8, 9, 11, 12, 15], "twice": 3, "fuse": [3, 7], "algorithm": [3, 7, 8], "possibl": [3, 8, 11, 12, 15], "almost": [3, 12], "infinit": 3, "some": [3, 8, 9, 12, 13], "involv": [3, 16], "modif": [3, 9], "flash": [3, 7], 
"multihead": [3, 7, 15], "mani": [3, 4, 7, 15], "arithmet": 3, "bmm": 3, "softmax": [3, 7, 15], "stand": [3, 13], "product": [3, 6, 7, 8, 15], "interleav": [3, 7], "loop": [3, 4, 8, 13], "non": [3, 4, 7, 13, 15], "trivial": 3, "necessarili": [3, 12], "someth": 3, "discov": 3, "might": [3, 8, 11, 12], "polyhedr": 3, "alwai": [3, 7, 8], "risk": [3, 4], "uncommon": 3, "overcom": [3, 7], "inevit": 3, "limit": [3, 7, 8, 9, 12, 13, 15], "mechan": [3, 4], "map": [3, 4, 7, 9, 15, 17, 20], "cpp": [3, 8, 10, 11, 13, 15], "follow": [3, 4, 8, 9, 11, 13, 14, 15], "interfac": [3, 20], "extend": [3, 15], "custom": [3, 8, 13, 15], "guid": [3, 15], "trigger": [3, 7, 8, 9], "encapsul": [3, 7, 8, 15], "fairli": 3, "quantizetensorplugin": 3, "enqueu": [3, 12, 20], "inputdesc": 3, "invokequant": 3, "els": [3, 15], "quantiz": [3, 5, 7, 8, 10, 15, 16, 17, 20], "cu": 3, "quantizedkernel": 3, "grid": 3, "detail": [3, 4, 7, 12, 15], "how": [3, 4, 8, 10, 11, 12, 13, 14], "head": [3, 8, 10, 12, 15], "queri": [3, 8, 10, 12, 15], "group": [3, 8, 10, 14, 15, 16], "role": 3, "load": [3, 13], "drive": 3, "typic": [3, 8, 9, 12], "regress": [3, 4, 7, 8], "charg": [3, 8], "both": [3, 4, 5, 7, 8, 9, 12, 14, 15, 16], "bodi": 3, "design": 3, "singl": [3, 4, 6, 7, 8, 12, 13, 14, 15], "system": [3, 4, 5, 8, 13], "commun": [3, 4, 8], "primit": 3, "nccl": [3, 15], "librari": [3, 8, 11, 13], "presenc": [3, 8], "connect": 3, "nvswitch": 3, "dgx": [3, 8], "ncclplugin": 3, "allreduc": [3, 15], "allgath": [3, 15], "tgt": [3, 15], "recv": [3, 15], "former": 3, "split": [3, 8, 12, 15, 20], "across": [3, 4, 6, 7, 8, 9, 15], "entir": [3, 12, 15], "sibbl": 3, "distribut": [3, 7, 8, 11, 12, 15], "subset": [3, 8, 15], "happen": [3, 8], "boundari": [3, 8, 12], "balanc": [3, 8], "bandwidth": [3, 5, 6, 8], "incur": 3, "issu": [3, 7], "less": [3, 5, 7, 8, 15], "term": [3, 15], "continu": [3, 4, 6, 7, 8], "throughput": [3, 5, 6], "reli": [4, 7, 9, 14], "aim": 4, "queue": 4, "elimin": 4, "inclus": [4, 8, 15], "newli": [4, 8], "arriv": 4, "via": [4, 11, 15], "hook": 4, "softwar": [4, 7, 8], "client": [4, 11], "text": [4, 8], "interact": 4, "mandatori": [4, 8], "Their": 4, "signatur": [4, 9, 15], "h": [4, 8, 15], "These": [4, 6, 11], "invok": [4, 8, 9], "regular": [4, 7, 15], "interv": 4, "varieti": 4, "below": [4, 6, 7, 8, 9, 13], "entri": 4, "getinferencerequestscallback": 4, "inferencerequest": 4, "take": [4, 7, 8, 9, 13, 15, 16], "maximum": [4, 6, 7, 8, 12, 13, 15], "accept": [4, 8, 11, 15], "neg": [4, 15], "unbound": 4, "64": [4, 5, 6, 8, 13, 20], "bit": [4, 5, 7, 8, 14], "uniqu": [4, 7, 8, 15], "respons": [4, 15], "deliv": [4, 13], "sendresponsecallback": 4, "conform": 4, "boolean": [4, 8, 15], "error": [4, 8, 12], "messag": [4, 12, 15], "been": [4, 7], "encount": 4, "case": [4, 5, 7, 8, 12, 13, 14, 15], "properli": 4, "handl": [4, 15, 16], "Its": [4, 7, 8, 15], "reject": 4, "ani": [4, 9, 17, 20], "sent": 4, "correspond": [4, 7, 9, 11, 14, 15, 16, 20], "being": [4, 7, 8, 13], "reus": [4, 12], "appear": [4, 7, 8, 15], "third": [4, 8], "argument": [4, 8, 11, 12, 15], "stop": [4, 8, 9], "pollstopsignalcallback": 4, "unordered_set": 4, "ensur": [4, 9], "report": [4, 12], "returnbatchmanagerstatscallback": 4, "packag": [4, 11], "field": [4, 8, 14], "timestamp": 4, "put_tim": 4, "tm": 4, "m": [4, 5, 13, 14, 15], "y": [4, 11, 14], "counter": 4, "increas": [4, 5, 6, 8, 15], "monoton": 4, "over": [4, 5, 6, 15], "count": 4, "max": [4, 5, 6, 12, 15], "page": [4, 6, 8, 10, 12, 15], "kv": [4, 8, 15], "cach": [4, 8, 14, 15], "schedul": [4, 12], "total": [4, 
7, 8, 12], "v1": 4, "slot": [4, 8], "integr": 4, "item": [4, 20], "assum": [4, 15], "style": [4, 7], "autoregress": 4, "architectur": [4, 5, 8, 10, 11], "spawn": 4, "worker": 4, "constructor": [4, 8], "persist": [4, 13], "start": [4, 9, 13, 15], "intend": [4, 12], "back": 4, "safe": [4, 9], "retir": 4, "notifi": 4, "final_respons": 4, "intern": [4, 7, 12], "state": [4, 7, 8, 9, 15], "relat": [4, 11, 12, 15], "freed": 4, "batchmanag": 4, "pathtotrtengin": 4, "trtgptmodeltyp": 4, "inflightbatch": 4, "schedulerpolici": 4, "polici": [4, 12], "maxnumrequest": 4, "getinferencerequestscb": 4, "sendresponsecb": 4, "adjust": [4, 12], "maxim": [4, 6], "aggress": 4, "max_util": 4, "paus": 4, "short": [4, 7, 20], "resum": 4, "visibl": [4, 15], "effect": 4, "latenc": [4, 5, 6, 7, 8], "adopt": [4, 8, 9], "conserv": 4, "approach": [4, 9], "know": 4, "suffici": 4, "worst": 4, "consumpt": [4, 5, 7], "guaranteed_no_evict": 4, "termin": 4, "node": [4, 8, 14, 15], "control": [4, 7, 8, 9, 14, 15, 20], "cuda_visible_devic": 4, "care": 4, "taken": [4, 5, 15], "broadcast": [4, 15], "seen": 4, "hold": [4, 9, 12, 16], "ident": [4, 15], "good": 4, "evalu": [5, 6], "hopper": [5, 11, 13], "amper": [5, 11, 13], "show": [5, 12], "up": [5, 6, 7, 13], "4x": [5, 6], "faster": [5, 6, 7], "1st": [5, 12, 13, 15], "abl": [5, 7], "peak": [5, 13], "concurr": 5, "while": [5, 8, 9, 12, 14], "maintain": [5, 7, 8, 14], "min": [5, 15], "applic": 5, "10m": 5, "fp16": [5, 7], "sxm": 5, "80gb": [5, 13], "isl": [5, 6, 13], "osl": [5, 6, 13], "tp": [5, 6, 8, 13, 15], "32": [5, 6, 12, 13, 14], "v0": [5, 6], "calcul": [5, 15], "sweep": 5, "largest": [5, 6, 8], "success": 5, "out": [5, 6, 8, 12, 13, 15], "j": [5, 7, 8, 14, 15], "6b": [5, 15], "128": [5, 6, 13], "907": [5, 13], "102": 5, "185": 5, "679": [5, 13], "481": 5, "111": 5, "speedup": 5, "0x": 5, "7x": 5, "tp1": [5, 6], "behind": 5, "chart": 5, "tabl": [5, 13, 15, 16], "larger": [5, 6, 7, 12, 13, 15], "stai": 5, "tune": [5, 12, 16], "highlight": 5, "llama": [5, 6, 8, 14], "come": [5, 8, 12], "soon": [5, 6], "recent": [5, 7], "demonstr": 5, "5x": 5, "compar": [5, 6, 8, 15], "previou": [5, 8], "2x": [5, 6], "switch": [5, 11, 12], "yet": 5, "anoth": [5, 7, 9, 15], "speed": 5, "highest": [5, 6, 9], "center": [5, 6], "acceler": [5, 6, 7], "ai": 5, "hpc": 5, "analyt": 5, "cloud": 5, "edg": 5, "workstat": 5, "nativ": 5, "doubl": [5, 12], "halv": [5, 15], "specif": [5, 8, 9, 11, 13, 15], "introduc": [5, 11, 14], "paper": [5, 14], "format": [5, 11, 12, 20], "learn": [5, 6, 15], "post": 5, "consist": [5, 9, 14, 15], "encod": [5, 7, 8, 14, 15], "e4m3": 5, "expon": 5, "mantissa": 5, "e5m2": 5, "weight": [5, 15, 16], "gradient": 5, "practic": [5, 6, 12], "perceiv": [5, 13], "w8a8": 5, "mean": [5, 6, 7, 8, 12, 13, 15, 20], "8bit": 5, "decreas": 5, "fit": [5, 7], "819": 6, "9x": 6, "hbm3e": 6, "fp8": [6, 12, 15], "tok": [6, 13], "llama_13b": 6, "1024": [6, 15, 16], "2048": [6, 13], "750": 6, "349": 6, "llama_70b": 6, "512": 6, "014": 6, "654": 6, "341": 6, "303": 6, "preliminari": 6, "measur": [6, 13], "subject": [6, 15], "chang": [6, 11, 12, 15, 20], "v9": 6, "due": [6, 8, 13], "depend": [6, 7, 8, 9, 11, 12, 13, 15], "benefit": [6, 9], "retain": 6, "similar": [6, 8, 9, 15], "offlin": 6, "summar": [6, 7, 12, 13], "scenario": [6, 7], "70b": 6, "tp8": 6, "chat": 6, "agent": 6, "80": [6, 11, 13], "200": 6, "gpt3": 6, "175b": 6, "hgx": 6, "6x": 6, "vari": 6, "shown": [6, 8, 11, 15], "swept": 6, "newest": 6, "portfolio": 6, "8tb": 6, "expand": [6, 15], "141": 6, "gigabyt": 6, "gb": 6, "combin": [6, 9, 
13, 16], "second": [6, 8, 15], "mha": [7, 15], "multiqueri": 7, "mqa": [7, 15], "gqa": [7, 15], "quick": [7, 13], "remind": 7, "articl": 7, "arxiv": [7, 14, 15], "org": [7, 14, 15], "ab": [7, 14, 15], "1911": 7, "02150": 7, "2307": 7, "09288": 7, "variant": [7, 15], "fewer": 7, "gpt_attent": [7, 9, 15], "discuss": 7, "futur": [7, 11, 12, 13, 14, 15], "remove_input_pad": [7, 13, 15, 16, 20], "shorter": [7, 8], "max_sequence_length": 7, "excess": 7, "unneed": 7, "surround": 7, "problem": [7, 11], "1d": [7, 15], "context_fmha_typ": [7, 12], "slowest": 7, "footprint": [7, 12, 13], "signific": 7, "quadrat": [7, 12], "otherwis": [7, 8, 13, 15], "enabled_with_fp32_acc": 7, "accumul": 7, "forc": [7, 13], "fp32": 7, "vanilla": 7, "2205": 7, "14135": 7, "08691": 7, "extra": 7, "plan": 7, "order": [7, 12, 13, 15], "overal": 7, "mask": [7, 15, 16], "fly": [7, 13, 14, 15], "do": [7, 9, 13, 15], "dequant": [7, 15], "ia3": 7, "version": [7, 8, 11, 15], "occup": [7, 12], "multi_block_mod": 7, "test": [7, 8, 11, 13], "exact": [7, 12], "definit": [7, 15], "hard": 7, "predict": 7, "rule": 7, "thumb": 7, "worth": 7, "num_head": [7, 15, 17, 20], "processor": 7, "suggest": 7, "evolv": [7, 14], "research": [7, 14], "conduct": 7, "There": [7, 8, 9, 12, 13, 14, 15], "minimum": [7, 12, 15, 20], "heurist": 7, "purpos": [7, 11], "better": 7, "go": 7, "s0": 7, "s1": 7, "s2": 7, "constraint": [7, 15], "relax": 7, "ineffici": 7, "resourc": 7, "common": [7, 12, 15], "past": 7, "monolith": 7, "max_seqlen": [7, 15], "hidden_dim_per_head": [7, 15], "close": [7, 12], "reach": [7, 8], "decompos": 7, "keep": [7, 8, 15], "track": 7, "recycl": 7, "simplifi": [7, 8, 15], "rest": 7, "bfloat16": [7, 14], "howev": [7, 12, 13], "kv_cache_quant_mod": [7, 15], "int8_kv_cach": [7, 14], "fp8_kv_cach": [7, 13, 14], "kv_orig_quant_scal": [7, 15], "kv_quant_orig_scal": [7, 15], "treat": 7, "circular": 7, "n": [7, 8, 12, 13, 14, 15], "max_kv_cache_length": [7, 15, 20], "overwrit": 7, "least": 7, "surpass": 7, "window_s": 7, "deal": [7, 9], "exce": [7, 15], "address": [7, 12, 13], "experiment": [7, 8, 14], "simpli": [7, 13], "num_lay": [7, 16, 17, 20], "still": [7, 12, 15], "reconstruct": [7, 15], "beam_width": [7, 15, 20], "si": 7, "bi": 7, "ti": 7, "integ": [7, 8, 14, 15], "stage": [7, 9, 12], "concaten": [7, 15], "project": [7, 8, 11], "hidden": [7, 8, 12, 15, 16], "3d": [7, 15], "batch_beam_s": [7, 15], "hidden_dim": [7, 15], "multipli": [7, 15], "num_token": [7, 15], "greater": [7, 8, 15], "word": [7, 8, 15, 20], "pseudo": [7, 8, 14, 15], "seq": [7, 12], "context_phas": 7, "generation_phas": 7, "homogen": 7, "longer": [7, 8], "justifi": 7, "rotary_embedding_dim": [7, 15], "neox": [7, 8, 14], "form": [7, 15], "position_embedding_typ": [7, 15, 16, 17], "positionembeddingtyp": [7, 15, 16, 17], "rope_gpt_neox": [7, 15, 17], "rope_gptj": [7, 15, 16], "slope": [7, 15], "constant": [7, 12, 15], "f": [7, 8, 13, 15], "q_scale": [7, 15, 16, 17], "sqrt": [7, 15], "head_siz": [7, 15, 17, 20], "On": 7, "broader": 7, "aspect": 7, "kind": [7, 9], "accord": [7, 15, 16], "lightweight": 7, "popular": 7, "t5": [7, 8, 14], "famili": 7, "ahead": 7, "ii": [7, 15], "implicit": [7, 15], "suit": 7, "too": 7, "turn": [7, 12, 20], "max_dist": [7, 15, 16, 17], "compos": 8, "declar": [8, 9], "gptsessiontest": [8, 11], "restrict": [8, 11, 15], "now": 8, "enc_dec": 8, "folder": [8, 13, 14], "gptsession": [8, 11, 12], "gptmodelconfig": 8, "worldconfig": 8, "famou": 8, "mpi_comm_world": 8, "warn": [8, 12, 13, 15], "descript": [8, 15], "compil": [8, 11, 15], "overload": 
8, "three": [8, 14, 15], "longest": [8, 15], "addition": [8, 13], "directli": [8, 9, 11], "maxtoken": [8, 12], "freegpumemoryfract": [8, 12], "fraction": [8, 15], "enter": [8, 9], "getter": 8, "setter": 8, "vocabulari": [8, 16], "numlay": 8, "numhead": 8, "numkvhead": 8, "multi": [8, 10, 11, 15], "relev": [8, 11], "numer": [8, 10], "lmm": 8, "thing": 8, "cluster": 8, "collabor": [8, 15], "nvlink": 8, "consecut": 8, "harder": 8, "guarante": 8, "absenc": 8, "advantag": 8, "interconnect": 8, "a100": 8, "mpi_init": 8, "argc": 8, "argv": 8, "mpi_comm_s": 8, "mpi_comm_rank": 8, "simplic": 8, "mpirun": [8, 13], "command": [8, 11, 12, 13], "instal": [8, 11, 13], "talk": 8, "administr": 8, "program": 8, "until": 8, "look": [8, 11], "were": [8, 13], "present": [8, 14], "allfinish": 8, "computelogit": 8, "generatetokensfromlogit": 8, "generationinput": 8, "generationoutput": 8, "aka": [8, 15], "eo": 8, "50": 8, "256": 8, "257": 8, "fill": [8, 15], "numtoken": 8, "match": [8, 9, 13, 15, 20], "made": 8, "flexibl": [8, 11, 15], "embeddingbiasopt": 8, "ban": 8, "badwordslength": 8, "stopwordslength": 8, "let": [8, 9, 15], "consid": [8, 13, 15], "row": [8, 14, 15], "prefix": [8, 15], "diagram": 8, "inner": [8, 15], "maxseqlength": 8, "gather_all_token_logit": [8, 17, 20], "import": 8, "impact": [8, 13], "lm": 8, "just": [8, 12], "caller": 8, "samplingconfig": [8, 20], "except": [8, 15], "0f": 8, "penal": 8, "often": [8, 15], "irrespect": 8, "mutual": [8, 14], "exclus": [8, 14], "finer": [8, 9], "grain": [8, 9], "random": 8, "seed": 8, "decai": 8, "exponenti": 8, "factual": 8, "enhanc": [8, 12], "0e": 8, "influenc": 8, "remain": [8, 9, 12, 15], "greedi": 8, "upper": [8, 12, 15], "divers": 8, "factor": [8, 12, 14, 15], "renam": 8, "beamsearchlengthpenalti": 8, "scalar": [8, 15], "gptdecod": 8, "doe": [8, 12, 15, 20], "satisfi": 8, "separ": [8, 11, 13, 15], "biggest": 8, "individu": 8, "behavior": [8, 12, 15], "revisit": 8, "structur": [8, 9, 12], "could": [8, 9, 12], "rebuild": 8, "part": [9, 11, 15], "gw": 9, "manipul": 9, "modifi": 9, "facilit": 9, "gemm": [9, 12], "smoothquant": 9, "alter": 9, "fusion": [9, 12, 14], "ideal": 9, "condit": [9, 15], "would": [9, 13], "nest": 9, "flow": 9, "scatter": 9, "get_par": [9, 15], "get_us": [9, 15], "consum": [9, 15], "replace_all_uses_with": [9, 15], "replac": [9, 12, 13, 15], "origin": 9, "miss": [9, 13], "especi": 9, "opaqu": 9, "world": [9, 15], "wise": 9, "singleton": [9, 15], "flayerinfomemo": 9, "replace_input_with": 9, "replace_output_uses_with": 9, "redirect": 9, "usag": [9, 10, 15], "patternrewrit": 9, "match_and_rewrit": 9, "complex": 9, "patternanalyz": 9, "analysi": [9, 12], "analyz": 9, "rewritepatternmanag": 9, "label": [9, 15], "privileg": [9, 13], "analysispatternmanag": 9, "vital": 9, "certain": 9, "manner": 9, "routin": 9, "subtract": 9, "test_graph_rewrit": 9, "naivepatternrewriter_replaceaddwithsub": 9, "replace_add_with_sub": 9, "root_lay": 9, "layertyp": 9, "elementwis": [9, 15], "separate_match_rewrit": 9, "as_lay": 9, "elementwiseoper": [9, 15], "elementwise_sum": 9, "subgraph": 9, "get_input": 9, "old": 9, "elementwise_sub": 9, "dangl": 9, "prune": [9, 15], "explicitli": [9, 11], "skip": [9, 13], "mark_as_remov": 9, "rather": 9, "unnecessari": 9, "four": [9, 16], "share": [9, 11, 13, 16], "nearli": 9, "never": 9, "depriv": 9, "sinc": [9, 11, 12], "commonli": 9, "gptattentionpluginremovepaddingrewritepass": 9, "gpt_attention_plugin_remove_pad": 9, "plugin_v2": 9, "plugin_namespac": 9, "plugin_typ": 9, "gptattent": 9, "flayer": 9, "assert": 
[9, 15], "although": 9, "black": 9, "box": 9, "tensor_input": 9, "extern": [9, 12, 13, 20], "in_len": 9, "new_input": 9, "clone_input": 9, "arglist": 9, "new_out": 9, "replace_outputs_uses_with": 9, "quit": [9, 13], "focu": 9, "u": 9, "real": [9, 11], "pleas": 9, "fuseattentionwithbiaspass": 9, "graph_rewrit": 9, "debug": [10, 12, 20], "rewrit": [10, 15], "instruct": 11, "latest": 11, "polygraphi": 11, "repositori": 11, "docker": [11, 13], "platform": 11, "lf": [11, 13], "apt": 11, "github": [11, 13, 15], "com": [11, 13, 15], "cd": [11, 13], "submodul": [11, 13], "recurs": [11, 13], "pull": [11, 13], "imag": 11, "release_build": 11, "cuda_arch": 11, "cmake": 11, "ada": [11, 13], "89": 11, "90": 11, "release_run": 11, "local_us": [11, 13], "local": [11, 13], "account": 11, "root": [11, 13, 15], "insid": [11, 12, 15], "app": 11, "tag": 11, "devel": 11, "who": 11, "prefer": 11, "shell": 11, "target": 11, "dockerfil": 11, "ipc": 11, "ulimit": 11, "memlock": 11, "stack": 11, "67108864": 11, "pwd": 11, "workdir": 11, "script": [11, 12, 13, 14], "build_wheel": [11, 13], "trt_root": [11, 13], "usr": [11, 13], "deploi": 11, "pip": [11, 13], "whl": [11, 13], "increment": 11, "clean": 11, "semicolon": 11, "cuda_architectur": 11, "86": 11, "cmakelist": 11, "txt": [11, 13], "particular": 11, "opt": [11, 14, 15], "python_bind": 11, "wheel": [11, 13], "interpret": 11, "consult": 11, "understand": 11, "cpp_onli": 11, "particularli": 11, "avoid": [11, 12], "dual": 11, "abi": 11, "gcc": 11, "overridden": 11, "build_dir": 11, "choos": 11, "against": 11, "libtensorrt_llm": 11, "libtensorrt_llm_stat": 11, "libnvinfer_plugin_tensorrt_llm": 11, "under": 11, "question": 12, "At": [12, 16], "major": 12, "contributor": 12, "io": 12, "fix": 12, "chosen": 12, "strategi": [12, 15], "portion": [12, 15], "live": 12, "profil": [12, 15], "affect": 12, "icudaengin": [12, 20], "device_memory_s": 12, "off": 12, "fmha": 12, "explan": 12, "relationship": 12, "gptlmheadmodel": [12, 17], "line": [12, 13], "linearli": 12, "max_num_token": [12, 17], "becaus": [12, 13, 15], "save": [12, 13], "fold": 12, "rang": [12, 14, 15, 17], "significantli": 12, "workspac": [12, 15, 16, 17], "much": 12, "chunk": [12, 15], "thu": [12, 15], "normal": [12, 15], "max_context_length": [12, 15, 16, 20], "bind": 12, "behav": 12, "neither": [12, 15], "nor": 12, "85": 12, "And": [12, 15, 16], "firstli": 12, "enough": 12, "space": [12, 13], "oom": 12, "No": 12, "left": [12, 15], "whole": [12, 15], "buffermanag": 12, "driver": 12, "smi": 12, "concern": 12, "inspect": 12, "choic": [12, 15], "layout": 12, "theoret": 12, "though": 12, "succe": 12, "check_gpt_mem_usag": 12, "exceed": 12, "physic": [12, 15], "verbos": 12, "ye": [12, 15], "sequenti": 12, "shall": 12, "occupi": 12, "16x": 12, "observ": 13, "179": 13, "229": 13, "980": 13, "193": 13, "367": 13, "058": 13, "230": 13, "317": 13, "616": 13, "843": 13, "583": 13, "96": 13, "686": 13, "073": 13, "465": 13, "630": 13, "859": 13, "757": 13, "240": 13, "622": 13, "581": 13, "531": 13, "558": 13, "526": 13, "650": 13, "486": 13, "459": 13, "529": 13, "592": 13, "237": 13, "181": 13, "272": 13, "738": 13, "929": 13, "923": 13, "202": 13, "29": 13, "36": 13, "26": 13, "109": 13, "27": 13, "205": 13, "71": 13, "73": 13, "129": 13, "133": 13, "47": 13, "377": 13, "61": 13, "509": 13, "swiglu": [13, 15], "use_fused_mlp": [13, 17], "baselin": 13, "sourc": [13, 15, 16, 17, 19, 20], "right": [13, 15], "elev": 13, "uid": 13, "gid": 13, "boot": 13, "slurm": 13, "pyxi": 13, "caus": 13, "makefil": 13, "nv_gpu": 13, 
"gpu_opt": 13, "mount": 13, "previous": 13, "destin": 13, "appropri": 13, "docker_run_arg": 13, "ll": 13, "fine": [13, 16], "repeatedli": 13, "our": 13, "scheme": 13, "ran": 13, "transit": 13, "hbm3": 13, "enable_fp8": 13, "newer": 13, "tweak": 13, "find": 13, "gptj": 13, "enable_context_fmha": 13, "parallel_build": 13, "output_dir": 13, "tmp": 13, "world_siz": [13, 15], "hidden_act": [13, 16, 17], "gelu": [13, 15, 17], "strongly_typ": 13, "n_layer": 13, "28": 13, "n_head": 13, "n_embd": 13, "4096": 13, "n_posit": 13, "enable_two_optimization_profil": 13, "in_out_s": 13, "in_out": 13, "echo": 13, "awk": 13, "in_out_dim": 13, "gptsessionbenchmark": 13, "engine_dir": [13, 20], "warm_up": 13, "durat": 13, "num_run": 13, "input_output_len": 13, "pp_size": 13, "inter_s": [13, 17], "11008": 13, "vocab_s": [13, 16, 17, 20], "32000": 13, "n_kv_head": 13, "8192": 13, "28672": 13, "ffn_dim_multipli": 13, "multiple_of": 13, "oversubscrib": 13, "engine_path": 13, "_": 13, "use_inflight_batch": 13, "paged_kv_cach": [13, 20], "use_gemm_plugin": 13, "232": 13, "14848": 13, "65024": 13, "new_decoder_architectur": [13, 17], "ieee": 14, "x": [14, 15, 16], "scale": [14, 15], "satfinit": 14, "fp": 14, "static_cast": 14, "2d": [14, 15], "column": [14, 15], "channel": 14, "mi": 14, "ni": 14, "2211": [14, 15], "10438": 14, "accuraci": 14, "downstream": 14, "preprocess": 14, "prepar": [14, 15, 17], "2210": 14, "17323": 14, "2306": 14, "00978": 14, "weightonlygroupwisequantmatmulplugin": 14, "weight_only_groupwise_quant_matmul": 14, "v2": 14, "sq": 14, "baichuan": 14, "bert": [14, 15], "blip": 14, "chatglm": [14, 15], "v3": 14, "falcon": 14, "flan": 14, "internlm": 14, "mistral": 14, "mpt": 14, "replit": 14, "santacod": 14, "starcod": 14, "int4_weight": 14, "w4a": 14, "int8_weight": 14, "w8a": 14, "a8": 14, "per_channel": 14, "per_token": 14, "per_group": 14, "fp8_qdq": 14, "allreducestrategi": 15, "intenum": 15, "customallreducekernel": 15, "kept": 15, "sync": [15, 20], "oneshot": 15, "ring": 15, "twoshot": 15, "attentionmasktyp": [15, 16], "bidirect": 15, "bidirectionalglm": 15, "causal": 15, "dimrang": 15, "tupl": [15, 16, 20], "str": [15, 16, 17, 20], "param": [15, 20], "ctor": 15, "layernormpositiontyp": [15, 17], "pre_layernorm": [15, 17], "layernormtyp": [15, 17], "groupnorm": [15, 16], "rmsnorm": [15, 16, 17], "mlptype": [15, 17], "fusedgatedmlp": [15, 16], "gatedmlp": [15, 16], "alibi": 15, "alibi_with_scal": 15, "is_alibi": 15, "is_rop": 15, "learned_absolut": [15, 16, 17], "rel": 15, "rotaryscalingtyp": 15, "dynam": [15, 17, 20], "dim_rang": 15, "is_network_input": 15, "tensorloc": 15, "dens": 15, "cast": 15, "properti": [15, 20], "is_dynam": 15, "exclud": 15, "is_trt_wrapp": 15, "itensor": 15, "differenti": 15, "necessari": 15, "inherit": 15, "hierarchi": 15, "mark_output": 15, "keepdim": 15, "ndim": 15, "permut": 15, "new_tensor": 15, "undefin": 15, "split_size_or_sect": 15, "transpos": 15, "dim0": 15, "dim1": 15, "zero_is_placehold": 15, "unaryoper": 15, "closur": 15, "round": 15, "exp": 15, "sin": 15, "iunarylay": 15, "unari": 15, "tanh": 15, "sub": 15, "mul": 15, "prod": 15, "div": 15, "gt": 15, "lt": 15, "op_and": 15, "AND": 15, "op_or": 15, "OR": 15, "eq": 15, "pow": 15, "ielementwiselay": 15, "union": 15, "amongst": 15, "particip": 15, "section_s": 15, "contribut": 15, "doc": 15, "deeplearn": 15, "html": 15, "instance_id": [15, 16], "replic": 15, "poitner": 15, "barrier": 15, "arang": 15, "float32": [15, 16, 17], "ifilllay": 15, "filloper": 15, "linspac": 15, "_str_to_trt_dtype_dict": 15, 
"_util": 15, "argmax": 15, "onnx": 15, "blob": 15, "md": 15, "reduct": 15, "avg_pool2d": 15, "kernel_s": [15, 16], "stride": [15, 16], "ceil_mod": [15, 16], "count_include_pad": [15, 16], "bert_attent": 15, "relative_attent": [15, 16, 17], "relative_attention_bia": 15, "1706": 15, "03762": 15, "sum_of_token": 15, "bertattentionplugin": 15, "qkv": [15, 16], "max_seq_len": 15, "embed": 15, "num_bucket": [15, 16, 17], "distanc": 15, "broadcast_help": 15, "pair": 15, "127": 15, "split_siz": 15, "clip": 15, "alpha": 15, "beta": 15, "inp": 15, "jj": 15, "len": [15, 20], "ndarrai": 15, "iconstantlay": 15, "numpi": [15, 16], "serial": [15, 20], "constant_to_tensor_": 15, "conv2d": [15, 16], "dilat": [15, 16], "conv_transpose2d": 15, "output_pad": [15, 16], "einsum": 15, "einsum_eq": 15, "ieinsumlay": 15, "summat": 15, "equat": 15, "einstein": 15, "convent": 15, "ascii": 15, "letter": 15, "comma": 15, "subscript": 15, "repeat": 15, "diagon": 15, "ax": 15, "omit": 15, "express": 15, "alphabet": 15, "arrow": 15, "ij": 15, "jk": 15, "ik": 15, "equival": 15, "ellipsi": 15, "place": 15, "syntax": 15, "rubric": 15, "ji": 15, "kj": 15, "dot": 15, "ijk": 15, "ikl": 15, "ijl": 15, "elementwise_binari": 15, "sharding_dim": [15, 16], "tp_rank": [15, 16], "lookup": [15, 16], "among": 15, "transposit": 15, "default_net": 15, "plugin_config": 15, "lookup_plugin": 15, "igatherlay": 15, "tg_group": 15, "shard": [15, 16], "vocab": 15, "expand_shap": 15, "expans": 15, "islicelay": 15, "verifi": 15, "shrunk": 15, "behaviour": 15, "expand_dim": 15, "ishufflelay": 15, "new_shap": 15, "append": 15, "shuffl": 15, "expand_dims_lik": 15, "expand_mask": 15, "tgt_len": 15, "src_seq_len": 15, "tgt_seq_len": 15, "3rd": 15, "2nd": 15, "revers": 15, "axi": 15, "down": 15, "gatherel": 15, "gather_last_token_logit": 15, "extract": 15, "last_tokens_id": 15, "th": 15, "geglu": 15, "gate": 15, "generate_alibi_bias": 15, "key_length": 15, "bias": 15, "05100": 15, "generate_alibi_slop": 15, "alibi_scal": 15, "past_key_valu": [15, 16], "host_past_key_value_length": [15, 16], "host_max_kv_cache_length": [15, 16], "context_length": [15, 16, 20], "host_request_typ": [15, 16], "num_kv_head": [15, 16, 17, 20], "hidden_size_per_head": 15, "rotary_embedding_bas": [15, 16], "10000": [15, 16, 17], "rotary_embedding_scale_typ": 15, "rotary_embedding_scal": [15, 16], "rotary_embedding_max_posit": 15, "mask_typ": 15, "alibi_slop": 15, "kv_cache_block_point": [15, 16, 20], "do_cross_attent": [15, 16], "cross_qkv": 15, "cross_qkv_length": 15, "encoder_input_length": [15, 16, 20], "host_context_length": [15, 16, 20], "qkv_bia": [15, 17], "progress": 15, "hint": 15, "regard": 15, "merg": 15, "contigu": 15, "max_block": 15, "num_tokens_per_block": 15, "cache_indir_t": 15, "max_past_length": 15, "inflight": 15, "rope": 15, "theta": [15, 16], "ignor": 15, "rotari": 15, "glm": 15, "10b": 15, "max_blocks_per_sequ": 15, "cross": 15, "group_norm": 15, "num_group": [15, 16], "ep": [15, 16], "1e": [15, 16, 17], "05": [15, 16, 17], "todo": 15, "index_select": 15, "5th": 15, "interpol": 15, "scale_factor": 15, "nearest": 15, "align_corn": 15, "recompute_scale_factor": 15, "antialia": 15, "is_gated_activ": 15, "layer_norm": 15, "normalized_shap": [15, 16], "use_diff_of_squar": 15, "norm": 15, "simplest": 15, "gamma": 15, "formula": 15, "varianc": 15, "squar": 15, "var": 15, "epsilon": 15, "lora_plugin": [15, 20], "in_hidden_s": 15, "out_hidden_s": 15, "transa": 15, "transb": 15, "max_low_rank": 15, "lora_rank": 15, "lora_weights_point": 15, "lora_id": 15, "lora": 
15, "workflow": 15, "low_rank": 15, "in_point": 15, "out_point": 15, "mat2": 15, "imatrixmultiplylay": 15, "ireducelay": 15, "non_gated_vers": 15, "outer": 15, "vec2": 15, "p2p": 15, "ncclrecv": 15, "repeat_interleav": 15, "repetit": 15, "unspecifi": 15, "rms_norm": 15, "06": [15, 16, 17], "weig": 15, "ncclsend": 15, "emul": 15, "slicemod": 15, "strict_bound": 15, "isoftmaxlay": 15, "softplu": 15, "threshold": 15, "stabl": 15, "nn": 15, "revert": 15, "ith": 15, "squared_relu": 15, "untouch": 15, "enforc": 15, "iselectlay": 15, "mish": 16, "num_attention_head": 16, "max_position_embed": [16, 17], "apply_query_key_layer_sc": [16, 17], "attention_head_s": 16, "attention_mask_typ": 16, "rotary_embedding_sc": [16, 17], "use_int8_kv_cach": 16, "rotary_embedding_percentag": [16, 17], "quant_mod": [16, 17, 20], "cross_attent": [16, 20], "dense_bia": 16, "use_cach": [16, 17], "kv_cache_param": [16, 17], "attention_param": [16, 17], "encoder_output": [16, 17, 20], "position_embed": 16, "norm_before_bmm1": 16, "lora_param": [16, 17], "attentionparam": [16, 17], "encoder_max_input_length": [16, 20], "is_valid": 16, "gpt_attention_plugin": [16, 20], "is_valid_cross_attn": 16, "bertattent": 16, "keyvaluecacheparam": [16, 17], "fill_none_tensor_list": 16, "list_siz": 16, "get_first_kv_cache_block_point": 16, "get_first_past_key_valu": 16, "ropeembeddingutil": 16, "apply_rotary_pos_emb": 16, "pos_emb_typ": 16, "apply_rotary_pos_emb_chatglm": 16, "create_sinusoidal_posit": 16, "num_po": 16, "rotate_every_two": 16, "rotate_half": 16, "output_dtyp": 16, "in_channel": 16, "out_channel": 16, "padding_mod": 16, "convtranspose2d": 16, "output_s": 16, "num_embed": 16, "embedding_dim": 16, "prompttuningembed": 16, "prompt": 16, "supplementari": 16, "dictionari": 16, "whose": 16, "assign": 16, "adequ": 16, "prompt_embedding_t": [16, 17, 20], "task_vocab_s": 16, "logic": 16, "seq_len": 16, "num_task": 16, "num_tokens_per_task": 16, "alia": 16, "share_weight": 16, "multiply_gath": 16, "gemm_plugin": 16, "use_fp8": 16, "multiply_reduc": 16, "num_channel": 16, "affin": 16, "elementwise_affin": 16, "avgpool2d": 16, "baichuanforcausallm": 17, "logits_dtyp": 17, "mlp_hidden_s": 17, "baichuanmodel": 17, "generationmixin": 17, "brief": [17, 20], "fed": 17, "bertforquestionansw": 17, "type_vocab_s": 17, "num_label": 17, "token_type_id": 17, "bertmodel": 17, "bloomforcausallm": 17, "multi_query_mod": 17, "use_parallel_embed": 17, "embedding_sharding_dim": 17, "share_embedding_t": 17, "bloommodel": 17, "chatglmheadmodel": 17, "apply_residual_connection_post_layernorm": 17, "enable_debug_output": 17, "linear_bia": 17, "max_seq_length": 17, "model_nam": [17, 20], "norm_epsilon": 17, "tokens_per_block": [17, 20], "chatglmmodel": 17, "decodermodel": 17, "encoder_num_head": 17, "encoder_hidden_s": 17, "encoder_head_s": 17, "encoder_num_kv_head": 17, "has_position_embed": [17, 20], "has_embedding_layernorm": 17, "has_embedding_scal": 17, "has_attention_qkvo_bia": 17, "has_mlp_bia": 17, "has_model_final_layernorm": 17, "layernorm_ep": 17, "layernorm_posit": 17, "layernorm_typ": 17, "mlp_type": 17, "has_lm_head_bia": 17, "residual_sc": 17, "decoder_input_id": 17, "all_reduce_workspac": 17, "max_decoder_input_len": 17, "max_encoder_input_len": 17, "encodermodel": 17, "falconforcausallm": 17, "use_alibi": 17, "parallel_attent": 17, "falconmodel": 17, "gptjforcausallm": 17, "rotary_dim": 17, "gptjmodel": 17, "rotary_bas": 17, "rotary_sc": 17, "use_prompt_tun": 17, "gptmodel": 17, "prompt_task": 17, "prompt_vocab_s": [17, 20], 
"prompt_embedding_table_s": 17, "max_draft_len": 17, "gptneoxforcausallm": 17, "gptneoxmodel": 17, "llamaforcausallm": 17, "rms_norm_ep": 17, "attn_bia": 17, "mlp_bia": 17, "llamamodel": 17, "optlmheadmodel": 17, "pre_norm": 17, "do_layer_norm_befor": 17, "optmodel": 17, "qwenforcausallm": 17, "seq_length": 17, "neox_rotary_styl": 17, "qwenmodel": 17, "quantize_model": 17, "kwarg": [17, 20], "intflag": 19, "chatglmgenerationsess": 20, "debug_tensors_to_sav": 20, "cuda_graph_mod": 20, "generationsequ": 20, "seq_idx": 20, "batch_idx": 20, "get_batch_idx": 20, "idx": 20, "get_seq_idx": 20, "buffer_alloc": 20, "cuda_stream_guard": 20, "exit": 20, "sampling_config": 20, "stop_words_list": 20, "bad_words_list": 20, "no_repeat_ngram_s": 20, "output_sequence_length": 20, "return_dict": 20, "decode_batch": 20, "decode_regular": 20, "ite": 20, "sequence_limit_length": 20, "decode_stream": 20, "finalize_decod": 20, "first_lay": 20, "handle_per_step": 20, "dict": 20, "has_token_type_embed": 20, "last_lay": 20, "num_heads_kv": 20, "pp_communicate_final_output_id": 20, "final_output_id": 20, "pp_communicate_new_token": 20, "should_stop": 20, "cache_indir": 20, "_runtim": 20, "lora_manag": 20, "loramanag": 20, "lora_uid": 20, "use_custom_all_reduc": 20, "use_lora_plugin": 20, "memory_pool": 20, "max_blocks_per_seq": 20, "max_kv_cache_len": 20, "add_sequ": 20, "context_len": 20, "get_pointer_arrai": 20, "max_prompt_embedding_table_s": 20, "modelrunn": 20, "classmethod": 20, "from_dir": 20, "batch_input_id": 20, "parametr": 20, "hoc": 20, "output_id": 20, "context_logit": 20, "generation_logit": 20, "iexecutioncontext": 20, "create_execution_context": 20, "from_engin": 20, "from_serialized_engin": 20, "infer_shap": 20, "tensorinfo": 20, "everi": 20, "Or": 20, "set_input_shap": 20, "manual": 20, "succeed": 20, "async": 20, "set_shap": 20, "tensor_dict": 20, "to_word_list_format": 20, "word_dict": 20, "add_special_token": 20, "sentenc": 20, "am": 20, "happi": 20, "sad": 20}, "objects": {"": [[2, 0, 1, "_CPPv48nvinfer1", "nvinfer1"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv4N12tensorrt_llm13batch_managerE", "tensorrt_llm::batch_manager"], [2, 0, 1, "_CPPv4N12tensorrt_llm13batch_manager16kv_cache_managerE", "tensorrt_llm::batch_manager::kv_cache_manager"], [2, 0, 1, "_CPPv4N12tensorrt_llm6layersE", "tensorrt_llm::layers"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", 
"tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataTypeE", "tensorrt_llm::runtime::BufferDataType"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb", "tensorrt_llm::runtime::BufferDataType::BufferDataType"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb", "tensorrt_llm::runtime::BufferDataType::BufferDataType::_unsigned"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb", "tensorrt_llm::runtime::BufferDataType::BufferDataType::dataType"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb", "tensorrt_llm::runtime::BufferDataType::BufferDataType::pointer"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType11getDataTypeEv", "tensorrt_llm::runtime::BufferDataType::getDataType"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType7getSizeEv", "tensorrt_llm::runtime::BufferDataType::getSize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType9isPointerEv", "tensorrt_llm::runtime::BufferDataType::isPointer"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType10isUnsignedEv", "tensorrt_llm::runtime::BufferDataType::isUnsigned"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType15kTrtPointerTypeE", "tensorrt_llm::runtime::BufferDataType::kTrtPointerType"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType9mDataTypeE", "tensorrt_llm::runtime::BufferDataType::mDataType"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType8mPointerE", "tensorrt_llm::runtime::BufferDataType::mPointer"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType9mUnsignedE", "tensorrt_llm::runtime::BufferDataType::mUnsigned"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataTypecvN8nvinfer18DataTypeEEv", "tensorrt_llm::runtime::BufferDataType::operator nvinfer1::DataType"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManagerE", 
"tensorrt_llm::runtime::BufferManager"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager13BufferManagerE13CudaStreamPtr", "tensorrt_llm::runtime::BufferManager::BufferManager"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager13BufferManagerE13CudaStreamPtr", "tensorrt_llm::runtime::BufferManager::BufferManager::stream"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::BufferManager::CudaStreamPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10IBufferPtrE", "tensorrt_llm::runtime::BufferManager::IBufferPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10ITensorPtrE", "tensorrt_llm::runtime::BufferManager::ITensorPtr"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::dims"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::memoryType"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::memoryType"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::size"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::type"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::type"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copy"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv", "tensorrt_llm::runtime::BufferManager::copy"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType", "tensorrt_llm::runtime::BufferManager::copy"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy::dst"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::dst"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv", "tensorrt_llm::runtime::BufferManager::copy::dst"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::dst"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy::dst"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType", 
"tensorrt_llm::runtime::BufferManager::copy::dstType"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy::src"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::src"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv", "tensorrt_llm::runtime::BufferManager::copy::src"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::src"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy::src"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::srcType"], [2, 2, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [2, 2, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [2, 2, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [2, 5, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::T"], [2, 5, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::T"], [2, 5, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::T"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::dims"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::dims"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType", 
"tensorrt_llm::runtime::BufferManager::copyFrom::src"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::src"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::src"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::src"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::src"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu::dims"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu::type"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu::type"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyBuffer"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyBuffer::memoryType"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyBuffer::type"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyTensor"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyTensor::memoryType"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyTensor::type"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager9getStreamEv", "tensorrt_llm::runtime::BufferManager::getStream"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu::dims"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu::size"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu::type"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE", 
"tensorrt_llm::runtime::BufferManager::gpu::type"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14initMemoryPoolEi", "tensorrt_llm::runtime::BufferManager::initMemoryPool"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14initMemoryPoolEi", "tensorrt_llm::runtime::BufferManager::initMemoryPool::device"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10kBYTE_TYPEE", "tensorrt_llm::runtime::BufferManager::kBYTE_TYPE"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7mStreamE", "tensorrt_llm::runtime::BufferManager::mStream"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEi", "tensorrt_llm::runtime::BufferManager::memoryPoolFree"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEv", "tensorrt_llm::runtime::BufferManager::memoryPoolFree"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEi", "tensorrt_llm::runtime::BufferManager::memoryPoolFree::device"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEi", "tensorrt_llm::runtime::BufferManager::memoryPoolReserved"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEv", "tensorrt_llm::runtime::BufferManager::memoryPoolReserved"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEi", "tensorrt_llm::runtime::BufferManager::memoryPoolReserved::device"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToENSt6size_tE", "tensorrt_llm::runtime::BufferManager::memoryPoolTrimTo"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToEiNSt6size_tE", "tensorrt_llm::runtime::BufferManager::memoryPoolTrimTo"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToEiNSt6size_tE", "tensorrt_llm::runtime::BufferManager::memoryPoolTrimTo::device"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToENSt6size_tE", "tensorrt_llm::runtime::BufferManager::memoryPoolTrimTo::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToEiNSt6size_tE", "tensorrt_llm::runtime::BufferManager::memoryPoolTrimTo::size"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEi", "tensorrt_llm::runtime::BufferManager::memoryPoolUsed"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEv", "tensorrt_llm::runtime::BufferManager::memoryPoolUsed"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEi", "tensorrt_llm::runtime::BufferManager::memoryPoolUsed::device"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned::dims"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned::type"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned::type"], [2, 2, 1, 
"_CPPv4NK12tensorrt_llm7runtime13BufferManager7setZeroER7IBuffer", "tensorrt_llm::runtime::BufferManager::setZero"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7setZeroER7IBuffer", "tensorrt_llm::runtime::BufferManager::setZero::buffer"], [2, 1, 1, "_CPPv4I0EN12tensorrt_llm7runtime11BufferRangeE", "tensorrt_llm::runtime::BufferRange"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeER7IBuffer", "tensorrt_llm::runtime::BufferRange::BufferRange"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeER7IBuffer", "tensorrt_llm::runtime::BufferRange::BufferRange::buffer"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime11BufferRangeE", "tensorrt_llm::runtime::BufferRange::T"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange5beginEv", "tensorrt_llm::runtime::BufferRange::begin"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRange5beginEv", "tensorrt_llm::runtime::BufferRange::begin"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange6cbeginEv", "tensorrt_llm::runtime::BufferRange::cbegin"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRange6cbeginEv", "tensorrt_llm::runtime::BufferRange::cbegin"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange4cendEv", "tensorrt_llm::runtime::BufferRange::cend"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRange4cendEv", "tensorrt_llm::runtime::BufferRange::cend"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange14const_iteratorE", "tensorrt_llm::runtime::BufferRange::const_iterator"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange13const_pointerE", "tensorrt_llm::runtime::BufferRange::const_pointer"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange15const_referenceE", "tensorrt_llm::runtime::BufferRange::const_reference"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange3endEv", "tensorrt_llm::runtime::BufferRange::end"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRange3endEv", "tensorrt_llm::runtime::BufferRange::end"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange8iteratorE", "tensorrt_llm::runtime::BufferRange::iterator"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange5mDataE", "tensorrt_llm::runtime::BufferRange::mData"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange5mSizeE", "tensorrt_llm::runtime::BufferRange::mSize"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRangeixE9size_type", "tensorrt_llm::runtime::BufferRange::operator[]"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRangeixE9size_type", "tensorrt_llm::runtime::BufferRange::operator[]"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRangeixE9size_type", "tensorrt_llm::runtime::BufferRange::operator[]::index"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRangeixE9size_type", "tensorrt_llm::runtime::BufferRange::operator[]::index"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange7pointerE", "tensorrt_llm::runtime::BufferRange::pointer"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange9referenceE", "tensorrt_llm::runtime::BufferRange::reference"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRange4sizeEv", "tensorrt_llm::runtime::BufferRange::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange9size_typeE", "tensorrt_llm::runtime::BufferRange::size_type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange10value_typeE", "tensorrt_llm::runtime::BufferRange::value_type"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEventE", "tensorrt_llm::runtime::CudaEvent"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb", 
"tensorrt_llm::runtime::CudaEvent::CudaEvent"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventEj", "tensorrt_llm::runtime::CudaEvent::CudaEvent"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb", "tensorrt_llm::runtime::CudaEvent::CudaEvent::event"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventEj", "tensorrt_llm::runtime::CudaEvent::CudaEvent::flags"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb", "tensorrt_llm::runtime::CudaEvent::CudaEvent::ownsEvent"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7DeleterE", "tensorrt_llm::runtime::CudaEvent::Deleter"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEb", "tensorrt_llm::runtime::CudaEvent::Deleter::Deleter"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEv", "tensorrt_llm::runtime::CudaEvent::Deleter::Deleter"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEb", "tensorrt_llm::runtime::CudaEvent::Deleter::Deleter::ownsEvent"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter10mOwnsEventE", "tensorrt_llm::runtime::CudaEvent::Deleter::mOwnsEvent"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent7DeleterclE7pointer", "tensorrt_llm::runtime::CudaEvent::Deleter::operator()"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent7DeleterclE7pointer", "tensorrt_llm::runtime::CudaEvent::Deleter::operator()::event"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent8EventPtrE", "tensorrt_llm::runtime::CudaEvent::EventPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent12element_typeE", "tensorrt_llm::runtime::CudaEvent::element_type"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent3getEv", "tensorrt_llm::runtime::CudaEvent::get"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent6mEventE", "tensorrt_llm::runtime::CudaEvent::mEvent"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaEvent::pointer"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent11synchronizeEv", "tensorrt_llm::runtime::CudaEvent::synchronize"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStreamE", "tensorrt_llm::runtime::CudaStream"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib", "tensorrt_llm::runtime::CudaStream::CudaStream"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji", "tensorrt_llm::runtime::CudaStream::CudaStream"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib", "tensorrt_llm::runtime::CudaStream::CudaStream::device"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji", "tensorrt_llm::runtime::CudaStream::CudaStream::flags"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib", "tensorrt_llm::runtime::CudaStream::CudaStream::ownsStream"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji", "tensorrt_llm::runtime::CudaStream::CudaStream::priority"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib", "tensorrt_llm::runtime::CudaStream::CudaStream::stream"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7DeleterE", "tensorrt_llm::runtime::CudaStream::Deleter"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEb", "tensorrt_llm::runtime::CudaStream::Deleter::Deleter"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEv", "tensorrt_llm::runtime::CudaStream::Deleter::Deleter"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEb", "tensorrt_llm::runtime::CudaStream::Deleter::Deleter::ownsStream"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter11mOwnsStreamE", "tensorrt_llm::runtime::CudaStream::Deleter::mOwnsStream"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream7DeleterclE12cudaStream_t", "tensorrt_llm::runtime::CudaStream::Deleter::operator()"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream7DeleterclE12cudaStream_t", "tensorrt_llm::runtime::CudaStream::Deleter::operator()::stream"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream9StreamPtrE", "tensorrt_llm::runtime::CudaStream::StreamPtr"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream3getEv", "tensorrt_llm::runtime::CudaStream::get"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream9getDeviceEv", "tensorrt_llm::runtime::CudaStream::getDevice"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7mDeviceE", "tensorrt_llm::runtime::CudaStream::mDevice"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7mStreamE", "tensorrt_llm::runtime::CudaStream::mStream"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordEN9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaStream::record"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordERK9CudaEvent", "tensorrt_llm::runtime::CudaStream::record"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordEN9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaStream::record::event"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordERK9CudaEvent", "tensorrt_llm::runtime::CudaStream::record::event"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream11synchronizeEv", "tensorrt_llm::runtime::CudaStream::synchronize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitEN9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaStream::wait"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitERK9CudaEvent", "tensorrt_llm::runtime::CudaStream::wait"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitEN9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaStream::wait::event"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitERK9CudaEvent", "tensorrt_llm::runtime::CudaStream::wait::event"], [2, 1, 1, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime14DataTypeTraitsE", "tensorrt_llm::runtime::DataTypeTraits"], [2, 5, 1, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime14DataTypeTraitsE", "tensorrt_llm::runtime::DataTypeTraits::kDataType"], [2, 5, 1, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime14DataTypeTraitsE", "tensorrt_llm::runtime::DataTypeTraits::kIsPointer"], [2, 5, 1, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime14DataTypeTraitsE", "tensorrt_llm::runtime::DataTypeTraits::kIsUnsigned"], [2, 1, 1, "_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEEE", "tensorrt_llm::runtime::DataTypeTraits<kDataType, kUnsigned, true>"], [2, 5, 1, "_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEEE", "tensorrt_llm::runtime::DataTypeTraits<kDataType, kUnsigned, true>::kDataType"], [2, 5, 1, "_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEEE", "tensorrt_llm::runtime::DataTypeTraits<kDataType, kUnsigned, true>::kUnsigned"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4nameE", "tensorrt_llm::runtime::DataTypeTraits<kDataType, kUnsigned, true>::name"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4sizeE", "tensorrt_llm::runtime::DataTypeTraits<kDataType, kUnsigned, true>::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4typeE", "tensorrt_llm::runtime::DataTypeTraits<kDataType, kUnsigned, true>::type"], [2, 1, 1, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedEE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kBOOL, kUnsigned>"], [2, 5, 1, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedEE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kBOOL, kUnsigned>::kUnsigned"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4nameE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kBOOL, kUnsigned>::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4sizeE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kBOOL, kUnsigned>::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4typeE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kBOOL, kUnsigned>::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEEE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kFLOAT>"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4nameE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kFLOAT>::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4sizeE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kFLOAT>::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4typeE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kFLOAT>::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEEE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kHALF>"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4nameE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kHALF>::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4sizeE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kHALF>::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4typeE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kHALF>::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEEE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32, true>"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4nameE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32, true>::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4sizeE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32, true>::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4typeE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32, true>::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EEE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32>"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4nameE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32>::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4sizeE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32>::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4typeE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32>::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEEE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64, true>"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4nameE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64, true>::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4sizeE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64, true>::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4typeE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64, true>::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EEE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64>"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4nameE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64>::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4sizeE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64>::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4typeE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64>::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EEE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT8>"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4nameE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT8>::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4sizeE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT8>::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4typeE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT8>::type"], [2, 1, 1, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedEE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kUINT8, kUnsigned>"], [2, 5, 1, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedEE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kUINT8, kUnsigned>::kUnsigned"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4nameE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kUINT8, kUnsigned>::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4sizeE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kUINT8, kUnsigned>::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4typeE", "tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kUINT8, kUnsigned>::type"], [2, 1, 1, 
"_CPPv4N12tensorrt_llm7runtime13DecodingInputE", "tensorrt_llm::runtime::DecodingInput"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::batchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::endIds"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::logits"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::maxKvCacheLength"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::maxLength"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9TensorPtrE", "tensorrt_llm::runtime::DecodingInput::TensorPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsListE", "tensorrt_llm::runtime::DecodingInput::badWordsList"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9batchSizeE", "tensorrt_llm::runtime::DecodingInput::batchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput16cacheIndirectionE", "tensorrt_llm::runtime::DecodingInput::cacheIndirection"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13embeddingBiasE", "tensorrt_llm::runtime::DecodingInput::embeddingBias"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput6endIdsE", "tensorrt_llm::runtime::DecodingInput::endIds"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput8finishedE", "tensorrt_llm::runtime::DecodingInput::finished"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput7lengthsE", "tensorrt_llm::runtime::DecodingInput::lengths"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput6logitsE", "tensorrt_llm::runtime::DecodingInput::logits"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput16maxKvCacheLengthE", "tensorrt_llm::runtime::DecodingInput::maxKvCacheLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9maxLengthE", "tensorrt_llm::runtime::DecodingInput::maxLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput17noRepeatNgramSizeE", "tensorrt_llm::runtime::DecodingInput::noRepeatNgramSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput19sequenceLimitLengthE", "tensorrt_llm::runtime::DecodingInput::sequenceLimitLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput4stepE", "tensorrt_llm::runtime::DecodingInput::step"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsListE", "tensorrt_llm::runtime::DecodingInput::stopWordsList"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutputE", "tensorrt_llm::runtime::DecodingOutput"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypothesesE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses11cumLogProbsE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::cumLogProbs"], [2, 2, 1, 
"_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5emptyER13BufferManager", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::empty"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5emptyER13BufferManager", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::empty::manager"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initER13BufferManager11TokenIdType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::init"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initER13BufferManager11TokenIdType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::init::endId"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initER13BufferManager11TokenIdType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::init::manager"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses6isDoneE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::isDone"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses8logProbsE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::logProbs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses15minNormedScoresE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::minNormedScores"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12normedScoresE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::normedScores"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses8numBeamsE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::numBeams"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12outputIdsTgtE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::outputIdsTgt"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7releaseEv", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::release"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape::batchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape::beamWidth"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape::maxSequenceLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses18sequenceLengthsTgtE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::sequenceLengthsTgt"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::slice"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::slice::batchIndex"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::slice::size"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14DecodingOutputE9TensorPtr", "tensorrt_llm::runtime::DecodingOutput::DecodingOutput"], [2, 
3, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14DecodingOutputE9TensorPtr", "tensorrt_llm::runtime::DecodingOutput::DecodingOutput::ids"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9TensorPtrE", "tensorrt_llm::runtime::DecodingOutput::TensorPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14beamHypothesesE", "tensorrt_llm::runtime::DecodingOutput::beamHypotheses"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput16cacheIndirectionE", "tensorrt_llm::runtime::DecodingOutput::cacheIndirection"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput11cumLogProbsE", "tensorrt_llm::runtime::DecodingOutput::cumLogProbs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput8finishedE", "tensorrt_llm::runtime::DecodingOutput::finished"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput13finishedStepsE", "tensorrt_llm::runtime::DecodingOutput::finishedSteps"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput11finishedSumE", "tensorrt_llm::runtime::DecodingOutput::finishedSum"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput3idsE", "tensorrt_llm::runtime::DecodingOutput::ids"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput17kNegativeInfinityE", "tensorrt_llm::runtime::DecodingOutput::kNegativeInfinity"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput7lengthsE", "tensorrt_llm::runtime::DecodingOutput::lengths"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput8logProbsE", "tensorrt_llm::runtime::DecodingOutput::logProbs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9newTokensE", "tensorrt_llm::runtime::DecodingOutput::newTokens"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14newTokensStepsE", "tensorrt_llm::runtime::DecodingOutput::newTokensSteps"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput12newTokensVecE", "tensorrt_llm::runtime::DecodingOutput::newTokensVec"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9parentIdsE", "tensorrt_llm::runtime::DecodingOutput::parentIds"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInputE", "tensorrt_llm::runtime::GenerationInput"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput4BaseE", "tensorrt_llm::runtime::GenerationInput::Base"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::endId"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::ids"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::lengths"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::packed"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::padId"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput9TensorPtrE", "tensorrt_llm::runtime::GenerationInput::TensorPtr"], [2, 1, 1, 
"_CPPv4N12tensorrt_llm7runtime16GenerationOutputE", "tensorrt_llm::runtime::GenerationOutput"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput4BaseE", "tensorrt_llm::runtime::GenerationOutput::Base"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput16GenerationOutputE9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenerationOutput::GenerationOutput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput16GenerationOutputE9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenerationOutput::GenerationOutput::ids"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput16GenerationOutputE9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenerationOutput::GenerationOutput::lengths"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput9TensorPtrE", "tensorrt_llm::runtime::GenerationOutput::TensorPtr"], [2, 1, 1, "_CPPv4I00EN12tensorrt_llm7runtime22GenericGenerationInputE", "tensorrt_llm::runtime::GenericGenerationInput"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput::endId"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput::ids"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput::lengths"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput::packed"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput::padId"], [2, 5, 1, "_CPPv4I00EN12tensorrt_llm7runtime22GenericGenerationInputE", "tensorrt_llm::runtime::GenericGenerationInput::PromptTuningParams"], [2, 5, 1, "_CPPv4I00EN12tensorrt_llm7runtime22GenericGenerationInputE", "tensorrt_llm::runtime::GenericGenerationInput::TTensor"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput9TensorPtrE", "tensorrt_llm::runtime::GenericGenerationInput::TensorPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput12badWordsListE", "tensorrt_llm::runtime::GenericGenerationInput::badWordsList"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput13embeddingBiasE", "tensorrt_llm::runtime::GenericGenerationInput::embeddingBias"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput5endIdE", "tensorrt_llm::runtime::GenericGenerationInput::endId"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput3idsE", "tensorrt_llm::runtime::GenericGenerationInput::ids"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput7lengthsE", "tensorrt_llm::runtime::GenericGenerationInput::lengths"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput12maxNewTokensE", "tensorrt_llm::runtime::GenericGenerationInput::maxNewTokens"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput6packedE", "tensorrt_llm::runtime::GenericGenerationInput::packed"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput5padIdE", "tensorrt_llm::runtime::GenericGenerationInput::padId"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput18promptTuningParamsE", "tensorrt_llm::runtime::GenericGenerationInput::promptTuningParams"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput13stopWordsListE", "tensorrt_llm::runtime::GenericGenerationInput::stopWordsList"], [2, 1, 1, "_CPPv4I0EN12tensorrt_llm7runtime23GenericGenerationOutputE", "tensorrt_llm::runtime::GenericGenerationOutput"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput8CallbackE", "tensorrt_llm::runtime::GenericGenerationOutput::Callback"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput23GenericGenerationOutputE9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericGenerationOutput::GenericGenerationOutput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput23GenericGenerationOutputE9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericGenerationOutput::GenericGenerationOutput::ids"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput23GenericGenerationOutputE9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericGenerationOutput::GenericGenerationOutput::lengths"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime23GenericGenerationOutputE", "tensorrt_llm::runtime::GenericGenerationOutput::TTensor"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput9TensorPtrE", "tensorrt_llm::runtime::GenericGenerationOutput::TensorPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput13contextLogitsE", "tensorrt_llm::runtime::GenericGenerationOutput::contextLogits"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput11cumLogProbsE", "tensorrt_llm::runtime::GenericGenerationOutput::cumLogProbs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput16generationLogitsE", "tensorrt_llm::runtime::GenericGenerationOutput::generationLogits"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput3idsE", "tensorrt_llm::runtime::GenericGenerationOutput::ids"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput7lengthsE", "tensorrt_llm::runtime::GenericGenerationOutput::lengths"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput8logProbsE", "tensorrt_llm::runtime::GenericGenerationOutput::logProbs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput16onTokenGeneratedE", "tensorrt_llm::runtime::GenericGenerationOutput::onTokenGenerated"], [2, 1, 1, "_CPPv4I0EN12tensorrt_llm7runtime25GenericPromptTuningParamsE", "tensorrt_llm::runtime::GenericPromptTuningParams"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams25GenericPromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericPromptTuningParams::GenericPromptTuningParams"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams25GenericPromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericPromptTuningParams::GenericPromptTuningParams::embeddingTable"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams25GenericPromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericPromptTuningParams::GenericPromptTuningParams::tasks"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams25GenericPromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericPromptTuningParams::GenericPromptTuningParams::vocabSize"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams8SizeTypeE", "tensorrt_llm::runtime::GenericPromptTuningParams::SizeType"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime25GenericPromptTuningParamsE", "tensorrt_llm::runtime::GenericPromptTuningParams::TTensor"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9TensorPtrE", "tensorrt_llm::runtime::GenericPromptTuningParams::TensorPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams14embeddingTableE", "tensorrt_llm::runtime::GenericPromptTuningParams::embeddingTable"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams19promptTuningEnabledE", "tensorrt_llm::runtime::GenericPromptTuningParams::promptTuningEnabled"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams5tasksE", "tensorrt_llm::runtime::GenericPromptTuningParams::tasks"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9vocabSizeE", "tensorrt_llm::runtime::GenericPromptTuningParams::vocabSize"], [2, 1, 1, "_CPPv4I0EN12tensorrt_llm7runtime10GptDecoderE", "tensorrt_llm::runtime::GptDecoder"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder13CudaStreamPtrE", "tensorrt_llm::runtime::GptDecoder::CudaStreamPtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderE6size_t6size_tRK13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoder::GptDecoder"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderE6size_t6size_tRK13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoder::GptDecoder::stream"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderE6size_t6size_tRK13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoder::GptDecoder::vocabSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderE6size_t6size_tRK13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoder::GptDecoder::vocabSizePadded"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime10GptDecoderE", "tensorrt_llm::runtime::GptDecoder::T"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder9TensorPtrE", "tensorrt_llm::runtime::GptDecoder::TensorPtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forward"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forward::input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forward::output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forwardAsync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forwardAsync::input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forwardAsync::output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::GptDecoder::gatherTree"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", 
"tensorrt_llm::runtime::GptDecoder::gatherTree::decodingInput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::GptDecoder::gatherTree::decodingOutput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::GptDecoder::gatherTree::finalOutputIds"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::GptDecoder::gatherTree::manager"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10mAllocatorE", "tensorrt_llm::runtime::GptDecoder::mAllocator"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder19mDynamicDecodeLayerE", "tensorrt_llm::runtime::GptDecoder::mDynamicDecodeLayer"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder14mLogProbsTiledE", "tensorrt_llm::runtime::GptDecoder::mLogProbsTiled"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder8mManagerE", "tensorrt_llm::runtime::GptDecoder::mManager"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t8SizeType", "tensorrt_llm::runtime::GptDecoder::setup"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t8SizeType", "tensorrt_llm::runtime::GptDecoder::setup::batchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t8SizeType", "tensorrt_llm::runtime::GptDecoder::setup::maxSequenceLength"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t8SizeType", "tensorrt_llm::runtime::GptDecoder::setup::samplingConfig"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatchE", "tensorrt_llm::runtime::GptDecoderBatch"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13CudaStreamPtrE", "tensorrt_llm::runtime::GptDecoderBatch::CudaStreamPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16DecodingInputPtrE", "tensorrt_llm::runtime::GptDecoderBatch::DecodingInputPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch17DecodingOutputPtrE", "tensorrt_llm::runtime::GptDecoderBatch::DecodingOutputPtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoderBatch::GptDecoderBatch"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoderBatch::GptDecoderBatch::stream"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoderBatch::GptDecoderBatch::vocabSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoderBatch::GptDecoderBatch::vocabSizePadded"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13GptDecoderPtrE", "tensorrt_llm::runtime::GptDecoderBatch::GptDecoderPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9TensorPtrE", "tensorrt_llm::runtime::GptDecoderBatch::TensorPtr"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch8finalizeE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::finalize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch8finalizeEv", "tensorrt_llm::runtime::GptDecoderBatch::finalize"], [2, 3, 1, 
"_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch8finalizeE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::finalize::batchIdx"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync::input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync::input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync::output"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync::output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE", "tensorrt_llm::runtime::GptDecoderBatch::forwardSync"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncEv", "tensorrt_llm::runtime::GptDecoderBatch::forwardSync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE", "tensorrt_llm::runtime::GptDecoderBatch::forwardSync::e"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch15getAllNewTokensEv", "tensorrt_llm::runtime::GptDecoderBatch::getAllNewTokens"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch14getCumLogProbsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getCumLogProbs"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch14getCumLogProbsEv", "tensorrt_llm::runtime::GptDecoderBatch::getCumLogProbs"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch14getCumLogProbsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getCumLogProbs::batchIdx"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getFinishedEv", "tensorrt_llm::runtime::GptDecoderBatch::getFinished"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getLogProbsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getLogProbs"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getLogProbsEv", "tensorrt_llm::runtime::GptDecoderBatch::getLogProbs"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getLogProbsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getLogProbs::batchIdx"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch13getNbFinishedEv", "tensorrt_llm::runtime::GptDecoderBatch::getNbFinished"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch10getNbStepsEv", "tensorrt_llm::runtime::GptDecoderBatch::getNbSteps"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getNewTokensE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getNewTokens"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getNewTokensE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getNewTokens::iter"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getOutputIds"], [2, 2, 1, 
"_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsEv", "tensorrt_llm::runtime::GptDecoderBatch::getOutputIds"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getOutputIds::batchIdx"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getParentIdsEv", "tensorrt_llm::runtime::GptDecoderBatch::getParentIds"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mActualBatchSizeE", "tensorrt_llm::runtime::GptDecoderBatch::mActualBatchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11mBeamWidthsE", "tensorrt_llm::runtime::GptDecoderBatch::mBeamWidths"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mBufferManagerE", "tensorrt_llm::runtime::GptDecoderBatch::mBufferManager"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9mDecodersE", "tensorrt_llm::runtime::GptDecoderBatch::mDecoders"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mDecodingInputsE", "tensorrt_llm::runtime::GptDecoderBatch::mDecodingInputs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mDecodingOutputsE", "tensorrt_llm::runtime::GptDecoderBatch::mDecodingOutputs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mDraftTokenIdsE", "tensorrt_llm::runtime::GptDecoderBatch::mDraftTokenIds"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9mFinishedE", "tensorrt_llm::runtime::GptDecoderBatch::mFinished"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12mFinishedSumE", "tensorrt_llm::runtime::GptDecoderBatch::mFinishedSum"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mForwardEventE", "tensorrt_llm::runtime::GptDecoderBatch::mForwardEvent"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mForwardTokenE", "tensorrt_llm::runtime::GptDecoderBatch::mForwardToken"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch23mGeneratedTokensPerStepE", "tensorrt_llm::runtime::GptDecoderBatch::mGeneratedTokensPerStep"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch19mJointDecodingInputE", "tensorrt_llm::runtime::GptDecoderBatch::mJointDecodingInput"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch20mJointDecodingOutputE", "tensorrt_llm::runtime::GptDecoderBatch::mJointDecodingOutput"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch17mMaxKvCacheLengthE", "tensorrt_llm::runtime::GptDecoderBatch::mMaxKvCacheLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mMaxNewTokensE", "tensorrt_llm::runtime::GptDecoderBatch::mMaxNewTokens"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch18mMaxSequenceLengthE", "tensorrt_llm::runtime::GptDecoderBatch::mMaxSequenceLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch17mMaxTokensPerStepE", "tensorrt_llm::runtime::GptDecoderBatch::mMaxTokensPerStep"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8mNbStepsE", "tensorrt_llm::runtime::GptDecoderBatch::mNbSteps"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mNumDraftTokensE", "tensorrt_llm::runtime::GptDecoderBatch::mNumDraftTokens"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch7mStreamE", "tensorrt_llm::runtime::GptDecoderBatch::mStream"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8mStreamsE", "tensorrt_llm::runtime::GptDecoderBatch::mStreams"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10mVocabSizeE", "tensorrt_llm::runtime::GptDecoderBatch::mVocabSize"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mVocabSizePaddedE", "tensorrt_llm::runtime::GptDecoderBatch::mVocabSizePadded"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newBatch"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newBatch::inputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newBatch::outputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newBatch::samplingConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequest"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequest::batchIdx"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequest::request"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequest::samplingConfig"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch18postProcessRequestE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::postProcessRequest"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch18postProcessRequestE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::postProcessRequest::batchIdx"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup::dtype"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup::maxBatchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup::maxBeamWidth"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup::maxKvCacheLength"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup::maxSequenceLength"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup::maxTokensPerStep"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfigE", "tensorrt_llm::runtime::GptJsonConfig"], [2, 2, 1, 
"_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::modelConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::name"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::pipelineParallelism"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::precision"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::tensorParallelism"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfig", "tensorrt_llm::runtime::GptJsonConfig::engineFilename"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::engineFilename"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::engineFilename::model"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfig", "tensorrt_llm::runtime::GptJsonConfig::engineFilename::worldConfig"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::engineFilename::worldConfig"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14getModelConfigEv", "tensorrt_llm::runtime::GptJsonConfig::getModelConfig"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig7getNameEv", "tensorrt_llm::runtime::GptJsonConfig::getName"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig22getPipelineParallelismEv", "tensorrt_llm::runtime::GptJsonConfig::getPipelineParallelism"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getPrecisionEv", "tensorrt_llm::runtime::GptJsonConfig::getPrecision"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig20getTensorParallelismEv", "tensorrt_llm::runtime::GptJsonConfig::getTensorParallelism"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getWorldSizeEv", "tensorrt_llm::runtime::GptJsonConfig::getWorldSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig15mGptModelConfigE", "tensorrt_llm::runtime::GptJsonConfig::mGptModelConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5mNameE", "tensorrt_llm::runtime::GptJsonConfig::mName"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig20mPipelineParallelismE", "tensorrt_llm::runtime::GptJsonConfig::mPipelineParallelism"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig10mPrecisionE", "tensorrt_llm::runtime::GptJsonConfig::mPrecision"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig18mTensorParallelismE", "tensorrt_llm::runtime::GptJsonConfig::mTensorParallelism"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt10filesystem4pathE", 
"tensorrt_llm::runtime::GptJsonConfig::parse"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::parse"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERNSt7istreamE", "tensorrt_llm::runtime::GptJsonConfig::parse"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::parse::json"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERNSt7istreamE", "tensorrt_llm::runtime::GptJsonConfig::parse::json"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt10filesystem4pathE", "tensorrt_llm::runtime::GptJsonConfig::parse::path"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfigE", "tensorrt_llm::runtime::GptModelConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::dtype"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::hiddenSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::nbHeads"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::nbLayers"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::vocabSize"], [2, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariantE", "tensorrt_llm::runtime::GptModelConfig::ModelVariant"], [2, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant4kGlmE", "tensorrt_llm::runtime::GptModelConfig::ModelVariant::kGlm"], [2, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant4kGptE", "tensorrt_llm::runtime::GptModelConfig::ModelVariant::kGpt"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEb", "tensorrt_llm::runtime::GptModelConfig::computeContextLogits"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEv", "tensorrt_llm::runtime::GptModelConfig::computeContextLogits"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEb", "tensorrt_llm::runtime::GptModelConfig::computeContextLogits::computeContextLogits"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig23computeGenerationLogitsEb", "tensorrt_llm::runtime::GptModelConfig::computeGenerationLogits"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig23computeGenerationLogitsEv", "tensorrt_llm::runtime::GptModelConfig::computeGenerationLogits"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig23computeGenerationLogitsEb", "tensorrt_llm::runtime::GptModelConfig::computeGenerationLogits::computeGenerationLogits"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getDataTypeEv", "tensorrt_llm::runtime::GptModelConfig::getDataType"], [2, 2, 1, 
"_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13getHiddenSizeEv", "tensorrt_llm::runtime::GptModelConfig::getHiddenSize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxBatchSizeEv", "tensorrt_llm::runtime::GptModelConfig::getMaxBatchSize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMaxInputLenEv", "tensorrt_llm::runtime::GptModelConfig::getMaxInputLen"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxNumTokensEv", "tensorrt_llm::runtime::GptModelConfig::getMaxNumTokens"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxOutputLenEv", "tensorrt_llm::runtime::GptModelConfig::getMaxOutputLen"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig30getMaxPromptEmbeddingTableSizeEv", "tensorrt_llm::runtime::GptModelConfig::getMaxPromptEmbeddingTableSize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig19getMaxTokensPerStepEv", "tensorrt_llm::runtime::GptModelConfig::getMaxTokensPerStep"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getModelVariantEv", "tensorrt_llm::runtime::GptModelConfig::getModelVariant"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig10getNbHeadsEv", "tensorrt_llm::runtime::GptModelConfig::getNbHeads"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getNbKvHeadsEv", "tensorrt_llm::runtime::GptModelConfig::getNbKvHeads"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getNbLayersE8SizeType", "tensorrt_llm::runtime::GptModelConfig::getNbLayers"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getNbLayersE8SizeType", "tensorrt_llm::runtime::GptModelConfig::getNbLayers::pipelineParallelism"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getQuantModeEv", "tensorrt_llm::runtime::GptModelConfig::getQuantMode"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getSizePerHeadEv", "tensorrt_llm::runtime::GptModelConfig::getSizePerHead"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig17getTokensPerBlockEv", "tensorrt_llm::runtime::GptModelConfig::getTokensPerBlock"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getVocabSizeEv", "tensorrt_llm::runtime::GptModelConfig::getVocabSize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18getVocabSizePaddedE8SizeType", "tensorrt_llm::runtime::GptModelConfig::getVocabSizePadded"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18getVocabSizePaddedE8SizeType", "tensorrt_llm::runtime::GptModelConfig::getVocabSizePadded::worldSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21mComputeContextLogitsE", "tensorrt_llm::runtime::GptModelConfig::mComputeContextLogits"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig24mComputeGenerationLogitsE", "tensorrt_llm::runtime::GptModelConfig::mComputeGenerationLogits"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig9mDataTypeE", "tensorrt_llm::runtime::GptModelConfig::mDataType"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig11mHiddenSizeE", "tensorrt_llm::runtime::GptModelConfig::mHiddenSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mInputPackedE", "tensorrt_llm::runtime::GptModelConfig::mInputPacked"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxBatchSizeE", "tensorrt_llm::runtime::GptModelConfig::mMaxBatchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxDraftLenE", "tensorrt_llm::runtime::GptModelConfig::mMaxDraftLen"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxInputLenE", "tensorrt_llm::runtime::GptModelConfig::mMaxInputLen"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxNumTokensE", "tensorrt_llm::runtime::GptModelConfig::mMaxNumTokens"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxOutputLenE", "tensorrt_llm::runtime::GptModelConfig::mMaxOutputLen"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig28mMaxPromptEmbeddingTableSizeE", "tensorrt_llm::runtime::GptModelConfig::mMaxPromptEmbeddingTableSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mModelVariantE", "tensorrt_llm::runtime::GptModelConfig::mModelVariant"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig8mNbHeadsE", "tensorrt_llm::runtime::GptModelConfig::mNbHeads"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mNbKvHeadsE", "tensorrt_llm::runtime::GptModelConfig::mNbKvHeads"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig9mNbLayersE", "tensorrt_llm::runtime::GptModelConfig::mNbLayers"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mPagedKvCacheE", "tensorrt_llm::runtime::GptModelConfig::mPagedKvCache"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mQuantModeE", "tensorrt_llm::runtime::GptModelConfig::mQuantMode"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15mTokensPerBlockE", "tensorrt_llm::runtime::GptModelConfig::mTokensPerBlock"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig19mUseCustomAllReduceE", "tensorrt_llm::runtime::GptModelConfig::mUseCustomAllReduce"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig22mUseGptAttentionPluginE", "tensorrt_llm::runtime::GptModelConfig::mUseGptAttentionPlugin"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mVocabSizeE", "tensorrt_llm::runtime::GptModelConfig::mVocabSize"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBatchSizeE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxBatchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBatchSizeE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxBatchSize::maxBatchSize"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxDraftLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxDraftLen"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxDraftLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxDraftLen::maxDraftLen"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxInputLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxInputLen"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxInputLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxInputLen::maxInputLen"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxNumTokensENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptModelConfig::setMaxNumTokens"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxNumTokensENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptModelConfig::setMaxNumTokens::maxNumTokens"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxOutputLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxOutputLen"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxOutputLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxOutputLen::maxOutputLen"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig30setMaxPromptEmbeddingTableSizeE8SizeType", 
"tensorrt_llm::runtime::GptModelConfig::setMaxPromptEmbeddingTableSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig30setMaxPromptEmbeddingTableSizeE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxPromptEmbeddingTableSize::maxPromptEmbeddingTableSize"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setModelVariantE12ModelVariant", "tensorrt_llm::runtime::GptModelConfig::setModelVariant"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setModelVariantE12ModelVariant", "tensorrt_llm::runtime::GptModelConfig::setModelVariant::modelVariant"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setNbKvHeadsE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setNbKvHeads"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setNbKvHeadsE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setNbKvHeads::nbKvHeads"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setQuantModeEN6common9QuantModeE", "tensorrt_llm::runtime::GptModelConfig::setQuantMode"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setQuantModeEN6common9QuantModeE", "tensorrt_llm::runtime::GptModelConfig::setQuantMode::QuantMode"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setTokensPerBlockE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setTokensPerBlock"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setTokensPerBlockE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setTokensPerBlock::TokensPerBlock"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig24supportsInflightBatchingEv", "tensorrt_llm::runtime::GptModelConfig::supportsInflightBatching"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEb", "tensorrt_llm::runtime::GptModelConfig::useCustomAllReduce"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEv", "tensorrt_llm::runtime::GptModelConfig::useCustomAllReduce"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEb", "tensorrt_llm::runtime::GptModelConfig::useCustomAllReduce::customAllReduce"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEb", "tensorrt_llm::runtime::GptModelConfig::useGptAttentionPlugin"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEv", "tensorrt_llm::runtime::GptModelConfig::useGptAttentionPlugin"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEb", "tensorrt_llm::runtime::GptModelConfig::useGptAttentionPlugin::useGptAttentionPlugin"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14usePackedInputEb", "tensorrt_llm::runtime::GptModelConfig::usePackedInput"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14usePackedInputEv", "tensorrt_llm::runtime::GptModelConfig::usePackedInput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14usePackedInputEb", "tensorrt_llm::runtime::GptModelConfig::usePackedInput::inputPacked"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEb", "tensorrt_llm::runtime::GptModelConfig::usePagedKvCache"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEv", "tensorrt_llm::runtime::GptModelConfig::usePagedKvCache"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEb", "tensorrt_llm::runtime::GptModelConfig::usePagedKvCache::pagedKvCache"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15usePromptTuningEv", 
"tensorrt_llm::runtime::GptModelConfig::usePromptTuning"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime10GptSessionE", "tensorrt_llm::runtime::GptSession"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6ConfigE", "tensorrt_llm::runtime::GptSession::Config"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config6ConfigE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::Config::Config"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config6ConfigE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::Config::Config::maxBatchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config6ConfigE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::Config::Config::maxBeamWidth"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config6ConfigE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::Config::Config::maxSequenceLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17ctxMicroBatchSizeE", "tensorrt_llm::runtime::GptSession::Config::ctxMicroBatchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config13cudaGraphModeE", "tensorrt_llm::runtime::GptSession::Config::cudaGraphMode"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17decoderPerRequestE", "tensorrt_llm::runtime::GptSession::Config::decoderPerRequest"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17genMicroBatchSizeE", "tensorrt_llm::runtime::GptSession::Config::genMicroBatchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config13kvCacheConfigE", "tensorrt_llm::runtime::GptSession::Config::kvCacheConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config12maxBatchSizeE", "tensorrt_llm::runtime::GptSession::Config::maxBatchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config12maxBeamWidthE", "tensorrt_llm::runtime::GptSession::Config::maxBeamWidth"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17maxSequenceLengthE", "tensorrt_llm::runtime::GptSession::Config::maxSequenceLength"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorE", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor17CudaGraphExecutorEv", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::CudaGraphExecutor"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor5clearEv", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::clear"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6createERK11cudaGraph_t", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::create"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6createERK11cudaGraph_t", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::create::graph"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor11hasInstanceEv", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::hasInstance"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6launchERK10CudaStream", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::launch"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6launchERK10CudaStream", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::launch::stream"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor9mInstanceE", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::mInstance"], [2, 2, 1, 
"_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16prepareNextGraphERK11TllmRuntime8SizeType", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::prepareNextGraph"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16prepareNextGraphERK11TllmRuntime8SizeType", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::prepareNextGraph::nextContextId"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16prepareNextGraphERK11TllmRuntime8SizeType", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::prepareNextGraph::runtime"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6updateERK11cudaGraph_t", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::update"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6updateERK11cudaGraph_t", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::update::graph"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor14uploadToStreamERK10CudaStream", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::uploadToStream"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor14uploadToStreamERK10CudaStream", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::uploadToStream::stream"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorD0Ev", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::~CudaGraphExecutor"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::engineBuffer"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::engineBuffer"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::engineFile"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::engineSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::logger"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::logger"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::logger"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", 
"tensorrt_llm::runtime::GptSession::GptSession::modelConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::modelConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::modelConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::sessionConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::sessionConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::sessionConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::worldConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::worldConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::worldConfig"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13KvCacheConfigE", "tensorrt_llm::runtime::GptSession::KvCacheConfig"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14KvCacheManagerE", "tensorrt_llm::runtime::GptSession::KvCacheManager"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession9LoggerPtrE", "tensorrt_llm::runtime::GptSession::LoggerPtr"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfigE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE8SizeType8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigEv", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE8SizeType8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig::ctxMicroBatchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE8SizeType8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig::genMicroBatchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE8SizeType8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig::maxBatchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE8SizeType8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", 
"tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig::pipelineParallelism"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig12ctxBatchSizeE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::ctxBatchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig12genBatchSizeE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::genBatchSize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig15getCtxContextIdE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::getCtxContextId"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig15getCtxContextIdE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::getCtxContextId::contextBatchId"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig15getCtxContextIdE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::getCtxContextId::generationBatchId"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig15getGenContextIdE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::getGenContextId"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig15getGenContextIdE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::getGenContextId::flipFlopId"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig15getGenContextIdE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::getGenContextId::generationBatchId"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig13numCtxBatchesE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::numCtxBatches"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig12numCtxPerGenEv", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::numCtxPerGen"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig13numGenBatchesE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::numGenBatches"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession9TensorPtrE", "tensorrt_llm::runtime::GptSession::TensorPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession22TokenGeneratedCallbackE", "tensorrt_llm::runtime::GptSession::TokenGeneratedCallback"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13createBuffersE8SizeType", "tensorrt_llm::runtime::GptSession::createBuffers"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13createBuffersE8SizeType", "tensorrt_llm::runtime::GptSession::createBuffers::numMicroBatches"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createContextsE8SizeType8SizeTypeb", "tensorrt_llm::runtime::GptSession::createContexts"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createContextsE8SizeType8SizeTypeb", "tensorrt_llm::runtime::GptSession::createContexts::numBatchesCtx"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createContextsE8SizeType8SizeTypeb", "tensorrt_llm::runtime::GptSession::createContexts::numBatchesGen"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createContextsE8SizeType8SizeTypeb", "tensorrt_llm::runtime::GptSession::createContexts::useCudaGraphs"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::createCustomAllReduceWorkspace"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType", 
"tensorrt_llm::runtime::GptSession::createCustomAllReduceWorkspace::batchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::createCustomAllReduceWorkspace::beamWidth"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::createCustomAllReduceWorkspace::maxSequenceLength"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::batchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::beamWidth"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::decoderPerRequest"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::logitsType"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::maxKvCacheLength"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::maxSequenceLength"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::numMicroBatches"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager::batchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager::beamWidth"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager::config"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager::maxKvCacheLength"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager::maxSequenceLength"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createOnTokenGeneratedCallbackER16GenerationOutput", "tensorrt_llm::runtime::GptSession::createOnTokenGeneratedCallback"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime10GptSession30createOnTokenGeneratedCallbackER16GenerationOutput", "tensorrt_llm::runtime::GptSession::createOnTokenGeneratedCallback::outputs"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::decoderStepAsync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::decoderStepAsync::decoderStep"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::decoderStepAsync::microBatchId"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18executeContextStepERKNSt6vectorI15GenerationInputEERKNSt6vectorI8SizeTypeEEPK14KvCacheManager", "tensorrt_llm::runtime::GptSession::executeContextStep"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18executeContextStepERKNSt6vectorI15GenerationInputEERKNSt6vectorI8SizeTypeEEPK14KvCacheManager", "tensorrt_llm::runtime::GptSession::executeContextStep::kvCacheManager"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18executeContextStepERKNSt6vectorI15GenerationInputEERKNSt6vectorI8SizeTypeEEPK14KvCacheManager", "tensorrt_llm::runtime::GptSession::executeContextStep::microBatchOffsets"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18executeContextStepERKNSt6vectorI15GenerationInputEERKNSt6vectorI8SizeTypeEEPK14KvCacheManager", "tensorrt_llm::runtime::GptSession::executeContextStep::microBatches"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep::kvCacheManager"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep::microBatchOffsets"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep::microBatchesFinished"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep::microBatchesInputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep::microBatchesOutputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep::step"], [2, 2, 1, 
"_CPPv4N12tensorrt_llm7runtime10GptSession8finalizeE8SizeType", "tensorrt_llm::runtime::GptSession::finalize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8finalizeE8SizeType", "tensorrt_llm::runtime::GptSession::finalize::microBatchId"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generate"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generate::inputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generate::outputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generate::samplingConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallback", "tensorrt_llm::runtime::GptSession::generateBatched"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallback", "tensorrt_llm::runtime::GptSession::generateBatched::microBatchesInputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallback", "tensorrt_llm::runtime::GptSession::generateBatched::microBatchesOutputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallback", "tensorrt_llm::runtime::GptSession::generateBatched::onTokenGenerated"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallback", "tensorrt_llm::runtime::GptSession::generateBatched::samplingConfig"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16getBufferManagerEv", "tensorrt_llm::runtime::GptSession::getBufferManager"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession9getDeviceEv", "tensorrt_llm::runtime::GptSession::getDevice"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession9getLoggerEv", "tensorrt_llm::runtime::GptSession::getLogger"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession14getModelConfigEv", "tensorrt_llm::runtime::GptSession::getModelConfig"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession14getWorldConfigEv", "tensorrt_llm::runtime::GptSession::getWorldConfig"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initDecoder"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initDecoder::inputs"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initDecoder::microBatchId"], [2, 3, 1, 
"_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initDecoder::outputIds"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initDecoder::outputs"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initDecoder::samplingConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::kvCacheAddSequences"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::kvCacheAddSequences::beamWidth"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::kvCacheAddSequences::firstBatchIdx"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::kvCacheAddSequences::microBatchId"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8mBuffersE", "tensorrt_llm::runtime::GptSession::mBuffers"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10mCommEventE", "tensorrt_llm::runtime::GptSession::mCommEvent"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession9mCommPtrsE", "tensorrt_llm::runtime::GptSession::mCommPtrs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession11mCommStreamE", "tensorrt_llm::runtime::GptSession::mCommStream"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19mCudaGraphInstancesE", "tensorrt_llm::runtime::GptSession::mCudaGraphInstances"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14mCudaGraphModeE", "tensorrt_llm::runtime::GptSession::mCudaGraphMode"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession24mDecoderMaxKvCacheLengthE", "tensorrt_llm::runtime::GptSession::mDecoderMaxKvCacheLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession25mDecoderMaxSequenceLengthE", "tensorrt_llm::runtime::GptSession::mDecoderMaxSequenceLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession9mDecodersE", "tensorrt_llm::runtime::GptSession::mDecoders"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession7mDeviceE", "tensorrt_llm::runtime::GptSession::mDevice"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17mIpcMemoryHandlesE", "tensorrt_llm::runtime::GptSession::mIpcMemoryHandles"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15mKvCacheManagerE", "tensorrt_llm::runtime::GptSession::mKvCacheManager"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession7mLoggerE", "tensorrt_llm::runtime::GptSession::mLogger"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17mMicroBatchConfigE", "tensorrt_llm::runtime::GptSession::mMicroBatchConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession12mModelConfigE", "tensorrt_llm::runtime::GptSession::mModelConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13mPipelineCommE", "tensorrt_llm::runtime::GptSession::mPipelineComm"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15mReceivedEventsE", "tensorrt_llm::runtime::GptSession::mReceivedEvents"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8mRuntimeE", "tensorrt_llm::runtime::GptSession::mRuntime"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime10GptSession12mWorldConfigE", "tensorrt_llm::runtime::GptSession::mWorldConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession5setupERK6Config", "tensorrt_llm::runtime::GptSession::setup"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession5setupERK6Config", "tensorrt_llm::runtime::GptSession::setup::sessionConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::shouldStopSync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::shouldStopSync::batchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::shouldStopSync::beamWidth"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::shouldStopSync::microBatchId"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13useCudaGraphsEv", "tensorrt_llm::runtime::GptSession::useCudaGraphs"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime7IBufferE", "tensorrt_llm::runtime::IBuffer"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer8DataTypeE", "tensorrt_llm::runtime::IBuffer::DataType"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferERK7IBuffer", "tensorrt_llm::runtime::IBuffer::IBuffer"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferEv", "tensorrt_llm::runtime::IBuffer::IBuffer"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer14SharedConstPtrE", "tensorrt_llm::runtime::IBuffer::SharedConstPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer9SharedPtrE", "tensorrt_llm::runtime::IBuffer::SharedPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer14UniqueConstPtrE", "tensorrt_llm::runtime::IBuffer::UniqueConstPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer9UniquePtrE", "tensorrt_llm::runtime::IBuffer::UniquePtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE", "tensorrt_llm::runtime::IBuffer::data"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataEv", "tensorrt_llm::runtime::IBuffer::data"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE", "tensorrt_llm::runtime::IBuffer::data"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataEv", "tensorrt_llm::runtime::IBuffer::data"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE", "tensorrt_llm::runtime::IBuffer::data::index"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE", "tensorrt_llm::runtime::IBuffer::data::index"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer11getCapacityEv", "tensorrt_llm::runtime::IBuffer::getCapacity"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer11getDataTypeEv", "tensorrt_llm::runtime::IBuffer::getDataType"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer15getDataTypeNameEv", "tensorrt_llm::runtime::IBuffer::getDataTypeName"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer13getMemoryTypeEv", "tensorrt_llm::runtime::IBuffer::getMemoryType"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer17getMemoryTypeNameEv", "tensorrt_llm::runtime::IBuffer::getMemoryTypeName"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7getSizeEv", "tensorrt_llm::runtime::IBuffer::getSize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer14getSizeInBytesEv", "tensorrt_llm::runtime::IBuffer::getSizeInBytes"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer10memoryTypeEPKv", 
"tensorrt_llm::runtime::IBuffer::memoryType"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer10memoryTypeEPKv", "tensorrt_llm::runtime::IBuffer::memoryType::data"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBufferaSERK7IBuffer", "tensorrt_llm::runtime::IBuffer::operator="], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer7releaseEv", "tensorrt_llm::runtime::IBuffer::release"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer6resizeENSt6size_tE", "tensorrt_llm::runtime::IBuffer::resize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer6resizeENSt6size_tE", "tensorrt_llm::runtime::IBuffer::resize::newSize"], [2, 2, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice"], [2, 2, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice"], [2, 5, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::TConstPtr"], [2, 5, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::TConstPtr"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::buffer"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::buffer"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::offset"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::offset"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::offset"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::offset"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::size"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::tensor"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", 
"tensorrt_llm::runtime::IBuffer::slice::tensor"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7toBytesENSt6size_tE", "tensorrt_llm::runtime::IBuffer::toBytes"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7toBytesENSt6size_tE", "tensorrt_llm::runtime::IBuffer::toBytes::size"], [2, 2, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtr", "tensorrt_llm::runtime::IBuffer::view"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view"], [2, 5, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::TConstPtr"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::size"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::tensor"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtr", "tensorrt_llm::runtime::IBuffer::view::tensor"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::tensor"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE", "tensorrt_llm::runtime::IBuffer::wrap"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::T"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::T"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE", "tensorrt_llm::runtime::IBuffer::wrap::T"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::capacity"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::capacity"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::data"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::data"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::data"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::data"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::size"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::type"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::type"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE", "tensorrt_llm::runtime::IBuffer::wrap::v"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7IBufferD0Ev", "tensorrt_llm::runtime::IBuffer::~IBuffer"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoderE", "tensorrt_llm::runtime::IGptDecoder"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12acceptTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptTokens"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12acceptTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptTokens::contextLengths"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12acceptTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptTokens::draftTokenIds"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12acceptTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptTokens::finishedFinal"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12acceptTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptTokens::finishedSum"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12acceptTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptTokens::finishedVec"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12acceptTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptTokens::numDraftTokens"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12acceptTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptTokens::sequenceLengths"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12acceptTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptTokens::stream"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime11IGptDecoder12acceptTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptTokens::targetTokenIds"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createEN8nvinfer18DataTypeE6size_t6size_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::create"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createEN8nvinfer18DataTypeE6size_t6size_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::create::dtype"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createEN8nvinfer18DataTypeE6size_t6size_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::create::stream"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createEN8nvinfer18DataTypeE6size_t6size_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::create::vocabSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createEN8nvinfer18DataTypeE6size_t6size_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::create::vocabSizePadded"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forward"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forward::input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forward::output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forwardAsync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forwardAsync::input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forwardAsync::output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree::decodingInput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree::decodingOutput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree::finalOutputIds"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree::manager"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t8SizeType", "tensorrt_llm::runtime::IGptDecoder::setup"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t8SizeType", "tensorrt_llm::runtime::IGptDecoder::setup::batchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t8SizeType", "tensorrt_llm::runtime::IGptDecoder::setup::maxSequenceLength"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t8SizeType", "tensorrt_llm::runtime::IGptDecoder::setup::samplingConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoderD0Ev", "tensorrt_llm::runtime::IGptDecoder::~IGptDecoder"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatchE", "tensorrt_llm::runtime::IGptDecoderBatch"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoderBatch::CudaStreamPtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch16IGptDecoderBatchEv", "tensorrt_llm::runtime::IGptDecoderBatch::IGptDecoderBatch"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch9TensorPtrE", "tensorrt_llm::runtime::IGptDecoderBatch::TensorPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch8TokenPtrE", "tensorrt_llm::runtime::IGptDecoderBatch::TokenPtr"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch8finalizeE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::finalize"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch8finalizeE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::finalize::batchIdx"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch7forwardERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forward"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch7forwardERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forward::input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch7forwardERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forward::output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forwardAsync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forwardAsync::input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forwardAsync::output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE", "tensorrt_llm::runtime::IGptDecoderBatch::forwardSync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE", "tensorrt_llm::runtime::IGptDecoderBatch::forwardSync::token"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch14getCumLogProbsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getCumLogProbs"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch14getCumLogProbsEv", "tensorrt_llm::runtime::IGptDecoderBatch::getCumLogProbs"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch14getCumLogProbsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getCumLogProbs::batchIdx"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getFinishedEv", "tensorrt_llm::runtime::IGptDecoderBatch::getFinished"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getLogProbsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getLogProbs"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getLogProbsEv", "tensorrt_llm::runtime::IGptDecoderBatch::getLogProbs"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getLogProbsE8SizeType", 
"tensorrt_llm::runtime::IGptDecoderBatch::getLogProbs::batchIdx"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch10getNbStepsEv", "tensorrt_llm::runtime::IGptDecoderBatch::getNbSteps"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getOutputIdsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getOutputIds"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getOutputIdsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getOutputIds::batchIdx"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getParentIdsEv", "tensorrt_llm::runtime::IGptDecoderBatch::getParentIds"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::IGptDecoderBatch::newRequest"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::IGptDecoderBatch::newRequest::batchIdx"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::IGptDecoderBatch::newRequest::request"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::IGptDecoderBatch::newRequest::samplingConfig"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoderE", "tensorrt_llm::runtime::IStatefulGptDecoder"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder13CudaStreamPtrE", "tensorrt_llm::runtime::IStatefulGptDecoder::CudaStreamPtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder19IStatefulGptDecoderEv", "tensorrt_llm::runtime::IStatefulGptDecoder::IStatefulGptDecoder"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder9TensorPtrE", "tensorrt_llm::runtime::IStatefulGptDecoder::TensorPtr"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder8finalizeEv", "tensorrt_llm::runtime::IStatefulGptDecoder::finalize"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder7forwardERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forward"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder7forwardERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forward::input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder7forwardERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forward::output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forwardAsync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forwardAsync::input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forwardAsync::output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder11forwardSyncEv", "tensorrt_llm::runtime::IStatefulGptDecoder::forwardSync"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder15getAllNewTokensEv", "tensorrt_llm::runtime::IStatefulGptDecoder::getAllNewTokens"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder14getCumLogProbsEv", 
"tensorrt_llm::runtime::IStatefulGptDecoder::getCumLogProbs"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder11getLogProbsEv", "tensorrt_llm::runtime::IStatefulGptDecoder::getLogProbs"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder13getNbFinishedEv", "tensorrt_llm::runtime::IStatefulGptDecoder::getNbFinished"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getNewTokensE8SizeType", "tensorrt_llm::runtime::IStatefulGptDecoder::getNewTokens"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getNewTokensE8SizeType", "tensorrt_llm::runtime::IStatefulGptDecoder::getNewTokens::iter"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getOutputIdsEv", "tensorrt_llm::runtime::IStatefulGptDecoder::getOutputIds"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::newBatch"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::newBatch::inputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::newBatch::outputs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::newBatch::samplingConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::dtype"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::maxBatchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::maxBeamWidth"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::maxKvCacheLength"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::maxSequenceLength"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::maxTokensPerStep"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoderD0Ev", "tensorrt_llm::runtime::IStatefulGptDecoder::~IStatefulGptDecoder"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime7ITensorE", "tensorrt_llm::runtime::ITensor"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7DimTypeE", "tensorrt_llm::runtime::ITensor::DimType"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorERK7ITensor", "tensorrt_llm::runtime::ITensor::ITensor"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorEv", "tensorrt_llm::runtime::ITensor::ITensor"], [2, 0, 
1, "_CPPv4N12tensorrt_llm7runtime7ITensor5ShapeE", "tensorrt_llm::runtime::ITensor::Shape"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor14SharedConstPtrE", "tensorrt_llm::runtime::ITensor::SharedConstPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9SharedPtrE", "tensorrt_llm::runtime::ITensor::SharedPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor14UniqueConstPtrE", "tensorrt_llm::runtime::ITensor::UniqueConstPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9UniquePtrE", "tensorrt_llm::runtime::ITensor::UniquePtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor8castSizeE6size_t", "tensorrt_llm::runtime::ITensor::castSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor8castSizeE6size_t", "tensorrt_llm::runtime::ITensor::castSize::newSize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime7ITensor8getShapeEv", "tensorrt_llm::runtime::ITensor::getShape"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9makeShapeERKNSt16initializer_listI8SizeTypeEE", "tensorrt_llm::runtime::ITensor::makeShape"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9makeShapeERKNSt16initializer_listI8SizeTypeEE", "tensorrt_llm::runtime::ITensor::makeShape::dims"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensoraSERK7ITensor", "tensorrt_llm::runtime::ITensor::operator="], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7reshapeERK5Shape", "tensorrt_llm::runtime::ITensor::reshape"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7reshapeERK5Shape", "tensorrt_llm::runtime::ITensor::reshape::dims"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor6resizeENSt6size_tE", "tensorrt_llm::runtime::ITensor::resize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor6resizeENSt6size_tE", "tensorrt_llm::runtime::ITensor::resize::newSize"], [2, 2, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice"], [2, 2, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice"], [2, 5, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::TConstPtr"], [2, 5, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::TConstPtr"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::offset"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::offset"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::offset"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::offset"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::size"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::tensor"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::tensor"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::tensor"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::tensor"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeE8SizeType", "tensorrt_llm::runtime::ITensor::squeeze"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::squeeze"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeE8SizeType", "tensorrt_llm::runtime::ITensor::squeeze::dim"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::squeeze::dim"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::squeeze::shape"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor8toStringERK5Shape", "tensorrt_llm::runtime::ITensor::toString"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor8toStringERK5Shape", "tensorrt_llm::runtime::ITensor::toString::dims"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeE8SizeType", "tensorrt_llm::runtime::ITensor::unsqueeze"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::unsqueeze"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeE8SizeType", "tensorrt_llm::runtime::ITensor::unsqueeze::dim"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::unsqueeze::dim"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::unsqueeze::shape"], [2, 2, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape", "tensorrt_llm::runtime::ITensor::view"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewE9SharedPtr", "tensorrt_llm::runtime::ITensor::view"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape", "tensorrt_llm::runtime::ITensor::view"], [2, 5, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape", "tensorrt_llm::runtime::ITensor::view::TConstPtr"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape", "tensorrt_llm::runtime::ITensor::view::buffer"], [2, 3, 1, 
"_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape", "tensorrt_llm::runtime::ITensor::view::dims"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape", "tensorrt_llm::runtime::ITensor::view::dims"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape", "tensorrt_llm::runtime::ITensor::view::tensor"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewE9SharedPtr", "tensorrt_llm::runtime::ITensor::view::tensor"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor6volumeERK5Shape", "tensorrt_llm::runtime::ITensor::volume"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor6volumeERK5Shape", "tensorrt_llm::runtime::ITensor::volume::dims"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor17volumeNonNegativeERK5Shape", "tensorrt_llm::runtime::ITensor::volumeNonNegative"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor17volumeNonNegativeERK5Shape", "tensorrt_llm::runtime::ITensor::volumeNonNegative::shape"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape", "tensorrt_llm::runtime::ITensor::wrap"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape", "tensorrt_llm::runtime::ITensor::wrap"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape", "tensorrt_llm::runtime::ITensor::wrap"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape", "tensorrt_llm::runtime::ITensor::wrap::T"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::T"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::T"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::capacity"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::capacity"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape", "tensorrt_llm::runtime::ITensor::wrap::data"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::data"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::data"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::data"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape", "tensorrt_llm::runtime::ITensor::wrap::shape"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::shape"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::shape"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::shape"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::shape"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::type"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::type"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::v"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7ITensorD0Ev", "tensorrt_llm::runtime::ITensor::~ITensor"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemoryE", "tensorrt_llm::runtime::IpcMemory"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory10FLAGS_SIZEE", "tensorrt_llm::runtime::IpcMemory::FLAGS_SIZE"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryE11WorldConfigNSt6size_tE", "tensorrt_llm::runtime::IpcMemory::IpcMemory"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryE11WorldConfigNSt6size_tE", "tensorrt_llm::runtime::IpcMemory::IpcMemory::bufferSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryE11WorldConfigNSt6size_tE", "tensorrt_llm::runtime::IpcMemory::IpcMemory::worldConfig"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9TensorPtrE", "tensorrt_llm::runtime::IpcMemory::TensorPtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory17allocateIpcMemoryEv", "tensorrt_llm::runtime::IpcMemory::allocateIpcMemory"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory16destroyIpcMemoryEv", "tensorrt_llm::runtime::IpcMemory::destroyIpcMemory"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime9IpcMemory17getCommPtrsTensorEv", "tensorrt_llm::runtime::IpcMemory::getCommPtrsTensor"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory10mBufferPtrE", "tensorrt_llm::runtime::IpcMemory::mBufferPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory11mBufferSizeE", "tensorrt_llm::runtime::IpcMemory::mBufferSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9mCommPtrsE", "tensorrt_llm::runtime::IpcMemory::mCommPtrs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory12mWorldConfigE", "tensorrt_llm::runtime::IpcMemory::mWorldConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemoryD0Ev", "tensorrt_llm::runtime::IpcMemory::~IpcMemory"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCountersE", "tensorrt_llm::runtime::MemoryCounters"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8DiffTypeE", "tensorrt_llm::runtime::MemoryCounters::DiffType"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters14MemoryCountersEv", "tensorrt_llm::runtime::MemoryCounters::MemoryCounters"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8SizeTypeE", "tensorrt_llm::runtime::MemoryCounters::SizeType"], [2, 2, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate"], [2, 5, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate::T"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType8SizeType", 
"tensorrt_llm::runtime::MemoryCounters::allocate::memoryType"], [2, 3, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate::size"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8SizeTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString::bytes"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8SizeTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString::bytes"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString::precision"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8SizeTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString::precision"], [2, 2, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate"], [2, 5, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate::T"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate::memoryType"], [2, 3, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate::size"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getCpuEv", "tensorrt_llm::runtime::MemoryCounters::getCpu"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getCpuDiffEv", "tensorrt_llm::runtime::MemoryCounters::getCpuDiff"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getGpuEv", "tensorrt_llm::runtime::MemoryCounters::getGpu"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getGpuDiffEv", "tensorrt_llm::runtime::MemoryCounters::getGpuDiff"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters11getInstanceEv", "tensorrt_llm::runtime::MemoryCounters::getInstance"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters9getPinnedEv", "tensorrt_llm::runtime::MemoryCounters::getPinned"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters13getPinnedDiffEv", "tensorrt_llm::runtime::MemoryCounters::getPinnedDiff"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mCpuE", "tensorrt_llm::runtime::MemoryCounters::mCpu"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mCpuDiffE", "tensorrt_llm::runtime::MemoryCounters::mCpuDiff"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mGpuE", "tensorrt_llm::runtime::MemoryCounters::mGpu"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mGpuDiffE", "tensorrt_llm::runtime::MemoryCounters::mGpuDiff"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime14MemoryCounters9mInstanceE", "tensorrt_llm::runtime::MemoryCounters::mInstance"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters7mPinnedE", "tensorrt_llm::runtime::MemoryCounters::mPinned"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters11mPinnedDiffE", "tensorrt_llm::runtime::MemoryCounters::mPinnedDiff"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters8toStringEv", "tensorrt_llm::runtime::MemoryCounters::toString"], [2, 6, 1, "_CPPv4N12tensorrt_llm7runtime10MemoryTypeE", "tensorrt_llm::runtime::MemoryType"], [2, 7, 1, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kCPUE", "tensorrt_llm::runtime::MemoryType::kCPU"], [2, 7, 1, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kGPUE", "tensorrt_llm::runtime::MemoryType::kGPU"], [2, 7, 1, "_CPPv4N12tensorrt_llm7runtime10MemoryType7kPINNEDE", "tensorrt_llm::runtime::MemoryType::kPINNED"], [2, 1, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime16MemoryTypeStringE", "tensorrt_llm::runtime::MemoryTypeString"], [2, 5, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime16MemoryTypeStringE", "tensorrt_llm::runtime::MemoryTypeString::T"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEEE", "tensorrt_llm::runtime::MemoryTypeString<MemoryType::kCPU>"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEE5valueE", "tensorrt_llm::runtime::MemoryTypeString<MemoryType::kCPU>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEEE", "tensorrt_llm::runtime::MemoryTypeString<MemoryType::kGPU>"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEE5valueE", "tensorrt_llm::runtime::MemoryTypeString<MemoryType::kGPU>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEEE", "tensorrt_llm::runtime::MemoryTypeString<MemoryType::kPINNED>"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEE5valueE", "tensorrt_llm::runtime::MemoryTypeString<MemoryType::kPINNED>::value"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [2, 4, 
1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 0, 1, "_CPPv4I0EN12tensorrt_llm7runtime18PointerElementTypeE", "tensorrt_llm::runtime::PointerElementType"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime18PointerElementTypeE", "tensorrt_llm::runtime::PointerElementType::T"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParamsE", "tensorrt_llm::runtime::PromptTuningParams"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams18PromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::PromptTuningParams::PromptTuningParams"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime18PromptTuningParams18PromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::PromptTuningParams::PromptTuningParams::embeddingTable"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams18PromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::PromptTuningParams::PromptTuningParams::tasks"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams18PromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::PromptTuningParams::PromptTuningParams::vocabSize"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams8SizeTypeE", "tensorrt_llm::runtime::PromptTuningParams::SizeType"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams9TensorPtrE", "tensorrt_llm::runtime::PromptTuningParams::TensorPtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::batchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::manager"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::numContextRequests"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::packedInput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::reqBeamWidths"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::reqPromptLengths"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::tasksHost"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfigE", "tensorrt_llm::runtime::SamplingConfig"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9FloatTypeE", "tensorrt_llm::runtime::SamplingConfig::FloatType"], [2, 0, 1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig6OptVecE", "tensorrt_llm::runtime::SamplingConfig::OptVec"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig6OptVecE", "tensorrt_llm::runtime::SamplingConfig::OptVec::T"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigE8SizeType", "tensorrt_llm::runtime::SamplingConfig::SamplingConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigE8SizeType", 
"tensorrt_llm::runtime::SamplingConfig::SamplingConfig::beamWidth"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig23beamSearchDiversityRateE", "tensorrt_llm::runtime::SamplingConfig::beamSearchDiversityRate"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9beamWidthE", "tensorrt_llm::runtime::SamplingConfig::beamWidth"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig13lengthPenaltyE", "tensorrt_llm::runtime::SamplingConfig::lengthPenalty"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9minLengthE", "tensorrt_llm::runtime::SamplingConfig::minLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig15presencePenaltyE", "tensorrt_llm::runtime::SamplingConfig::presencePenalty"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig10randomSeedE", "tensorrt_llm::runtime::SamplingConfig::randomSeed"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig17repetitionPenaltyE", "tensorrt_llm::runtime::SamplingConfig::repetitionPenalty"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig11temperatureE", "tensorrt_llm::runtime::SamplingConfig::temperature"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topKE", "tensorrt_llm::runtime::SamplingConfig::topK"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topPE", "tensorrt_llm::runtime::SamplingConfig::topP"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9topPDecayE", "tensorrt_llm::runtime::SamplingConfig::topPDecay"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig7topPMinE", "tensorrt_llm::runtime::SamplingConfig::topPMin"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig12topPResetIdsE", "tensorrt_llm::runtime::SamplingConfig::topPResetIds"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime8SizeTypeE", "tensorrt_llm::runtime::SizeType"], [2, 0, 1, "_CPPv4I0EN12tensorrt_llm7runtime12StringPtrMapE", "tensorrt_llm::runtime::StringPtrMap"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime12StringPtrMapE", "tensorrt_llm::runtime::StringPtrMap::T"], [2, 1, 1, "_CPPv4I0_bEN12tensorrt_llm7runtime11TRTDataTypeE", "tensorrt_llm::runtime::TRTDataType"], [2, 5, 1, "_CPPv4I0_bEN12tensorrt_llm7runtime11TRTDataTypeE", "tensorrt_llm::runtime::TRTDataType::T"], [2, 1, 1, "_CPPv4I0EN12tensorrt_llm7runtime11TRTDataTypeIP1TEE", "tensorrt_llm::runtime::TRTDataType<T*>"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime11TRTDataTypeIP1TEE", "tensorrt_llm::runtime::TRTDataType<T*>::T"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE15kUnderlyingTypeE", "tensorrt_llm::runtime::TRTDataType<T*>::kUnderlyingType"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE5valueE", "tensorrt_llm::runtime::TRTDataType<T*>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIbEE", "tensorrt_llm::runtime::TRTDataType<bool>"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIbE5valueE", "tensorrt_llm::runtime::TRTDataType<bool>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIfEE", "tensorrt_llm::runtime::TRTDataType<float>"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIfE5valueE", "tensorrt_llm::runtime::TRTDataType<float>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeI4halfEE", "tensorrt_llm::runtime::TRTDataType<half>"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeI4halfE5valueE", "tensorrt_llm::runtime::TRTDataType<half>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEEE", "tensorrt_llm::runtime::TRTDataType<std::int32_t>"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEE5valueE", "tensorrt_llm::runtime::TRTDataType<std::int32_t>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEEE", "tensorrt_llm::runtime::TRTDataType<std::int64_t>"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEE5valueE", "tensorrt_llm::runtime::TRTDataType<std::int64_t>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEEE", "tensorrt_llm::runtime::TRTDataType<std::int8_t>"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEE5valueE", "tensorrt_llm::runtime::TRTDataType<std::int8_t>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEEE", "tensorrt_llm::runtime::TRTDataType<std::uint32_t>"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEE5valueE", "tensorrt_llm::runtime::TRTDataType<std::uint32_t>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEEE", "tensorrt_llm::runtime::TRTDataType<std::uint64_t>"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEE5valueE", "tensorrt_llm::runtime::TRTDataType<std::uint64_t>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEEE", "tensorrt_llm::runtime::TRTDataType<std::uint8_t>"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEE5valueE", "tensorrt_llm::runtime::TRTDataType<std::uint8_t>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIPvEE", "tensorrt_llm::runtime::TRTDataType<void*>"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIPvE5valueE", "tensorrt_llm::runtime::TRTDataType<void*>::value"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLoggerE", "tensorrt_llm::runtime::TllmLogger"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8getLevelEv", "tensorrt_llm::runtime::TllmLogger::getLevel"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE", "tensorrt_llm::runtime::TllmLogger::log"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE", "tensorrt_llm::runtime::TllmLogger::log::msg"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE", "tensorrt_llm::runtime::TllmLogger::log::severity"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8setLevelE8Severity", "tensorrt_llm::runtime::TllmLogger::setLevel"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8setLevelE8Severity", "tensorrt_llm::runtime::TllmLogger::setLevel::level"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11TokenIdTypeE", "tensorrt_llm::runtime::TokenIdType"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfigE", "tensorrt_llm::runtime::WorldConfig"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::WorldConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::WorldConfig::gpusPerNode"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::WorldConfig::pipelineParallelism"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::WorldConfig::rank"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeType", 
"tensorrt_llm::runtime::WorldConfig::WorldConfig::tensorParallelism"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig9getDeviceEv", "tensorrt_llm::runtime::WorldConfig::getDevice"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig14getGpusPerNodeEv", "tensorrt_llm::runtime::WorldConfig::getGpusPerNode"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig11getLastRankEv", "tensorrt_llm::runtime::WorldConfig::getLastRank"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig24getPipelineParallelGroupEv", "tensorrt_llm::runtime::WorldConfig::getPipelineParallelGroup"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig23getPipelineParallelRankEv", "tensorrt_llm::runtime::WorldConfig::getPipelineParallelRank"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig22getPipelineParallelismEv", "tensorrt_llm::runtime::WorldConfig::getPipelineParallelism"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getRankEv", "tensorrt_llm::runtime::WorldConfig::getRank"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getSizeEv", "tensorrt_llm::runtime::WorldConfig::getSize"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig21getTensorParallelRankEv", "tensorrt_llm::runtime::WorldConfig::getTensorParallelRank"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig20getTensorParallelismEv", "tensorrt_llm::runtime::WorldConfig::getTensorParallelism"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig27isFirstPipelineParallelRankEv", "tensorrt_llm::runtime::WorldConfig::isFirstPipelineParallelRank"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig26isLastPipelineParallelRankEv", "tensorrt_llm::runtime::WorldConfig::isLastPipelineParallelRank"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig18isPipelineParallelEv", "tensorrt_llm::runtime::WorldConfig::isPipelineParallel"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig16isTensorParallelEv", "tensorrt_llm::runtime::WorldConfig::isTensorParallel"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig19kDefaultGpusPerNodeE", "tensorrt_llm::runtime::WorldConfig::kDefaultGpusPerNode"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig12mGpusPerNodeE", "tensorrt_llm::runtime::WorldConfig::mGpusPerNode"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig20mPipelineParallelismE", "tensorrt_llm::runtime::WorldConfig::mPipelineParallelism"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig5mRankE", "tensorrt_llm::runtime::WorldConfig::mRank"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig18mTensorParallelismE", "tensorrt_llm::runtime::WorldConfig::mTensorParallelism"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::WorldConfig::mpi"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiERN8nvinfer17ILoggerE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::WorldConfig::mpi"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::WorldConfig::mpi::gpusPerNode"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiERN8nvinfer17ILoggerE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::WorldConfig::mpi::gpusPerNode"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiERN8nvinfer17ILoggerE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::WorldConfig::mpi::logger"], [2, 3, 1, 
"_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::WorldConfig::mpi::pipelineParallelism"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiERN8nvinfer17ILoggerE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::WorldConfig::mpi::pipelineParallelism"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::WorldConfig::mpi::tensorParallelism"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiERN8nvinfer17ILoggerE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::WorldConfig::mpi::tensorParallelism"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11validConfigERN8nvinfer17ILoggerE8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::validConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11validConfigERN8nvinfer17ILoggerE8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::validConfig::logger"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11validConfigERN8nvinfer17ILoggerE8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::validConfig::pipelineParallelism"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11validConfigERN8nvinfer17ILoggerE8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::validConfig::tensorParallelism"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer", "tensorrt_llm::runtime::bufferCast"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer", "tensorrt_llm::runtime::bufferCast"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer", "tensorrt_llm::runtime::bufferCast::T"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer", "tensorrt_llm::runtime::bufferCast::T"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer", "tensorrt_llm::runtime::bufferCast::buffer"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer", "tensorrt_llm::runtime::bufferCast::buffer"], [2, 2, 1, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE", "tensorrt_llm::runtime::constPointerCast"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE", "tensorrt_llm::runtime::constPointerCast"], [2, 5, 1, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE", "tensorrt_llm::runtime::constPointerCast::D"], [2, 5, 1, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE", "tensorrt_llm::runtime::constPointerCast::T"], [2, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE", "tensorrt_llm::runtime::constPointerCast::T"], [2, 3, 1, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE", "tensorrt_llm::runtime::constPointerCast::ptr"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE", "tensorrt_llm::runtime::constPointerCast::ptr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7decoderE", "tensorrt_llm::runtime::decoder"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5InputE", 
"tensorrt_llm::runtime::decoder::Input"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input5InputE9TensorPtr", "tensorrt_llm::runtime::decoder::Input::Input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input5InputE9TensorPtr", "tensorrt_llm::runtime::decoder::Input::Input::logits"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input9TensorPtrE", "tensorrt_llm::runtime::decoder::Input::TensorPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input16cacheIndirectionE", "tensorrt_llm::runtime::decoder::Input::cacheIndirection"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input6logitsE", "tensorrt_llm::runtime::decoder::Input::logits"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6OutputE", "tensorrt_llm::runtime::decoder::Output"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output6OutputEv", "tensorrt_llm::runtime::decoder::Output::Output"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output9TensorPtrE", "tensorrt_llm::runtime::decoder::Output::TensorPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output16cacheIndirectionE", "tensorrt_llm::runtime::decoder::Output::cacheIndirection"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output15sequenceLengthsE", "tensorrt_llm::runtime::decoder::Output::sequenceLengths"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batchE", "tensorrt_llm::runtime::decoder_batch"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5InputE", "tensorrt_llm::runtime::decoder_batch::Input"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEE", "tensorrt_llm::runtime::decoder_batch::Input::Input"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEERKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEE", "tensorrt_llm::runtime::decoder_batch::Input::Input"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEERKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEERKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::active"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEERKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::active"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::logits"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEERKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::logits"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::logits"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEERKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::logits"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input14TensorConstPtrE", "tensorrt_llm::runtime::decoder_batch::Input::TensorConstPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input9TensorPtrE", "tensorrt_llm::runtime::decoder_batch::Input::TensorPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input6activeE", 
"tensorrt_llm::runtime::decoder_batch::Input::active"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input16cacheIndirectionE", "tensorrt_llm::runtime::decoder_batch::Input::cacheIndirection"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input6logitsE", "tensorrt_llm::runtime::decoder_batch::Input::logits"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch6OutputE", "tensorrt_llm::runtime::decoder_batch::Output"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7RequestE", "tensorrt_llm::runtime::decoder_batch::Request"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request9BufferPtrE", "tensorrt_llm::runtime::decoder_batch::Request::BufferPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request14ConstTensorPtrE", "tensorrt_llm::runtime::decoder_batch::Request::ConstTensorPtr"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14ConstTensorPtr8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::decoder_batch::Request::Request"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14ConstTensorPtr8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::decoder_batch::Request::Request::endId"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14ConstTensorPtr8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::decoder_batch::Request::Request::ids"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14ConstTensorPtr8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::decoder_batch::Request::Request::inputLen"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14ConstTensorPtr8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::decoder_batch::Request::Request::maxNewTokens"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request9TensorPtrE", "tensorrt_llm::runtime::decoder_batch::Request::TensorPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12badWordsListE", "tensorrt_llm::runtime::decoder_batch::Request::badWordsList"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request18computeCumLogProbsE", "tensorrt_llm::runtime::decoder_batch::Request::computeCumLogProbs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request15computeLogProbsE", "tensorrt_llm::runtime::decoder_batch::Request::computeLogProbs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request11draftTokensE", "tensorrt_llm::runtime::decoder_batch::Request::draftTokens"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13embeddingBiasE", "tensorrt_llm::runtime::decoder_batch::Request::embeddingBias"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request5endIdE", "tensorrt_llm::runtime::decoder_batch::Request::endId"], [2, 2, 1, "_CPPv4NK12tensorrt_llm7runtime13decoder_batch7Request22generatedTokensPerStepEv", "tensorrt_llm::runtime::decoder_batch::Request::generatedTokensPerStep"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request3idsE", "tensorrt_llm::runtime::decoder_batch::Request::ids"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request8inputLenE", "tensorrt_llm::runtime::decoder_batch::Request::inputLen"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12maxNewTokensE", "tensorrt_llm::runtime::decoder_batch::Request::maxNewTokens"], [2, 4, 1, 
"_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13stopWordsListE", "tensorrt_llm::runtime::decoder_batch::Request::stopWordsList"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5TokenE", "tensorrt_llm::runtime::decoder_batch::Token"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5TokenERR9CudaEventRKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Token::Token"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5TokenERR9CudaEventRKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Token::Token::active"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5TokenERR9CudaEventRKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Token::Token::event"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token6activeE", "tensorrt_llm::runtime::decoder_batch::Token::active"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5eventE", "tensorrt_llm::runtime::decoder_batch::Token::event"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer", "tensorrt_llm::runtime::operator<<"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor", "tensorrt_llm::runtime::operator<<"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE", "tensorrt_llm::runtime::operator<<"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer", "tensorrt_llm::runtime::operator<<::buffer"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE", "tensorrt_llm::runtime::operator<<::dims"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer", "tensorrt_llm::runtime::operator<<::output"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor", "tensorrt_llm::runtime::operator<<::output"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE", "tensorrt_llm::runtime::operator<<::output"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor", "tensorrt_llm::runtime::operator<<::tensor"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime13setPeerAccessE11WorldConfigb", "tensorrt_llm::runtime::setPeerAccess"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13setPeerAccessE11WorldConfigb", "tensorrt_llm::runtime::setPeerAccess::enable"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13setPeerAccessE11WorldConfigb", "tensorrt_llm::runtime::setPeerAccess::worldConfig"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime5utilsE", "tensorrt_llm::runtime::utils"], [2, 2, 1, "_CPPv4N12tensorrt_llm7runtime5utils10loadEngineERKNSt6stringE", "tensorrt_llm::runtime::utils::loadEngine"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime5utils10loadEngineERKNSt6stringE", "tensorrt_llm::runtime::utils::loadEngine::enginePath"], [20, 8, 0, "-", "tensorrt_llm"]], "tensorrt_llm": [[15, 8, 0, "-", "functional"], [17, 8, 0, "-", "models"], [18, 8, 0, "-", "plugin"], [19, 8, 0, "-", "quantization"], [20, 8, 0, "-", "runtime"]], "tensorrt_llm.functional": [[15, 9, 1, "", "AllReduceStrategy"], [15, 9, 1, "", "AttentionMaskType"], [15, 9, 1, "", "DimRange"], [15, 9, 1, "", "LayerNormPositionType"], [15, 9, 1, "", "LayerNormType"], [15, 9, 1, "", "MLPType"], [15, 9, 1, "", "PositionEmbeddingType"], [15, 9, 1, "", "RotaryScalingType"], [15, 9, 1, "", "Tensor"], [15, 13, 1, "", "abs"], [15, 13, 1, "", "activation"], [15, 13, 1, "", "add"], [15, 13, 1, "", "allgather"], [15, 13, 1, "", "allreduce"], [15, 13, 1, "", "arange"], [15, 13, 1, "", "argmax"], [15, 13, 1, "", "assertion"], [15, 13, 1, "", "avg_pool2d"], [15, 13, 1, "", "bert_attention"], [15, 13, 1, "", 
"broadcast_helper"], [15, 13, 1, "", "cast"], [15, 13, 1, "", "chunk"], [15, 13, 1, "", "clip"], [15, 13, 1, "", "concat"], [15, 13, 1, "", "constant"], [15, 13, 1, "", "constant_to_tensor_"], [15, 13, 1, "", "conv2d"], [15, 13, 1, "", "conv_transpose2d"], [15, 13, 1, "", "cos"], [15, 13, 1, "", "div"], [15, 13, 1, "", "einsum"], [15, 13, 1, "", "elementwise_binary"], [15, 13, 1, "", "embedding"], [15, 13, 1, "", "eq"], [15, 13, 1, "", "exp"], [15, 13, 1, "", "expand"], [15, 13, 1, "", "expand_dims"], [15, 13, 1, "", "expand_dims_like"], [15, 13, 1, "", "expand_mask"], [15, 13, 1, "", "flip"], [15, 13, 1, "", "gather"], [15, 13, 1, "", "gather_last_token_logits"], [15, 13, 1, "", "geglu"], [15, 13, 1, "", "gelu"], [15, 13, 1, "", "generate_alibi_biases"], [15, 13, 1, "", "generate_alibi_slopes"], [15, 13, 1, "", "gpt_attention"], [15, 13, 1, "", "group_norm"], [15, 13, 1, "", "gt"], [15, 13, 1, "", "identity"], [15, 13, 1, "", "index_select"], [15, 13, 1, "", "interpolate"], [15, 13, 1, "", "is_gated_activation"], [15, 13, 1, "", "layer_norm"], [15, 13, 1, "", "lora_plugin"], [15, 13, 1, "", "lt"], [15, 13, 1, "", "matmul"], [15, 13, 1, "", "max"], [15, 13, 1, "", "maximum"], [15, 13, 1, "", "mean"], [15, 13, 1, "", "minimum"], [15, 13, 1, "", "mul"], [15, 13, 1, "", "non_gated_version"], [15, 13, 1, "", "op_and"], [15, 13, 1, "", "op_or"], [15, 13, 1, "", "outer"], [15, 13, 1, "", "permute"], [15, 13, 1, "", "pow"], [15, 13, 1, "", "recv"], [15, 13, 1, "", "relu"], [15, 13, 1, "", "repeat_interleave"], [15, 13, 1, "", "rms_norm"], [15, 13, 1, "", "round"], [15, 13, 1, "", "select"], [15, 13, 1, "", "send"], [15, 13, 1, "", "shape"], [15, 13, 1, "", "sigmoid"], [15, 13, 1, "", "silu"], [15, 13, 1, "", "sin"], [15, 13, 1, "", "slice"], [15, 13, 1, "", "softmax"], [15, 13, 1, "", "softplus"], [15, 13, 1, "", "split"], [15, 13, 1, "", "sqrt"], [15, 13, 1, "", "squared_relu"], [15, 13, 1, "", "sub"], [15, 13, 1, "", "swiglu"], [15, 13, 1, "", "tanh"], [15, 13, 1, "", "transpose"], [15, 13, 1, "", "unary"], [15, 13, 1, "", "unsqueeze"], [15, 13, 1, "", "view"], [15, 13, 1, "", "where"]], "tensorrt_llm.functional.AllReduceStrategy": [[15, 10, 1, "", "AUTO"], [15, 10, 1, "", "ONESHOT"], [15, 10, 1, "", "RING"], [15, 10, 1, "", "TWOSHOT"]], "tensorrt_llm.functional.AttentionMaskType": [[15, 10, 1, "", "bidirectional"], [15, 10, 1, "", "bidirectionalglm"], [15, 10, 1, "", "causal"], [15, 10, 1, "", "padding"]], "tensorrt_llm.functional.LayerNormPositionType": [[15, 10, 1, "", "post_layernorm"], [15, 10, 1, "", "pre_layernorm"]], "tensorrt_llm.functional.LayerNormType": [[15, 10, 1, "", "GroupNorm"], [15, 10, 1, "", "LayerNorm"], [15, 10, 1, "", "RmsNorm"]], "tensorrt_llm.functional.MLPType": [[15, 10, 1, "", "FusedGatedMLP"], [15, 10, 1, "", "GatedMLP"], [15, 10, 1, "", "MLP"]], "tensorrt_llm.functional.PositionEmbeddingType": [[15, 10, 1, "", "alibi"], [15, 10, 1, "", "alibi_with_scale"], [15, 10, 1, "", "chatglm"], [15, 11, 1, "", "choices"], [15, 11, 1, "", "is_alibi"], [15, 11, 1, "", "is_rope"], [15, 10, 1, "", "learned_absolute"], [15, 10, 1, "", "relative"], [15, 10, 1, "", "rope_gpt_neox"], [15, 10, 1, "", "rope_gptj"]], "tensorrt_llm.functional.RotaryScalingType": [[15, 10, 1, "", "dynamic"], [15, 10, 1, "", "linear"], [15, 10, 1, "", "none"]], "tensorrt_llm.functional.Tensor": [[15, 11, 1, "", "abs"], [15, 11, 1, "", "cast"], [15, 12, 1, "", "dtype"], [15, 11, 1, "", "get_parent"], [15, 11, 1, "", "get_users"], [15, 11, 1, "", "is_dynamic"], [15, 11, 1, "", "is_trt_wrapper"], [15, 12, 1, 
"", "location"], [15, 11, 1, "", "mark_output"], [15, 11, 1, "", "max"], [15, 11, 1, "", "mean"], [15, 12, 1, "", "name"], [15, 11, 1, "", "ndim"], [15, 12, 1, "", "network"], [15, 11, 1, "", "permute"], [15, 11, 1, "", "rank"], [15, 11, 1, "", "replace_all_uses_with"], [15, 12, 1, "", "shape"], [15, 11, 1, "", "size"], [15, 11, 1, "", "split"], [15, 11, 1, "", "sqrt"], [15, 11, 1, "", "transpose"], [15, 11, 1, "", "view"]], "tensorrt_llm.layers": [[16, 8, 0, "-", "activation"], [16, 8, 0, "-", "attention"], [16, 8, 0, "-", "cast"], [16, 8, 0, "-", "conv"], [16, 8, 0, "-", "embedding"], [16, 8, 0, "-", "linear"], [16, 8, 0, "-", "mlp"], [16, 8, 0, "-", "normalization"], [16, 8, 0, "-", "pooling"]], "tensorrt_llm.layers.activation": [[16, 9, 1, "", "Mish"]], "tensorrt_llm.layers.activation.Mish": [[16, 11, 1, "", "forward"]], "tensorrt_llm.layers.attention": [[16, 9, 1, "", "Attention"], [16, 9, 1, "", "AttentionParams"], [16, 9, 1, "", "BertAttention"], [16, 9, 1, "", "KeyValueCacheParams"], [16, 9, 1, "", "RopeEmbeddingUtils"]], "tensorrt_llm.layers.attention.Attention": [[16, 11, 1, "", "forward"]], "tensorrt_llm.layers.attention.AttentionParams": [[16, 11, 1, "", "is_valid"], [16, 11, 1, "", "is_valid_cross_attn"]], "tensorrt_llm.layers.attention.BertAttention": [[16, 11, 1, "", "forward"]], "tensorrt_llm.layers.attention.KeyValueCacheParams": [[16, 11, 1, "", "fill_none_tensor_list"], [16, 11, 1, "", "get_first_kv_cache_block_pointers"], [16, 11, 1, "", "get_first_past_key_value"], [16, 11, 1, "", "is_valid"]], "tensorrt_llm.layers.attention.RopeEmbeddingUtils": [[16, 11, 1, "", "apply_rotary_pos_emb"], [16, 11, 1, "", "apply_rotary_pos_emb_chatglm"], [16, 11, 1, "", "create_sinusoidal_positions"], [16, 11, 1, "", "rotate_every_two"], [16, 11, 1, "", "rotate_half"]], "tensorrt_llm.layers.cast": [[16, 9, 1, "", "Cast"]], "tensorrt_llm.layers.cast.Cast": [[16, 11, 1, "", "forward"]], "tensorrt_llm.layers.conv": [[16, 9, 1, "", "Conv2d"], [16, 9, 1, "", "ConvTranspose2d"]], "tensorrt_llm.layers.conv.Conv2d": [[16, 11, 1, "", "forward"]], "tensorrt_llm.layers.conv.ConvTranspose2d": [[16, 11, 1, "", "forward"]], "tensorrt_llm.layers.embedding": [[16, 9, 1, "", "Embedding"], [16, 9, 1, "", "PromptTuningEmbedding"]], "tensorrt_llm.layers.embedding.Embedding": [[16, 11, 1, "", "forward"]], "tensorrt_llm.layers.embedding.PromptTuningEmbedding": [[16, 11, 1, "", "forward"]], "tensorrt_llm.layers.linear": [[16, 10, 1, "", "ColumnLinear"], [16, 9, 1, "", "Linear"], [16, 9, 1, "", "RowLinear"]], "tensorrt_llm.layers.linear.Linear": [[16, 11, 1, "", "forward"], [16, 11, 1, "", "multiply_gather"]], "tensorrt_llm.layers.linear.RowLinear": [[16, 11, 1, "", "forward"], [16, 11, 1, "", "multiply_reduce"]], "tensorrt_llm.layers.mlp": [[16, 9, 1, "", "FusedGatedMLP"], [16, 9, 1, "", "GatedMLP"], [16, 9, 1, "", "MLP"]], "tensorrt_llm.layers.mlp.FusedGatedMLP": [[16, 11, 1, "", "forward"]], "tensorrt_llm.layers.mlp.GatedMLP": [[16, 11, 1, "", "forward"]], "tensorrt_llm.layers.mlp.MLP": [[16, 11, 1, "", "forward"]], "tensorrt_llm.layers.normalization": [[16, 9, 1, "", "GroupNorm"], [16, 9, 1, "", "LayerNorm"], [16, 9, 1, "", "RmsNorm"]], "tensorrt_llm.layers.normalization.GroupNorm": [[16, 11, 1, "", "forward"]], "tensorrt_llm.layers.normalization.LayerNorm": [[16, 11, 1, "", "forward"]], "tensorrt_llm.layers.normalization.RmsNorm": [[16, 11, 1, "", "forward"]], "tensorrt_llm.layers.pooling": [[16, 9, 1, "", "AvgPool2d"]], "tensorrt_llm.layers.pooling.AvgPool2d": [[16, 11, 1, "", "forward"]], 
"tensorrt_llm.models": [[17, 9, 1, "", "BaichuanForCausalLM"], [17, 9, 1, "", "BertForQuestionAnswering"], [17, 9, 1, "", "BertModel"], [17, 9, 1, "", "BloomForCausalLM"], [17, 9, 1, "", "BloomModel"], [17, 9, 1, "", "ChatGLMHeadModel"], [17, 9, 1, "", "ChatGLMModel"], [17, 9, 1, "", "DecoderModel"], [17, 9, 1, "", "EncoderModel"], [17, 9, 1, "", "FalconForCausalLM"], [17, 9, 1, "", "FalconModel"], [17, 9, 1, "", "GPTJForCausalLM"], [17, 9, 1, "", "GPTJModel"], [17, 9, 1, "", "GPTLMHeadModel"], [17, 9, 1, "", "GPTModel"], [17, 9, 1, "", "GPTNeoXForCausalLM"], [17, 9, 1, "", "GPTNeoXModel"], [17, 9, 1, "", "LLaMAForCausalLM"], [17, 9, 1, "", "LLaMAModel"], [17, 9, 1, "", "OPTLMHeadModel"], [17, 9, 1, "", "OPTModel"], [17, 9, 1, "", "QWenForCausalLM"], [17, 13, 1, "", "quantize_model"]], "tensorrt_llm.models.BaichuanForCausalLM": [[17, 11, 1, "", "forward"], [17, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.BertForQuestionAnswering": [[17, 11, 1, "", "forward"]], "tensorrt_llm.models.BertModel": [[17, 11, 1, "", "forward"]], "tensorrt_llm.models.BloomForCausalLM": [[17, 11, 1, "", "forward"], [17, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.BloomModel": [[17, 11, 1, "", "forward"]], "tensorrt_llm.models.ChatGLMHeadModel": [[17, 11, 1, "", "forward"], [17, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.ChatGLMModel": [[17, 11, 1, "", "forward"]], "tensorrt_llm.models.DecoderModel": [[17, 11, 1, "", "forward"], [17, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.EncoderModel": [[17, 11, 1, "", "forward"], [17, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.FalconForCausalLM": [[17, 11, 1, "", "forward"], [17, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.FalconModel": [[17, 11, 1, "", "forward"]], "tensorrt_llm.models.GPTJForCausalLM": [[17, 11, 1, "", "forward"], [17, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.GPTJModel": [[17, 11, 1, "", "forward"]], "tensorrt_llm.models.GPTLMHeadModel": [[17, 11, 1, "", "forward"], [17, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.GPTModel": [[17, 11, 1, "", "forward"]], "tensorrt_llm.models.GPTNeoXForCausalLM": [[17, 11, 1, "", "forward"], [17, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.GPTNeoXModel": [[17, 11, 1, "", "forward"]], "tensorrt_llm.models.LLaMAForCausalLM": [[17, 11, 1, "", "forward"], [17, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.LLaMAModel": [[17, 11, 1, "", "forward"]], "tensorrt_llm.models.OPTLMHeadModel": [[17, 11, 1, "", "forward"], [17, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.OPTModel": [[17, 11, 1, "", "forward"]], "tensorrt_llm.models.QWenForCausalLM": [[17, 11, 1, "", "forward"], [17, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.quantization": [[19, 9, 1, "", "QuantMode"]], "tensorrt_llm.runtime": [[20, 9, 1, "", "ChatGLMGenerationSession"], [20, 9, 1, "", "GenerationSequence"], [20, 9, 1, "", "GenerationSession"], [20, 9, 1, "", "KVCacheManager"], [20, 9, 1, "", "ModelConfig"], [20, 9, 1, "", "ModelRunner"], [20, 9, 1, "", "Session"], [20, 9, 1, "", "TensorInfo"], [20, 13, 1, "", "to_word_list_format"]], "tensorrt_llm.runtime.GenerationSequence": [[20, 11, 1, "", "get_batch_idx"], [20, 11, 1, "", "get_seq_idx"]], "tensorrt_llm.runtime.GenerationSession": [[20, 10, 1, "", "batch_size"], [20, 10, 1, "", "buffer_allocated"], [20, 12, 1, "", "cross_attention"], [20, 10, 1, "", "cuda_graph_mode"], [20, 11, 1, "", "cuda_stream_guard"], [20, 10, 1, "", "debug_mode"], [20, 10, 1, "", "debug_tensors_to_save"], [20, 11, 1, "", "decode"], [20, 11, 1, "", 
"decode_batch"], [20, 11, 1, "", "decode_regular"], [20, 11, 1, "", "decode_stream"], [20, 10, 1, "", "device"], [20, 12, 1, "", "dtype"], [20, 11, 1, "", "finalize_decoder"], [20, 12, 1, "", "first_layer"], [20, 12, 1, "", "gather_all_token_logits"], [20, 11, 1, "", "handle_per_step"], [20, 12, 1, "", "has_position_embedding"], [20, 12, 1, "", "has_token_type_embedding"], [20, 12, 1, "", "head_size"], [20, 12, 1, "", "hidden_size"], [20, 12, 1, "", "last_layer"], [20, 10, 1, "", "mapping"], [20, 12, 1, "", "num_heads"], [20, 12, 1, "", "num_heads_kv"], [20, 12, 1, "", "num_layers"], [20, 12, 1, "", "paged_kv_cache"], [20, 11, 1, "", "pp_communicate_final_output_ids"], [20, 11, 1, "", "pp_communicate_new_tokens"], [20, 12, 1, "", "quant_mode"], [20, 12, 1, "", "remove_input_padding"], [20, 10, 1, "", "runtime"], [20, 11, 1, "", "setup"], [20, 12, 1, "", "tokens_per_block"], [20, 12, 1, "", "use_custom_all_reduce"], [20, 12, 1, "", "use_gpt_attention_plugin"], [20, 12, 1, "", "use_lora_plugin"], [20, 12, 1, "", "vocab_size"]], "tensorrt_llm.runtime.KVCacheManager": [[20, 11, 1, "", "add_sequence"], [20, 11, 1, "", "get_pointer_arrays"], [20, 11, 1, "", "step"]], "tensorrt_llm.runtime.ModelConfig": [[20, 10, 1, "", "cross_attention"], [20, 10, 1, "", "dtype"], [20, 10, 1, "", "gather_all_token_logits"], [20, 10, 1, "", "gpt_attention_plugin"], [20, 10, 1, "", "has_position_embedding"], [20, 10, 1, "", "has_token_type_embedding"], [20, 10, 1, "", "head_size"], [20, 10, 1, "", "hidden_size"], [20, 10, 1, "", "lora_plugin"], [20, 10, 1, "", "max_prompt_embedding_table_size"], [20, 10, 1, "", "model_name"], [20, 10, 1, "", "num_heads"], [20, 10, 1, "", "num_kv_heads"], [20, 10, 1, "", "num_layers"], [20, 10, 1, "", "paged_kv_cache"], [20, 10, 1, "", "quant_mode"], [20, 10, 1, "", "remove_input_padding"], [20, 10, 1, "", "tokens_per_block"], [20, 10, 1, "", "use_custom_all_reduce"], [20, 10, 1, "", "vocab_size"]], "tensorrt_llm.runtime.ModelRunner": [[20, 11, 1, "", "from_dir"], [20, 11, 1, "", "generate"], [20, 12, 1, "", "remove_input_padding"]], "tensorrt_llm.runtime.Session": [[20, 12, 1, "", "context"], [20, 12, 1, "", "engine"], [20, 11, 1, "", "from_engine"], [20, 11, 1, "", "from_serialized_engine"], [20, 11, 1, "", "infer_shapes"], [20, 11, 1, "", "run"], [20, 12, 1, "", "runtime"], [20, 11, 1, "", "set_shapes"]], "tensorrt_llm.runtime.TensorInfo": [[20, 10, 1, "", "dtype"], [20, 10, 1, "", "name"], [20, 10, 1, "", "shape"]]}, "objtypes": {"0": "cpp:type", "1": "cpp:class", "2": "cpp:function", "3": "cpp:functionParam", "4": "cpp:member", "5": "cpp:templateParam", "6": "cpp:enum", "7": "cpp:enumerator", "8": "py:module", "9": "py:class", "10": "py:attribute", "11": "py:method", "12": "py:property", "13": "py:function"}, "objnames": {"0": ["cpp", "type", "C++ type"], "1": ["cpp", "class", "C++ class"], "2": ["cpp", "function", "C++ function"], "3": ["cpp", "functionParam", "C++ function parameter"], "4": ["cpp", "member", "C++ member"], "5": ["cpp", "templateParam", "C++ template parameter"], "6": ["cpp", "enum", "C++ enum"], "7": ["cpp", "enumerator", "C++ enumerator"], "8": ["py", "module", "Python module"], "9": ["py", "class", "Python class"], "10": ["py", "attribute", "Python attribute"], "11": ["py", "method", "Python method"], "12": ["py", "property", "Python property"], "13": ["py", "function", "Python function"]}, "titleterms": {"how": [0, 1], "add": 0, "new": 0, "model": [0, 1, 3, 8, 13, 17], "step": [0, 11], "debug": 1, "overview": [1, 11], "unit": 1, "test": 1, "e2": 1, 
"execut": [1, 4], "error": 1, "runtim": [2, 3, 8, 11, 12, 20], "buffermanag": 2, "h": 2, "common": 2, "cudaev": 2, "cudastream": 2, "decodinginput": 2, "decodingoutput": 2, "generationinput": 2, "generationoutput": 2, "gptdecod": 2, "gptdecoderbatch": 2, "gptjsonconfig": 2, "gptmodelconfig": 2, "gptsession": 2, "ibuff": 2, "igptdecoderbatch": 2, "istatefulgptdecod": 2, "itensor": 2, "ipcutil": 2, "memorycount": 2, "prompttuningparam": 2, "samplingconfig": 2, "tllmlogger": 2, "worldconfig": 2, "tensorrt": [3, 4, 5, 6, 10, 11, 12, 13], "llm": [3, 4, 5, 6, 10, 11, 12, 13], "architectur": 3, "definit": 3, "compil": 3, "weight": [3, 12, 14], "bind": [3, 11], "pattern": [3, 9], "match": 3, "fusion": 3, "plugin": [3, 18], "multi": [3, 4, 7], "gpu": [3, 4, 12, 13], "node": 3, "support": [3, 8, 11, 14], "In": [3, 4, 8], "flight": [3, 4, 8], "batch": [3, 4, 7, 8], "The": [4, 8, 14], "manag": [4, 9], "api": [4, 9, 10], "get": 4, "send": 4, "callback": 4, "request": 4, "interrupt": 4, "statist": 4, "gptmanag": 4, "design": 4, "triton": 4, "infer": [4, 12], "server": 4, "h100": [5, 6, 13], "ha": 5, "4": 5, "6x": 5, "a100": [5, 13], "perform": [5, 13], "achiev": [5, 6], "10": 5, "000": [5, 6], "tok": 5, "": [5, 7, 10], "100m": 5, "first": [5, 13], "token": [5, 6, 13], "mlperf": 5, "fp8": [5, 7, 13, 14], "what": 5, "i": 5, "h200": 6, "nearli": 6, "12": 6, "sec": 6, "llama2": [6, 13], "13b": 6, "v": 6, "latest": 6, "hbm": 6, "memori": [6, 12], "head": 7, "queri": 7, "group": 7, "attent": [7, 16], "import": 7, "note": 7, "pad": 7, "pack": 7, "tensor": [7, 9, 12], "context": 7, "gener": [7, 8], "phase": 7, "inflight": 7, "kv": [7, 12], "cach": [7, 12], "contigu": 7, "page": 7, "int8": [7, 14], "slide": 7, "window": 7, "cyclic": 7, "roll": 7, "buffer": 7, "beam": 7, "search": 7, "input": [7, 8], "qkv": 7, "addit": 7, "featur": 7, "rotari": 7, "posit": 7, "embed": [7, 16], "rope": 7, "alibi": 7, "scale": 7, "factor": 7, "cross": 7, "rel": 7, "bia": 7, "rab": 7, "c": [8, 10, 11, 12], "gpt": [8, 13], "session": 8, "creation": 8, "configur": 8, "world": 8, "output": 8, "sampl": 8, "paramet": 8, "intern": 8, "compon": 8, "know": 8, "issu": [8, 12, 13], "futur": 8, "chang": 8, "graph": 9, "rewrit": 9, "modul": 9, "when": 9, "us": 9, "relat": 9, "method": 9, "flayerinfo": 9, "retriev": 9, "high": [9, 13], "level": 9, "inform": 9, "function": [9, 15], "record_signatur": 9, "decor": 9, "requir": 9, "classic": 9, "workflow": 9, "welcom": 10, "document": 10, "content": 10, "python": [10, 11, 12], "indic": 10, "tabl": 10, "blog": 10, "build": [11, 13], "fetch": 11, "sourc": 11, "One": 11, "creat": 11, "contain": [11, 13], "On": 11, "system": 11, "gnu": 11, "make": 11, "without": 11, "link": 11, "header": 11, "file": 11, "usag": 12, "understand": 12, "time": 12, "size": 12, "activ": [12, 16], "pool": [12, 16], "known": [12, 13], "faq": 12, "methodologi": 13, "throughput": 13, "l40": 13, "fp16": [13, 14], "low": 13, "latenc": 13, "fuse": 13, "matmul": 13, "gate": 13, "silu": 13, "llama": 13, "reproduc": 13, "benchmark": 13, "result": 13, "engin": 13, "setup": 13, "run": 13, "per": 13, "j": 13, "6b": 13, "7b": 13, "70b": 13, "falcon": 13, "180b": 13, "numer": 14, "precis": 14, "fp32": 14, "bf16": 14, "quantiz": [14, 19], "dequant": 14, "q": 14, "dq": 14, "smoothquant": 14, "w8a8": 14, "int4": 14, "onli": 14, "w4a16": 14, "w8a16": 14, "gptq": 14, "awq": 14, "hopper": 14, "matrix": 14, "technic": 14, "detail": 14, "quantmod": 14, "flag": 14, "layer": 16, "cast": 16, "conv": 16, "linear": 16, "mlp": 16, "normal": 16}, 
"envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx.ext.todo": 2, "sphinx": 60}, "alltitles": {"How to add a new model": [[0, "how-to-add-a-new-model"]], "Steps": [[0, "steps"]], "How to debug": [[1, "how-to-debug"]], "Overview": [[1, "overview"], [11, "overview"]], "Debug on unit tests": [[1, "debug-on-unit-tests"]], "Debug on E2E models": [[1, "debug-on-e2e-models"]], "Debug execution errors": [[1, "debug-execution-errors"]], "Runtime": [[2, "runtime"], [3, "runtime"], [20, "module-tensorrt_llm"]], "bufferManager.h": [[2, "buffermanager-h"]], "common.h": [[2, "common-h"]], "cudaEvent.h": [[2, "cudaevent-h"]], "cudaStream.h": [[2, "cudastream-h"]], "decodingInput.h": [[2, "decodinginput-h"]], "decodingOutput.h": [[2, "decodingoutput-h"]], "generationInput.h": [[2, "generationinput-h"]], "generationOutput.h": [[2, "generationoutput-h"]], "gptDecoder.h": [[2, "gptdecoder-h"]], "gptDecoderBatch.h": [[2, "gptdecoderbatch-h"]], "gptJsonConfig.h": [[2, "gptjsonconfig-h"]], "gptModelConfig.h": [[2, "gptmodelconfig-h"]], "gptSession.h": [[2, "gptsession-h"]], "iBuffer.h": [[2, "ibuffer-h"]], "iGptDecoderBatch.h": [[2, "igptdecoderbatch-h"]], "iStatefulGptDecoder.h": [[2, "istatefulgptdecoder-h"]], "iTensor.h": [[2, "itensor-h"]], "ipcUtils.h": [[2, "ipcutils-h"]], "memoryCounters.h": [[2, "memorycounters-h"]], "promptTuningParams.h": [[2, "prompttuningparams-h"]], "samplingConfig.h": [[2, "samplingconfig-h"]], "tllmLogger.h": [[2, "tllmlogger-h"]], "worldConfig.h": [[2, "worldconfig-h"]], "TensorRT-LLM Architecture": [[3, "tensorrt-llm-architecture"]], "Model Definition": [[3, "model-definition"]], "Compilation": [[3, "compilation"]], "Weight Bindings": [[3, "weight-bindings"]], "Pattern-Matching and Fusion": [[3, "pattern-matching-and-fusion"]], "Plugins": [[3, "plugins"]], "Multi-GPU and Multi-Node Support": [[3, "multi-gpu-and-multi-node-support"]], "In-flight Batching": [[3, "in-flight-batching"]], "The Batch Manager in TensorRT-LLM": [[4, "the-batch-manager-in-tensorrt-llm"]], "The Batch Manager API": [[4, "the-batch-manager-api"]], "Get and Send Callbacks": [[4, "get-and-send-callbacks"]], "Request Interruption": [[4, "request-interruption"]], "Statistics": [[4, "statistics"]], "GptManager Design": [[4, "gptmanager-design"]], "Multi-GPU execution": [[4, "multi-gpu-execution"]], "In-flight Batching with the Triton Inference Server": [[4, "in-flight-batching-with-the-triton-inference-server"]], "H100 has 4.6x A100 Performance in TensorRT-LLM, achieving 10,000 tok/s at 100ms to first token": [[5, "h100-has-4-6x-a100-performance-in-tensorrt-llm-achieving-10-000-tok-s-at-100ms-to-first-token"]], "MLPerf on H100 with FP8": [[5, "mlperf-on-h100-with-fp8"]], "What is H100 FP8?": [[5, "what-is-h100-fp8"]], "H200 achieves nearly 12,000 tokens/sec on Llama2-13B with TensorRT-LLM": [[6, "h200-achieves-nearly-12-000-tokens-sec-on-llama2-13b-with-tensorrt-llm"]], "H200 vs H100": [[6, "h200-vs-h100"]], "Latest HBM Memory": [[6, "latest-hbm-memory"]], "Multi-head, Multi-query and Group-query Attention": [[7, "multi-head-multi-query-and-group-query-attention"]], "Important Note": [[7, "important-note"]], "Padded and Packed Tensors": [[7, "padded-and-packed-tensors"]], "Context and Generation Phases": [[7, "context-and-generation-phases"]], 
"Context Phase": [[7, "context-phase"]], "Generation Phase": [[7, "generation-phase"]], "Inflight batching": [[7, "inflight-batching"]], "KV Cache(s)": [[7, "kv-cache-s"]], "Contiguous KV Cache": [[7, "contiguous-kv-cache"]], "Paged KV Cache": [[7, "paged-kv-cache"]], "INT8/FP8 KV Caches": [[7, "int8-fp8-kv-caches"]], "Sliding Window Attention, Cyclic (Rolling Buffer) KV Cache": [[7, "sliding-window-attention-cyclic-rolling-buffer-kv-cache"]], "Beam-Search": [[7, "beam-search"]], "Input QKV tensor": [[7, "input-qkv-tensor"]], "Additional Features": [[7, "additional-features"]], "Rotary Positional Embedding (RoPE)": [[7, "rotary-positional-embedding-rope"]], "ALiBi": [[7, "alibi"]], "Scaling factor(s)": [[7, "scaling-factor-s"]], "Cross Attention": [[7, "cross-attention"]], "Relative Attention Bias (RAB)": [[7, "relative-attention-bias-rab"]], "C++ GPT Runtime": [[8, "c-gpt-runtime"]], "The Session": [[8, "the-session"]], "Creation": [[8, "creation"]], "Session Configuration": [[8, "session-configuration"]], "Model Configuration": [[8, "model-configuration"]], "World Configuration": [[8, "world-configuration"]], "Generation": [[8, "generation"]], "Inputs and Outputs": [[8, "inputs-and-outputs"]], "Sampling Parameters": [[8, "sampling-parameters"]], "Internal Components": [[8, "internal-components"]], "In-flight Batching Support": [[8, "in-flight-batching-support"]], "Know Issues and Future Changes": [[8, "know-issues-and-future-changes"]], "Graph Rewriting Module": [[9, "graph-rewriting-module"]], "When to Use Graph Rewriting?": [[9, "when-to-use-graph-rewriting"]], "Graph Rewriting APIs": [[9, "graph-rewriting-apis"]], "Tensor-Related Methods": [[9, "tensor-related-methods"]], "FLayerInfo for Retrieving High-Level Information for a Functional": [[9, "flayerinfo-for-retrieving-high-level-information-for-a-functional"]], "Pattern and Pattern Manager": [[9, "pattern-and-pattern-manager"]], "@record_signature to Decorate Functionals Requiring FLayerInfo": [[9, "record-signature-to-decorate-functionals-requiring-flayerinfo"]], "Classical Workflow": [[9, "classical-workflow"]], "Welcome to TensorRT-LLM\u2019s documentation!": [[10, "welcome-to-tensorrt-llm-s-documentation"]], "Contents:": [[10, null]], "Python API": [[10, "python-api"]], "C++ API": [[10, "c-api"]], "Indices and tables": [[10, "indices-and-tables"]], "Blogs": [[10, "blogs"]], "Build TensorRT-LLM": [[11, "build-tensorrt-llm"], [11, "id1"]], "Fetch the Sources": [[11, "fetch-the-sources"]], "Build TensorRT-LLM in One Step": [[11, "build-tensorrt-llm-in-one-step"]], "Build Step-by-step": [[11, "build-step-by-step"]], "Create the Container": [[11, "create-the-container"]], "On Systems with GNU make": [[11, "on-systems-with-gnu-make"]], "On Systems Without GNU make": [[11, "on-systems-without-gnu-make"]], "Build the Python Bindings for the C++ Runtime": [[11, "build-the-python-bindings-for-the-c-runtime"]], "Link with the TensorRT-LLM C++ Runtime": [[11, "link-with-the-tensorrt-llm-c-runtime"]], "Supported C++ Header Files": [[11, "supported-c-header-files"]], "Memory Usage of TensorRT-LLM": [[12, "memory-usage-of-tensorrt-llm"]], "Understand inference time GPU memory usage": [[12, "understand-inference-time-gpu-memory-usage"]], "Weights size": [[12, "weights-size"]], "Activation size": [[12, "activation-size"]], "KV cache tensor": [[12, "kv-cache-tensor"]], "Python runtime": [[12, "python-runtime"]], "C++ runtime": [[12, "c-runtime"]], "Memory pool": [[12, "memory-pool"]], "Known Issues": [[12, "known-issues"], [13, "known-issues"]], 
"FAQ": [[12, "faq"]], "Performance of TensorRT-LLM": [[13, "performance-of-tensorrt-llm"]], "Methodology": [[13, "methodology"], [13, "id4"]], "High Throughput": [[13, "high-throughput"]], "H100 GPUs (FP8)": [[13, "h100-gpus-fp8"], [13, "id1"]], "L40S GPUs (FP8)": [[13, "l40s-gpus-fp8"], [13, "id2"]], "A100 GPUs (FP16)": [[13, "a100-gpus-fp16"], [13, "id3"]], "Low Latency": [[13, "low-latency"]], "Fused Matmul + Gated-SiLU (LLaMA)": [[13, "fused-matmul-gated-silu-llama"]], "Reproducing Benchmarked Results": [[13, "reproducing-benchmarked-results"]], "Building the TensorRT-LLM Container": [[13, "building-the-tensorrt-llm-container"]], "Engine Building Setups": [[13, "engine-building-setups"]], "Running on A100": [[13, "running-on-a100"]], "Reproducing First Token Latency": [[13, "reproducing-first-token-latency"]], "Benchmarking per Model": [[13, "benchmarking-per-model"]], "GPT-J 6B": [[13, "gpt-j-6b"]], "Throughput Benchmark": [[13, "throughput-benchmark"], [13, "id5"], [13, "id7"]], "First Token Latency Benchmark": [[13, "first-token-latency-benchmark"], [13, "id6"], [13, "id8"]], "Llama2-7b": [[13, "llama2-7b"]], "Llama2-70b": [[13, "llama2-70b"]], "Falcon-180B": [[13, "falcon-180b"]], "Numerical Precision": [[14, "numerical-precision"]], "FP32, FP16 and BF16": [[14, "fp32-fp16-and-bf16"]], "Quantization and Dequantization (Q/DQ)": [[14, "quantization-and-dequantization-q-dq"]], "INT8 SmoothQuant (W8A8)": [[14, "int8-smoothquant-w8a8"]], "INT4 and INT8 Weight-Only (W4A16 and W8A16)": [[14, "int4-and-int8-weight-only-w4a16-and-w8a16"]], "GPTQ and AWQ (W4A16)": [[14, "gptq-and-awq-w4a16"]], "FP8 (Hopper)": [[14, "fp8-hopper"]], "Support matrix": [[14, "support-matrix"]], "Technical Detail: The QuantMode Flags": [[14, "technical-detail-the-quantmode-flags"]], "Functionals": [[15, "module-tensorrt_llm"]], "Layers": [[16, "module-tensorrt_llm"]], "Activation": [[16, "module-tensorrt_llm.layers.activation"]], "Attention": [[16, "module-tensorrt_llm.layers.attention"]], "Cast": [[16, "module-tensorrt_llm.layers.cast"]], "Conv": [[16, "module-tensorrt_llm.layers.conv"]], "Embedding": [[16, "module-tensorrt_llm.layers.embedding"]], "Linear": [[16, "module-tensorrt_llm.layers.linear"]], "MLP": [[16, "module-tensorrt_llm.layers.mlp"]], "Normalization": [[16, "normalization"]], "Pooling": [[16, "module-tensorrt_llm.layers.pooling"]], "Models": [[17, "module-tensorrt_llm"]], "Plugin": [[18, "module-tensorrt_llm"]], "Quantization": [[19, "module-tensorrt_llm"]]}, "indexentries": {"nvinfer1 (c++ type)": [[2, "_CPPv48nvinfer1"]], "tensorrt_llm (c++ type)": [[2, "_CPPv412tensorrt_llm"]], "tensorrt_llm::batch_manager (c++ type)": [[2, "_CPPv4N12tensorrt_llm13batch_managerE"]], "tensorrt_llm::batch_manager::kv_cache_manager (c++ type)": [[2, "_CPPv4N12tensorrt_llm13batch_manager16kv_cache_managerE"]], "tensorrt_llm::layers (c++ type)": [[2, "_CPPv4N12tensorrt_llm6layersE"]], "tensorrt_llm::runtime (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtimeE"]], "tensorrt_llm::runtime::bufferdatatype (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime14BufferDataTypeE"]], "tensorrt_llm::runtime::bufferdatatype::bufferdatatype (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb"]], "tensorrt_llm::runtime::bufferdatatype::getdatatype (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType11getDataTypeEv"]], "tensorrt_llm::runtime::bufferdatatype::getsize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType7getSizeEv"]], 
"tensorrt_llm::runtime::bufferdatatype::ispointer (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType9isPointerEv"]], "tensorrt_llm::runtime::bufferdatatype::isunsigned (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType10isUnsignedEv"]], "tensorrt_llm::runtime::bufferdatatype::ktrtpointertype (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14BufferDataType15kTrtPointerTypeE"]], "tensorrt_llm::runtime::bufferdatatype::mdatatype (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14BufferDataType9mDataTypeE"]], "tensorrt_llm::runtime::bufferdatatype::mpointer (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14BufferDataType8mPointerE"]], "tensorrt_llm::runtime::bufferdatatype::munsigned (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14BufferDataType9mUnsignedE"]], "tensorrt_llm::runtime::bufferdatatype::operator nvinfer1::datatype (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14BufferDataTypecvN8nvinfer18DataTypeEEv"]], "tensorrt_llm::runtime::buffermanager (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManagerE"]], "tensorrt_llm::runtime::buffermanager::buffermanager (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager13BufferManagerE13CudaStreamPtr"]], "tensorrt_llm::runtime::buffermanager::cudastreamptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager13CudaStreamPtrE"]], "tensorrt_llm::runtime::buffermanager::ibufferptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager10IBufferPtrE"]], "tensorrt_llm::runtime::buffermanager::itensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager10ITensorPtrE"]], "tensorrt_llm::runtime::buffermanager::allocate (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::copy (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer"]], "tensorrt_llm::runtime::buffermanager::copyfrom (c++ function)": [[2, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType"], [2, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType"], [2, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType"]], "tensorrt_llm::runtime::buffermanager::cpu (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE"], [2, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::emptybuffer (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::emptytensor (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE"]], 
"tensorrt_llm::runtime::buffermanager::getstream (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager9getStreamEv"]], "tensorrt_llm::runtime::buffermanager::gpu (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::initmemorypool (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager14initMemoryPoolEi"]], "tensorrt_llm::runtime::buffermanager::kbyte_type (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager10kBYTE_TYPEE"]], "tensorrt_llm::runtime::buffermanager::mstream (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager7mStreamE"]], "tensorrt_llm::runtime::buffermanager::memorypoolfree (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEi"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEv"]], "tensorrt_llm::runtime::buffermanager::memorypoolreserved (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEi"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEv"]], "tensorrt_llm::runtime::buffermanager::memorypooltrimto (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToENSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToEiNSt6size_tE"]], "tensorrt_llm::runtime::buffermanager::memorypoolused (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEi"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEv"]], "tensorrt_llm::runtime::buffermanager::pinned (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE"], [2, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::setzero (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7setZeroER7IBuffer"]], "tensorrt_llm::runtime::bufferrange (c++ class)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime11BufferRangeE"]], "tensorrt_llm::runtime::bufferrange::bufferrange (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeER7IBuffer"]], "tensorrt_llm::runtime::bufferrange::begin (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange5beginEv"], [2, "_CPPv4NK12tensorrt_llm7runtime11BufferRange5beginEv"]], "tensorrt_llm::runtime::bufferrange::cbegin (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange6cbeginEv"], [2, "_CPPv4NK12tensorrt_llm7runtime11BufferRange6cbeginEv"]], "tensorrt_llm::runtime::bufferrange::cend (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange4cendEv"], [2, "_CPPv4NK12tensorrt_llm7runtime11BufferRange4cendEv"]], "tensorrt_llm::runtime::bufferrange::const_iterator (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange14const_iteratorE"]], "tensorrt_llm::runtime::bufferrange::const_pointer (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange13const_pointerE"]], "tensorrt_llm::runtime::bufferrange::const_reference (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange15const_referenceE"]], "tensorrt_llm::runtime::bufferrange::end (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange3endEv"], [2, "_CPPv4NK12tensorrt_llm7runtime11BufferRange3endEv"]], "tensorrt_llm::runtime::bufferrange::iterator (c++ type)": [[2, 
"_CPPv4N12tensorrt_llm7runtime11BufferRange8iteratorE"]], "tensorrt_llm::runtime::bufferrange::mdata (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange5mDataE"]], "tensorrt_llm::runtime::bufferrange::msize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange5mSizeE"]], "tensorrt_llm::runtime::bufferrange::operator[] (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRangeixE9size_type"], [2, "_CPPv4NK12tensorrt_llm7runtime11BufferRangeixE9size_type"]], "tensorrt_llm::runtime::bufferrange::pointer (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange7pointerE"]], "tensorrt_llm::runtime::bufferrange::reference (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange9referenceE"]], "tensorrt_llm::runtime::bufferrange::size (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11BufferRange4sizeEv"]], "tensorrt_llm::runtime::bufferrange::size_type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange9size_typeE"]], "tensorrt_llm::runtime::bufferrange::value_type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange10value_typeE"]], "tensorrt_llm::runtime::cudaevent (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEventE"]], "tensorrt_llm::runtime::cudaevent::cudaevent (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb"], [2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventEj"]], "tensorrt_llm::runtime::cudaevent::deleter (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7DeleterE"]], "tensorrt_llm::runtime::cudaevent::deleter::deleter (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEb"], [2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEv"]], "tensorrt_llm::runtime::cudaevent::deleter::mownsevent (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter10mOwnsEventE"]], "tensorrt_llm::runtime::cudaevent::deleter::operator() (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent7DeleterclE7pointer"]], "tensorrt_llm::runtime::cudaevent::eventptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent8EventPtrE"]], "tensorrt_llm::runtime::cudaevent::element_type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent12element_typeE"]], "tensorrt_llm::runtime::cudaevent::get (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent3getEv"]], "tensorrt_llm::runtime::cudaevent::mevent (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent6mEventE"]], "tensorrt_llm::runtime::cudaevent::pointer (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7pointerE"]], "tensorrt_llm::runtime::cudaevent::synchronize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent11synchronizeEv"]], "tensorrt_llm::runtime::cudastream (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStreamE"]], "tensorrt_llm::runtime::cudastream::cudastream (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib"], [2, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji"]], "tensorrt_llm::runtime::cudastream::deleter (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream7DeleterE"]], "tensorrt_llm::runtime::cudastream::deleter::deleter (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEb"], [2, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEv"]], "tensorrt_llm::runtime::cudastream::deleter::mownsstream (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter11mOwnsStreamE"]], "tensorrt_llm::runtime::cudastream::deleter::operator() 
(c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream7DeleterclE12cudaStream_t"]], "tensorrt_llm::runtime::cudastream::streamptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream9StreamPtrE"]], "tensorrt_llm::runtime::cudastream::get (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream3getEv"]], "tensorrt_llm::runtime::cudastream::getdevice (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream9getDeviceEv"]], "tensorrt_llm::runtime::cudastream::mdevice (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream7mDeviceE"]], "tensorrt_llm::runtime::cudastream::mstream (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream7mStreamE"]], "tensorrt_llm::runtime::cudastream::record (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordEN9CudaEvent7pointerE"], [2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordERK9CudaEvent"]], "tensorrt_llm::runtime::cudastream::synchronize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream11synchronizeEv"]], "tensorrt_llm::runtime::cudastream::wait (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitEN9CudaEvent7pointerE"], [2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitERK9CudaEvent"]], "tensorrt_llm::runtime::datatypetraits (c++ struct)": [[2, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime14DataTypeTraitsE"]], "tensorrt_llm::runtime::datatypetraits<kdatatype, kunsigned, true> (c++ struct)": [[2, "_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEEE"]], "tensorrt_llm::runtime::datatypetraits<kdatatype, kunsigned, true>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4nameE"]], "tensorrt_llm::runtime::datatypetraits<kdatatype, kunsigned, true>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<kdatatype, kunsigned, true>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kbool, kunsigned> (c++ struct)": [[2, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kbool, kunsigned>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kbool, kunsigned>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kbool, kunsigned>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kfloat> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kfloat>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kfloat>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kfloat>::type (c++ type)": [[2, 
"_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::khalf> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::khalf>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::khalf>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::khalf>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32, true> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32, true>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32, true>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32, true>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64, true> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64, true>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64, true>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64, true>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4sizeE"]], 
"tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint8> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint8>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint8>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint8>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kuint8, kunsigned> (c++ struct)": [[2, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kuint8, kunsigned>::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kuint8, kunsigned>::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kuint8, kunsigned>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4typeE"]], "tensorrt_llm::runtime::decodinginput (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInputE"]], "tensorrt_llm::runtime::decodinginput::decodinginput (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType9TensorPtr9TensorPtr"]], "tensorrt_llm::runtime::decodinginput::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9TensorPtrE"]], "tensorrt_llm::runtime::decodinginput::badwordslist (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsListE"]], "tensorrt_llm::runtime::decodinginput::batchsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9batchSizeE"]], "tensorrt_llm::runtime::decodinginput::cacheindirection (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput16cacheIndirectionE"]], "tensorrt_llm::runtime::decodinginput::embeddingbias (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13embeddingBiasE"]], "tensorrt_llm::runtime::decodinginput::endids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput6endIdsE"]], "tensorrt_llm::runtime::decodinginput::finished (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput8finishedE"]], "tensorrt_llm::runtime::decodinginput::lengths (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput7lengthsE"]], "tensorrt_llm::runtime::decodinginput::logits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput6logitsE"]], "tensorrt_llm::runtime::decodinginput::maxkvcachelength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput16maxKvCacheLengthE"]], "tensorrt_llm::runtime::decodinginput::maxlength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9maxLengthE"]], "tensorrt_llm::runtime::decodinginput::norepeatngramsize (c++ member)": [[2, 
"_CPPv4N12tensorrt_llm7runtime13DecodingInput17noRepeatNgramSizeE"]], "tensorrt_llm::runtime::decodinginput::sequencelimitlength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput19sequenceLimitLengthE"]], "tensorrt_llm::runtime::decodinginput::step (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput4stepE"]], "tensorrt_llm::runtime::decodinginput::stopwordslist (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsListE"]], "tensorrt_llm::runtime::decodingoutput (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutputE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypothesesE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::cumlogprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses11cumLogProbsE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::empty (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5emptyER13BufferManager"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::init (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initER13BufferManager11TokenIdType"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::isdone (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses6isDoneE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::logprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses8logProbsE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::minnormedscores (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses15minNormedScoresE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::normedscores (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12normedScoresE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::numbeams (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses8numBeamsE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::outputidstgt (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12outputIdsTgtE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::release (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7releaseEv"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::reshape (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::sequencelengthstgt (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses18sequenceLengthsTgtE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::slice (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE8SizeType8SizeType"]], "tensorrt_llm::runtime::decodingoutput::decodingoutput (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14DecodingOutputE9TensorPtr"]], "tensorrt_llm::runtime::decodingoutput::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9TensorPtrE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14beamHypothesesE"]], "tensorrt_llm::runtime::decodingoutput::cacheindirection (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput16cacheIndirectionE"]], 
"tensorrt_llm::runtime::decodingoutput::cumlogprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput11cumLogProbsE"]], "tensorrt_llm::runtime::decodingoutput::finished (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput8finishedE"]], "tensorrt_llm::runtime::decodingoutput::finishedsteps (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput13finishedStepsE"]], "tensorrt_llm::runtime::decodingoutput::finishedsum (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput11finishedSumE"]], "tensorrt_llm::runtime::decodingoutput::ids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput3idsE"]], "tensorrt_llm::runtime::decodingoutput::knegativeinfinity (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput17kNegativeInfinityE"]], "tensorrt_llm::runtime::decodingoutput::lengths (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput7lengthsE"]], "tensorrt_llm::runtime::decodingoutput::logprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput8logProbsE"]], "tensorrt_llm::runtime::decodingoutput::newtokens (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9newTokensE"]], "tensorrt_llm::runtime::decodingoutput::newtokenssteps (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14newTokensStepsE"]], "tensorrt_llm::runtime::decodingoutput::newtokensvec (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput12newTokensVecE"]], "tensorrt_llm::runtime::decodingoutput::parentids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9parentIdsE"]], "tensorrt_llm::runtime::generationinput (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInputE"]], "tensorrt_llm::runtime::generationinput::base (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInput4BaseE"]], "tensorrt_llm::runtime::generationinput::generationinput (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb"]], "tensorrt_llm::runtime::generationinput::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInput9TensorPtrE"]], "tensorrt_llm::runtime::generationoutput (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime16GenerationOutputE"]], "tensorrt_llm::runtime::generationoutput::base (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput4BaseE"]], "tensorrt_llm::runtime::generationoutput::generationoutput (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput16GenerationOutputE9TensorPtr9TensorPtr"]], "tensorrt_llm::runtime::generationoutput::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput9TensorPtrE"]], "tensorrt_llm::runtime::genericgenerationinput (c++ class)": [[2, "_CPPv4I00EN12tensorrt_llm7runtime22GenericGenerationInputE"]], "tensorrt_llm::runtime::genericgenerationinput::genericgenerationinput (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb"]], "tensorrt_llm::runtime::genericgenerationinput::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput9TensorPtrE"]], "tensorrt_llm::runtime::genericgenerationinput::badwordslist (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput12badWordsListE"]], "tensorrt_llm::runtime::genericgenerationinput::embeddingbias (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput13embeddingBiasE"]], 
"tensorrt_llm::runtime::genericgenerationinput::endid (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput5endIdE"]], "tensorrt_llm::runtime::genericgenerationinput::ids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput3idsE"]], "tensorrt_llm::runtime::genericgenerationinput::lengths (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput7lengthsE"]], "tensorrt_llm::runtime::genericgenerationinput::maxnewtokens (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput12maxNewTokensE"]], "tensorrt_llm::runtime::genericgenerationinput::packed (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput6packedE"]], "tensorrt_llm::runtime::genericgenerationinput::padid (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput5padIdE"]], "tensorrt_llm::runtime::genericgenerationinput::prompttuningparams (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput18promptTuningParamsE"]], "tensorrt_llm::runtime::genericgenerationinput::stopwordslist (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput13stopWordsListE"]], "tensorrt_llm::runtime::genericgenerationoutput (c++ class)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime23GenericGenerationOutputE"]], "tensorrt_llm::runtime::genericgenerationoutput::callback (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput8CallbackE"]], "tensorrt_llm::runtime::genericgenerationoutput::genericgenerationoutput (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput23GenericGenerationOutputE9TensorPtr9TensorPtr"]], "tensorrt_llm::runtime::genericgenerationoutput::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput9TensorPtrE"]], "tensorrt_llm::runtime::genericgenerationoutput::contextlogits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput13contextLogitsE"]], "tensorrt_llm::runtime::genericgenerationoutput::cumlogprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput11cumLogProbsE"]], "tensorrt_llm::runtime::genericgenerationoutput::generationlogits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput16generationLogitsE"]], "tensorrt_llm::runtime::genericgenerationoutput::ids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput3idsE"]], "tensorrt_llm::runtime::genericgenerationoutput::lengths (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput7lengthsE"]], "tensorrt_llm::runtime::genericgenerationoutput::logprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput8logProbsE"]], "tensorrt_llm::runtime::genericgenerationoutput::ontokengenerated (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput16onTokenGeneratedE"]], "tensorrt_llm::runtime::genericprompttuningparams (c++ class)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime25GenericPromptTuningParamsE"]], "tensorrt_llm::runtime::genericprompttuningparams::genericprompttuningparams (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams25GenericPromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr"]], "tensorrt_llm::runtime::genericprompttuningparams::sizetype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams8SizeTypeE"]], "tensorrt_llm::runtime::genericprompttuningparams::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9TensorPtrE"]], 
"tensorrt_llm::runtime::genericprompttuningparams::embeddingtable (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams14embeddingTableE"]], "tensorrt_llm::runtime::genericprompttuningparams::prompttuningenabled (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams19promptTuningEnabledE"]], "tensorrt_llm::runtime::genericprompttuningparams::tasks (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams5tasksE"]], "tensorrt_llm::runtime::genericprompttuningparams::vocabsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9vocabSizeE"]], "tensorrt_llm::runtime::gptdecoder (c++ class)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime10GptDecoderE"]], "tensorrt_llm::runtime::gptdecoder::cudastreamptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder13CudaStreamPtrE"]], "tensorrt_llm::runtime::gptdecoder::gptdecoder (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderE6size_t6size_tRK13CudaStreamPtr"]], "tensorrt_llm::runtime::gptdecoder::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder9TensorPtrE"]], "tensorrt_llm::runtime::gptdecoder::forward (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder7forwardER14DecodingOutputRK13DecodingInput"]], "tensorrt_llm::runtime::gptdecoder::forwardasync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput"]], "tensorrt_llm::runtime::gptdecoder::gathertree (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager"]], "tensorrt_llm::runtime::gptdecoder::mallocator (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10mAllocatorE"]], "tensorrt_llm::runtime::gptdecoder::mdynamicdecodelayer (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder19mDynamicDecodeLayerE"]], "tensorrt_llm::runtime::gptdecoder::mlogprobstiled (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder14mLogProbsTiledE"]], "tensorrt_llm::runtime::gptdecoder::mmanager (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder8mManagerE"]], "tensorrt_llm::runtime::gptdecoder::setup (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t8SizeType"]], "tensorrt_llm::runtime::gptdecoderbatch (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatchE"]], "tensorrt_llm::runtime::gptdecoderbatch::cudastreamptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13CudaStreamPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::decodinginputptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16DecodingInputPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::decodingoutputptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch17DecodingOutputPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::gptdecoderbatch (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr"]], "tensorrt_llm::runtime::gptdecoderbatch::gptdecoderptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13GptDecoderPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9TensorPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::finalize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch8finalizeE8SizeType"], [2, 
"_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch8finalizeEv"]], "tensorrt_llm::runtime::gptdecoderbatch::forwardasync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE"], [2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN7decoder6OutputERKN7decoder5InputE"]], "tensorrt_llm::runtime::gptdecoderbatch::forwardsync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE"], [2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getallnewtokens (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch15getAllNewTokensEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getcumlogprobs (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch14getCumLogProbsE8SizeType"], [2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch14getCumLogProbsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getfinished (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getFinishedEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getlogprobs (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getLogProbsE8SizeType"], [2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getLogProbsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getnbfinished (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch13getNbFinishedEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getnbsteps (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch10getNbStepsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getnewtokens (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getNewTokensE8SizeType"]], "tensorrt_llm::runtime::gptdecoderbatch::getoutputids (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsE8SizeType"], [2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getparentids (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getParentIdsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::mactualbatchsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mActualBatchSizeE"]], "tensorrt_llm::runtime::gptdecoderbatch::mbeamwidths (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11mBeamWidthsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mbuffermanager (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mBufferManagerE"]], "tensorrt_llm::runtime::gptdecoderbatch::mdecoders (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9mDecodersE"]], "tensorrt_llm::runtime::gptdecoderbatch::mdecodinginputs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mDecodingInputsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mdecodingoutputs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mDecodingOutputsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mdrafttokenids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mDraftTokenIdsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mfinished (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9mFinishedE"]], "tensorrt_llm::runtime::gptdecoderbatch::mfinishedsum (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12mFinishedSumE"]], "tensorrt_llm::runtime::gptdecoderbatch::mforwardevent (c++ member)": [[2, 
"_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mForwardEventE"]], "tensorrt_llm::runtime::gptdecoderbatch::mforwardtoken (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mForwardTokenE"]], "tensorrt_llm::runtime::gptdecoderbatch::mgeneratedtokensperstep (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch23mGeneratedTokensPerStepE"]], "tensorrt_llm::runtime::gptdecoderbatch::mjointdecodinginput (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch19mJointDecodingInputE"]], "tensorrt_llm::runtime::gptdecoderbatch::mjointdecodingoutput (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch20mJointDecodingOutputE"]], "tensorrt_llm::runtime::gptdecoderbatch::mmaxkvcachelength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch17mMaxKvCacheLengthE"]], "tensorrt_llm::runtime::gptdecoderbatch::mmaxnewtokens (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mMaxNewTokensE"]], "tensorrt_llm::runtime::gptdecoderbatch::mmaxsequencelength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch18mMaxSequenceLengthE"]], "tensorrt_llm::runtime::gptdecoderbatch::mmaxtokensperstep (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch17mMaxTokensPerStepE"]], "tensorrt_llm::runtime::gptdecoderbatch::mnbsteps (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8mNbStepsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mnumdrafttokens (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mNumDraftTokensE"]], "tensorrt_llm::runtime::gptdecoderbatch::mstream (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch7mStreamE"]], "tensorrt_llm::runtime::gptdecoderbatch::mstreams (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8mStreamsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mvocabsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10mVocabSizeE"]], "tensorrt_llm::runtime::gptdecoderbatch::mvocabsizepadded (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mVocabSizePaddedE"]], "tensorrt_llm::runtime::gptdecoderbatch::newbatch (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig"]], "tensorrt_llm::runtime::gptdecoderbatch::newrequest (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig"]], "tensorrt_llm::runtime::gptdecoderbatch::postprocessrequest (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch18postProcessRequestE8SizeType"]], "tensorrt_llm::runtime::gptdecoderbatch::setup (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::gptjsonconfig (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfigE"]], "tensorrt_llm::runtime::gptjsonconfig::gptjsonconfig (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig"]], "tensorrt_llm::runtime::gptjsonconfig::enginefilename (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfig"], [2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE"]], "tensorrt_llm::runtime::gptjsonconfig::getmodelconfig (c++ function)": [[2, 
"_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14getModelConfigEv"]], "tensorrt_llm::runtime::gptjsonconfig::getname (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig7getNameEv"]], "tensorrt_llm::runtime::gptjsonconfig::getpipelineparallelism (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig22getPipelineParallelismEv"]], "tensorrt_llm::runtime::gptjsonconfig::getprecision (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getPrecisionEv"]], "tensorrt_llm::runtime::gptjsonconfig::gettensorparallelism (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig20getTensorParallelismEv"]], "tensorrt_llm::runtime::gptjsonconfig::getworldsize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getWorldSizeEv"]], "tensorrt_llm::runtime::gptjsonconfig::mgptmodelconfig (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig15mGptModelConfigE"]], "tensorrt_llm::runtime::gptjsonconfig::mname (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5mNameE"]], "tensorrt_llm::runtime::gptjsonconfig::mpipelineparallelism (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig20mPipelineParallelismE"]], "tensorrt_llm::runtime::gptjsonconfig::mprecision (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig10mPrecisionE"]], "tensorrt_llm::runtime::gptjsonconfig::mtensorparallelism (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig18mTensorParallelismE"]], "tensorrt_llm::runtime::gptjsonconfig::parse (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt10filesystem4pathE"], [2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt6stringE"], [2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERNSt7istreamE"]], "tensorrt_llm::runtime::gptmodelconfig (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfigE"]], "tensorrt_llm::runtime::gptmodelconfig::gptmodelconfig (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::gptmodelconfig::modelvariant (c++ enum)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariantE"]], "tensorrt_llm::runtime::gptmodelconfig::modelvariant::kglm (c++ enumerator)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant4kGlmE"]], "tensorrt_llm::runtime::gptmodelconfig::modelvariant::kgpt (c++ enumerator)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant4kGptE"]], "tensorrt_llm::runtime::gptmodelconfig::computecontextlogits (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEb"], [2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEv"]], "tensorrt_llm::runtime::gptmodelconfig::computegenerationlogits (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig23computeGenerationLogitsEb"], [2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig23computeGenerationLogitsEv"]], "tensorrt_llm::runtime::gptmodelconfig::getdatatype (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getDataTypeEv"]], "tensorrt_llm::runtime::gptmodelconfig::gethiddensize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13getHiddenSizeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxbatchsize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxBatchSizeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxinputlen (c++ function)": [[2, 
"_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMaxInputLenEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxnumtokens (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxNumTokensEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxoutputlen (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxOutputLenEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxpromptembeddingtablesize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig30getMaxPromptEmbeddingTableSizeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxtokensperstep (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig19getMaxTokensPerStepEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmodelvariant (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getModelVariantEv"]], "tensorrt_llm::runtime::gptmodelconfig::getnbheads (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig10getNbHeadsEv"]], "tensorrt_llm::runtime::gptmodelconfig::getnbkvheads (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getNbKvHeadsEv"]], "tensorrt_llm::runtime::gptmodelconfig::getnblayers (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getNbLayersE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::getquantmode (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getQuantModeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getsizeperhead (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getSizePerHeadEv"]], "tensorrt_llm::runtime::gptmodelconfig::gettokensperblock (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig17getTokensPerBlockEv"]], "tensorrt_llm::runtime::gptmodelconfig::getvocabsize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getVocabSizeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getvocabsizepadded (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18getVocabSizePaddedE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::mcomputecontextlogits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21mComputeContextLogitsE"]], "tensorrt_llm::runtime::gptmodelconfig::mcomputegenerationlogits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig24mComputeGenerationLogitsE"]], "tensorrt_llm::runtime::gptmodelconfig::mdatatype (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig9mDataTypeE"]], "tensorrt_llm::runtime::gptmodelconfig::mhiddensize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig11mHiddenSizeE"]], "tensorrt_llm::runtime::gptmodelconfig::minputpacked (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mInputPackedE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxbatchsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxBatchSizeE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxdraftlen (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxDraftLenE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxinputlen (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxInputLenE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxnumtokens (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxNumTokensE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxoutputlen (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxOutputLenE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxpromptembeddingtablesize (c++ member)": [[2, 
"_CPPv4N12tensorrt_llm7runtime14GptModelConfig28mMaxPromptEmbeddingTableSizeE"]], "tensorrt_llm::runtime::gptmodelconfig::mmodelvariant (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mModelVariantE"]], "tensorrt_llm::runtime::gptmodelconfig::mnbheads (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig8mNbHeadsE"]], "tensorrt_llm::runtime::gptmodelconfig::mnbkvheads (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mNbKvHeadsE"]], "tensorrt_llm::runtime::gptmodelconfig::mnblayers (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig9mNbLayersE"]], "tensorrt_llm::runtime::gptmodelconfig::mpagedkvcache (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mPagedKvCacheE"]], "tensorrt_llm::runtime::gptmodelconfig::mquantmode (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mQuantModeE"]], "tensorrt_llm::runtime::gptmodelconfig::mtokensperblock (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15mTokensPerBlockE"]], "tensorrt_llm::runtime::gptmodelconfig::musecustomallreduce (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig19mUseCustomAllReduceE"]], "tensorrt_llm::runtime::gptmodelconfig::musegptattentionplugin (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig22mUseGptAttentionPluginE"]], "tensorrt_llm::runtime::gptmodelconfig::mvocabsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mVocabSizeE"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxbatchsize (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBatchSizeE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxdraftlen (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxDraftLenE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxinputlen (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxInputLenE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxnumtokens (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxNumTokensENSt8optionalI8SizeTypeEE"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxoutputlen (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxOutputLenE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxpromptembeddingtablesize (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig30setMaxPromptEmbeddingTableSizeE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmodelvariant (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setModelVariantE12ModelVariant"]], "tensorrt_llm::runtime::gptmodelconfig::setnbkvheads (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setNbKvHeadsE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setquantmode (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setQuantModeEN6common9QuantModeE"]], "tensorrt_llm::runtime::gptmodelconfig::settokensperblock (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setTokensPerBlockE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::supportsinflightbatching (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig24supportsInflightBatchingEv"]], "tensorrt_llm::runtime::gptmodelconfig::usecustomallreduce (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEb"], [2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEv"]], 
"tensorrt_llm::runtime::gptmodelconfig::usegptattentionplugin (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEb"], [2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEv"]], "tensorrt_llm::runtime::gptmodelconfig::usepackedinput (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14usePackedInputEb"], [2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14usePackedInputEv"]], "tensorrt_llm::runtime::gptmodelconfig::usepagedkvcache (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEb"], [2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEv"]], "tensorrt_llm::runtime::gptmodelconfig::useprompttuning (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15usePromptTuningEv"]], "tensorrt_llm::runtime::gptsession (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSessionE"]], "tensorrt_llm::runtime::gptsession::config (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession6ConfigE"]], "tensorrt_llm::runtime::gptsession::config::config (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config6ConfigE8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::config::ctxmicrobatchsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17ctxMicroBatchSizeE"]], "tensorrt_llm::runtime::gptsession::config::cudagraphmode (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config13cudaGraphModeE"]], "tensorrt_llm::runtime::gptsession::config::decoderperrequest (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17decoderPerRequestE"]], "tensorrt_llm::runtime::gptsession::config::genmicrobatchsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17genMicroBatchSizeE"]], "tensorrt_llm::runtime::gptsession::config::kvcacheconfig (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config13kvCacheConfigE"]], "tensorrt_llm::runtime::gptsession::config::maxbatchsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config12maxBatchSizeE"]], "tensorrt_llm::runtime::gptsession::config::maxbeamwidth (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config12maxBeamWidthE"]], "tensorrt_llm::runtime::gptsession::config::maxsequencelength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17maxSequenceLengthE"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorE"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::cudagraphexecutor (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor17CudaGraphExecutorEv"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::clear (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor5clearEv"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::create (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6createERK11cudaGraph_t"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::hasinstance (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor11hasInstanceEv"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::launch (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6launchERK10CudaStream"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::minstance (c++ member)": [[2, 
"_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor9mInstanceE"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::preparenextgraph (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16prepareNextGraphERK11TllmRuntime8SizeType"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::update (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6updateERK11cudaGraph_t"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::uploadtostream (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor14uploadToStreamERK10CudaStream"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::~cudagraphexecutor (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorD0Ev"]], "tensorrt_llm::runtime::gptsession::gptsession (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr"], [2, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr"], [2, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr"]], "tensorrt_llm::runtime::gptsession::kvcacheconfig (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession13KvCacheConfigE"]], "tensorrt_llm::runtime::gptsession::kvcachemanager (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession14KvCacheManagerE"]], "tensorrt_llm::runtime::gptsession::loggerptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession9LoggerPtrE"]], "tensorrt_llm::runtime::gptsession::microbatchconfig (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfigE"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::microbatchconfig (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE8SizeType8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE"], [2, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigEv"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::ctxbatchsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig12ctxBatchSizeE"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::genbatchsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig12genBatchSizeE"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::getctxcontextid (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig15getCtxContextIdE8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::getgencontextid (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig15getGenContextIdE8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::numctxbatches (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig13numCtxBatchesE"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::numctxpergen (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig12numCtxPerGenEv"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::numgenbatches (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig13numGenBatchesE"]], "tensorrt_llm::runtime::gptsession::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession9TensorPtrE"]], "tensorrt_llm::runtime::gptsession::tokengeneratedcallback (c++ type)": [[2, 
"_CPPv4N12tensorrt_llm7runtime10GptSession22TokenGeneratedCallbackE"]], "tensorrt_llm::runtime::gptsession::createbuffers (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession13createBuffersE8SizeType"]], "tensorrt_llm::runtime::gptsession::createcontexts (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession14createContextsE8SizeType8SizeTypeb"]], "tensorrt_llm::runtime::gptsession::createcustomallreduceworkspace (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::createdecoders (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType"]], "tensorrt_llm::runtime::gptsession::createkvcachemanager (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig"]], "tensorrt_llm::runtime::gptsession::createontokengeneratedcallback (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession30createOnTokenGeneratedCallbackER16GenerationOutput"]], "tensorrt_llm::runtime::gptsession::decoderstepasync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncE8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::executecontextstep (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession18executeContextStepERKNSt6vectorI15GenerationInputEERKNSt6vectorI8SizeTypeEEPK14KvCacheManager"]], "tensorrt_llm::runtime::gptsession::executegenerationstep (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE"]], "tensorrt_llm::runtime::gptsession::finalize (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession8finalizeE8SizeType"]], "tensorrt_llm::runtime::gptsession::generate (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfig"]], "tensorrt_llm::runtime::gptsession::generatebatched (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallback"]], "tensorrt_llm::runtime::gptsession::getbuffermanager (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession16getBufferManagerEv"]], "tensorrt_llm::runtime::gptsession::getdevice (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession9getDeviceEv"]], "tensorrt_llm::runtime::gptsession::getlogger (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession9getLoggerEv"]], "tensorrt_llm::runtime::gptsession::getmodelconfig (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession14getModelConfigEv"]], "tensorrt_llm::runtime::gptsession::getworldconfig (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession14getWorldConfigEv"]], "tensorrt_llm::runtime::gptsession::initdecoder (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType"]], "tensorrt_llm::runtime::gptsession::kvcacheaddsequences (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::mbuffers (c++ member)": [[2, 
"_CPPv4N12tensorrt_llm7runtime10GptSession8mBuffersE"]], "tensorrt_llm::runtime::gptsession::mcommevent (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession10mCommEventE"]], "tensorrt_llm::runtime::gptsession::mcommptrs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession9mCommPtrsE"]], "tensorrt_llm::runtime::gptsession::mcommstream (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession11mCommStreamE"]], "tensorrt_llm::runtime::gptsession::mcudagraphinstances (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession19mCudaGraphInstancesE"]], "tensorrt_llm::runtime::gptsession::mcudagraphmode (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession14mCudaGraphModeE"]], "tensorrt_llm::runtime::gptsession::mdecodermaxkvcachelength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession24mDecoderMaxKvCacheLengthE"]], "tensorrt_llm::runtime::gptsession::mdecodermaxsequencelength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession25mDecoderMaxSequenceLengthE"]], "tensorrt_llm::runtime::gptsession::mdecoders (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession9mDecodersE"]], "tensorrt_llm::runtime::gptsession::mdevice (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession7mDeviceE"]], "tensorrt_llm::runtime::gptsession::mipcmemoryhandles (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17mIpcMemoryHandlesE"]], "tensorrt_llm::runtime::gptsession::mkvcachemanager (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession15mKvCacheManagerE"]], "tensorrt_llm::runtime::gptsession::mlogger (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession7mLoggerE"]], "tensorrt_llm::runtime::gptsession::mmicrobatchconfig (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17mMicroBatchConfigE"]], "tensorrt_llm::runtime::gptsession::mmodelconfig (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession12mModelConfigE"]], "tensorrt_llm::runtime::gptsession::mpipelinecomm (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession13mPipelineCommE"]], "tensorrt_llm::runtime::gptsession::mreceivedevents (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession15mReceivedEventsE"]], "tensorrt_llm::runtime::gptsession::mruntime (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession8mRuntimeE"]], "tensorrt_llm::runtime::gptsession::mworldconfig (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession12mWorldConfigE"]], "tensorrt_llm::runtime::gptsession::setup (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession5setupERK6Config"]], "tensorrt_llm::runtime::gptsession::shouldstopsync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::usecudagraphs (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession13useCudaGraphsEv"]], "tensorrt_llm::runtime::ibuffer (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBufferE"]], "tensorrt_llm::runtime::ibuffer::datatype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer8DataTypeE"]], "tensorrt_llm::runtime::ibuffer::ibuffer (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferERK7IBuffer"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferEv"]], "tensorrt_llm::runtime::ibuffer::sharedconstptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer14SharedConstPtrE"]], "tensorrt_llm::runtime::ibuffer::sharedptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer9SharedPtrE"]], 
"tensorrt_llm::runtime::ibuffer::uniqueconstptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer14UniqueConstPtrE"]], "tensorrt_llm::runtime::ibuffer::uniqueptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer9UniquePtrE"]], "tensorrt_llm::runtime::ibuffer::data (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataEv"], [2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE"], [2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataEv"]], "tensorrt_llm::runtime::ibuffer::getcapacity (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer11getCapacityEv"]], "tensorrt_llm::runtime::ibuffer::getdatatype (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer11getDataTypeEv"]], "tensorrt_llm::runtime::ibuffer::getdatatypename (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer15getDataTypeNameEv"]], "tensorrt_llm::runtime::ibuffer::getmemorytype (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer13getMemoryTypeEv"]], "tensorrt_llm::runtime::ibuffer::getmemorytypename (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer17getMemoryTypeNameEv"]], "tensorrt_llm::runtime::ibuffer::getsize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7getSizeEv"]], "tensorrt_llm::runtime::ibuffer::getsizeinbytes (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer14getSizeInBytesEv"]], "tensorrt_llm::runtime::ibuffer::memorytype (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer10memoryTypeEPKv"]], "tensorrt_llm::runtime::ibuffer::operator= (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBufferaSERK7IBuffer"]], "tensorrt_llm::runtime::ibuffer::release (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer7releaseEv"]], "tensorrt_llm::runtime::ibuffer::resize (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer6resizeENSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::slice (c++ function)": [[2, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE"], [2, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::tobytes (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7toBytesENSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::view (c++ function)": [[2, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtr"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::wrap (c++ function)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE"], [2, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE"], [2, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::~ibuffer (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBufferD0Ev"]], 
"tensorrt_llm::runtime::igptdecoder (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoderE"]], "tensorrt_llm::runtime::igptdecoder::accepttokens (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12acceptTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRKN13BufferManager13CudaStreamPtrE"]], "tensorrt_llm::runtime::igptdecoder::create (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createEN8nvinfer18DataTypeE6size_t6size_tRKN13BufferManager13CudaStreamPtrE"]], "tensorrt_llm::runtime::igptdecoder::forward (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder7forwardER14DecodingOutputRK13DecodingInput"]], "tensorrt_llm::runtime::igptdecoder::forwardasync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput"]], "tensorrt_llm::runtime::igptdecoder::gathertree (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager"]], "tensorrt_llm::runtime::igptdecoder::setup (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t8SizeType"]], "tensorrt_llm::runtime::igptdecoder::~igptdecoder (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoderD0Ev"]], "tensorrt_llm::runtime::igptdecoderbatch (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatchE"]], "tensorrt_llm::runtime::igptdecoderbatch::cudastreamptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch13CudaStreamPtrE"]], "tensorrt_llm::runtime::igptdecoderbatch::igptdecoderbatch (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch16IGptDecoderBatchEv"]], "tensorrt_llm::runtime::igptdecoderbatch::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch9TensorPtrE"]], "tensorrt_llm::runtime::igptdecoderbatch::tokenptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch8TokenPtrE"]], "tensorrt_llm::runtime::igptdecoderbatch::finalize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch8finalizeE8SizeType"]], "tensorrt_llm::runtime::igptdecoderbatch::forward (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch7forwardERN13decoder_batch6OutputERKN13decoder_batch5InputE"]], "tensorrt_llm::runtime::igptdecoderbatch::forwardasync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE"]], "tensorrt_llm::runtime::igptdecoderbatch::forwardsync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE"]], "tensorrt_llm::runtime::igptdecoderbatch::getcumlogprobs (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch14getCumLogProbsE8SizeType"], [2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch14getCumLogProbsEv"]], "tensorrt_llm::runtime::igptdecoderbatch::getfinished (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getFinishedEv"]], "tensorrt_llm::runtime::igptdecoderbatch::getlogprobs (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getLogProbsE8SizeType"], [2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getLogProbsEv"]], "tensorrt_llm::runtime::igptdecoderbatch::getnbsteps (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch10getNbStepsEv"]], "tensorrt_llm::runtime::igptdecoderbatch::getoutputids (c++ function)": [[2, 
"_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getOutputIdsE8SizeType"]], "tensorrt_llm::runtime::igptdecoderbatch::getparentids (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getParentIdsEv"]], "tensorrt_llm::runtime::igptdecoderbatch::newrequest (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig"]], "tensorrt_llm::runtime::istatefulgptdecoder (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoderE"]], "tensorrt_llm::runtime::istatefulgptdecoder::cudastreamptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder13CudaStreamPtrE"]], "tensorrt_llm::runtime::istatefulgptdecoder::istatefulgptdecoder (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder19IStatefulGptDecoderEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder9TensorPtrE"]], "tensorrt_llm::runtime::istatefulgptdecoder::finalize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder8finalizeEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::forward (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder7forwardERN7decoder6OutputERKN7decoder5InputE"]], "tensorrt_llm::runtime::istatefulgptdecoder::forwardasync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder12forwardAsyncERN7decoder6OutputERKN7decoder5InputE"]], "tensorrt_llm::runtime::istatefulgptdecoder::forwardsync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder11forwardSyncEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::getallnewtokens (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder15getAllNewTokensEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::getcumlogprobs (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder14getCumLogProbsEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::getlogprobs (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder11getLogProbsEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::getnbfinished (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder13getNbFinishedEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::getnewtokens (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getNewTokensE8SizeType"]], "tensorrt_llm::runtime::istatefulgptdecoder::getoutputids (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getOutputIdsEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::newbatch (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig"]], "tensorrt_llm::runtime::istatefulgptdecoder::setup (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::istatefulgptdecoder::~istatefulgptdecoder (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoderD0Ev"]], "tensorrt_llm::runtime::itensor (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensorE"]], "tensorrt_llm::runtime::itensor::dimtype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor7DimTypeE"]], "tensorrt_llm::runtime::itensor::itensor (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorERK7ITensor"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorEv"]], 
"tensorrt_llm::runtime::itensor::shape (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor5ShapeE"]], "tensorrt_llm::runtime::itensor::sharedconstptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor14SharedConstPtrE"]], "tensorrt_llm::runtime::itensor::sharedptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor9SharedPtrE"]], "tensorrt_llm::runtime::itensor::uniqueconstptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor14UniqueConstPtrE"]], "tensorrt_llm::runtime::itensor::uniqueptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor9UniquePtrE"]], "tensorrt_llm::runtime::itensor::castsize (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor8castSizeE6size_t"]], "tensorrt_llm::runtime::itensor::getshape (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7ITensor8getShapeEv"]], "tensorrt_llm::runtime::itensor::makeshape (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor9makeShapeERKNSt16initializer_listI8SizeTypeEE"]], "tensorrt_llm::runtime::itensor::operator= (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensoraSERK7ITensor"]], "tensorrt_llm::runtime::itensor::reshape (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor7reshapeERK5Shape"]], "tensorrt_llm::runtime::itensor::resize (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor6resizeENSt6size_tE"]], "tensorrt_llm::runtime::itensor::slice (c++ function)": [[2, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE"], [2, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE"]], "tensorrt_llm::runtime::itensor::squeeze (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeE8SizeType"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape8SizeType"]], "tensorrt_llm::runtime::itensor::tostring (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor8toStringERK5Shape"]], "tensorrt_llm::runtime::itensor::unsqueeze (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeE8SizeType"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeERK5Shape8SizeType"]], "tensorrt_llm::runtime::itensor::view (c++ function)": [[2, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewE9SharedPtr"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape"]], "tensorrt_llm::runtime::itensor::volume (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor6volumeERK5Shape"]], "tensorrt_llm::runtime::itensor::volumenonnegative (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor17volumeNonNegativeERK5Shape"]], "tensorrt_llm::runtime::itensor::wrap (c++ function)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape"], [2, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE"], [2, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE"]], 
"tensorrt_llm::runtime::itensor::~itensor (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensorD0Ev"]], "tensorrt_llm::runtime::ipcmemory (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemoryE"]], "tensorrt_llm::runtime::ipcmemory::flags_size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory10FLAGS_SIZEE"]], "tensorrt_llm::runtime::ipcmemory::ipcmemory (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryE11WorldConfigNSt6size_tE"]], "tensorrt_llm::runtime::ipcmemory::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9TensorPtrE"]], "tensorrt_llm::runtime::ipcmemory::allocateipcmemory (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory17allocateIpcMemoryEv"]], "tensorrt_llm::runtime::ipcmemory::destroyipcmemory (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory16destroyIpcMemoryEv"]], "tensorrt_llm::runtime::ipcmemory::getcommptrstensor (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime9IpcMemory17getCommPtrsTensorEv"]], "tensorrt_llm::runtime::ipcmemory::mbufferptr (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory10mBufferPtrE"]], "tensorrt_llm::runtime::ipcmemory::mbuffersize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory11mBufferSizeE"]], "tensorrt_llm::runtime::ipcmemory::mcommptrs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9mCommPtrsE"]], "tensorrt_llm::runtime::ipcmemory::mworldconfig (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory12mWorldConfigE"]], "tensorrt_llm::runtime::ipcmemory::~ipcmemory (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemoryD0Ev"]], "tensorrt_llm::runtime::memorycounters (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCountersE"]], "tensorrt_llm::runtime::memorycounters::difftype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8DiffTypeE"]], "tensorrt_llm::runtime::memorycounters::memorycounters (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters14MemoryCountersEv"]], "tensorrt_llm::runtime::memorycounters::sizetype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8SizeTypeE"]], "tensorrt_llm::runtime::memorycounters::allocate (c++ function)": [[2, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv8SizeType"], [2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType8SizeType"]], "tensorrt_llm::runtime::memorycounters::bytestostring (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei"], [2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8SizeTypei"]], "tensorrt_llm::runtime::memorycounters::deallocate (c++ function)": [[2, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv8SizeType"], [2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType8SizeType"]], "tensorrt_llm::runtime::memorycounters::getcpu (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getCpuEv"]], "tensorrt_llm::runtime::memorycounters::getcpudiff (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getCpuDiffEv"]], "tensorrt_llm::runtime::memorycounters::getgpu (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getGpuEv"]], "tensorrt_llm::runtime::memorycounters::getgpudiff (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getGpuDiffEv"]], "tensorrt_llm::runtime::memorycounters::getinstance (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters11getInstanceEv"]], 
"tensorrt_llm::runtime::memorycounters::getpinned (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters9getPinnedEv"]], "tensorrt_llm::runtime::memorycounters::getpinneddiff (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters13getPinnedDiffEv"]], "tensorrt_llm::runtime::memorycounters::mcpu (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mCpuE"]], "tensorrt_llm::runtime::memorycounters::mcpudiff (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mCpuDiffE"]], "tensorrt_llm::runtime::memorycounters::mgpu (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mGpuE"]], "tensorrt_llm::runtime::memorycounters::mgpudiff (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mGpuDiffE"]], "tensorrt_llm::runtime::memorycounters::minstance (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters9mInstanceE"]], "tensorrt_llm::runtime::memorycounters::mpinned (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters7mPinnedE"]], "tensorrt_llm::runtime::memorycounters::mpinneddiff (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters11mPinnedDiffE"]], "tensorrt_llm::runtime::memorycounters::tostring (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters8toStringEv"]], "tensorrt_llm::runtime::memorytype (c++ enum)": [[2, "_CPPv4N12tensorrt_llm7runtime10MemoryTypeE"]], "tensorrt_llm::runtime::memorytype::kcpu (c++ enumerator)": [[2, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kCPUE"]], "tensorrt_llm::runtime::memorytype::kgpu (c++ enumerator)": [[2, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kGPUE"]], "tensorrt_llm::runtime::memorytype::kpinned (c++ enumerator)": [[2, "_CPPv4N12tensorrt_llm7runtime10MemoryType7kPINNEDE"]], "tensorrt_llm::runtime::memorytypestring (c++ struct)": [[2, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime16MemoryTypeStringE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kcpu> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEEE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kcpu>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEE5valueE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kgpu> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEEE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kgpu>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEE5valueE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kpinned> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEEE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kpinned>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEE5valueE"]], "tensorrt_llm::runtime::phonynameduetoerror::name (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE"]], "tensorrt_llm::runtime::phonynameduetoerror::size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE"]], "tensorrt_llm::runtime::phonynameduetoerror::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE"]], "tensorrt_llm::runtime::phonynameduetoerror::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE"]], "tensorrt_llm::runtime::pointerelementtype (c++ type)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime18PointerElementTypeE"]], 
"tensorrt_llm::runtime::prompttuningparams (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParamsE"]], "tensorrt_llm::runtime::prompttuningparams::prompttuningparams (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams18PromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr"]], "tensorrt_llm::runtime::prompttuningparams::sizetype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams8SizeTypeE"]], "tensorrt_llm::runtime::prompttuningparams::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams9TensorPtrE"]], "tensorrt_llm::runtime::prompttuningparams::filltaskstensor (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb"]], "tensorrt_llm::runtime::samplingconfig (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfigE"]], "tensorrt_llm::runtime::samplingconfig::floattype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9FloatTypeE"]], "tensorrt_llm::runtime::samplingconfig::optvec (c++ type)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig6OptVecE"]], "tensorrt_llm::runtime::samplingconfig::samplingconfig (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigE8SizeType"]], "tensorrt_llm::runtime::samplingconfig::beamsearchdiversityrate (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig23beamSearchDiversityRateE"]], "tensorrt_llm::runtime::samplingconfig::beamwidth (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9beamWidthE"]], "tensorrt_llm::runtime::samplingconfig::lengthpenalty (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig13lengthPenaltyE"]], "tensorrt_llm::runtime::samplingconfig::minlength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9minLengthE"]], "tensorrt_llm::runtime::samplingconfig::presencepenalty (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig15presencePenaltyE"]], "tensorrt_llm::runtime::samplingconfig::randomseed (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig10randomSeedE"]], "tensorrt_llm::runtime::samplingconfig::repetitionpenalty (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig17repetitionPenaltyE"]], "tensorrt_llm::runtime::samplingconfig::temperature (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig11temperatureE"]], "tensorrt_llm::runtime::samplingconfig::topk (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topKE"]], "tensorrt_llm::runtime::samplingconfig::topp (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topPE"]], "tensorrt_llm::runtime::samplingconfig::toppdecay (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9topPDecayE"]], "tensorrt_llm::runtime::samplingconfig::toppmin (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig7topPMinE"]], "tensorrt_llm::runtime::samplingconfig::toppresetids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig12topPResetIdsE"]], "tensorrt_llm::runtime::sizetype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime8SizeTypeE"]], "tensorrt_llm::runtime::stringptrmap (c++ type)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime12StringPtrMapE"]], "tensorrt_llm::runtime::trtdatatype (c++ struct)": [[2, "_CPPv4I0_bEN12tensorrt_llm7runtime11TRTDataTypeE"]], "tensorrt_llm::runtime::trtdatatype<t*> (c++ struct)": [[2, 
"_CPPv4I0EN12tensorrt_llm7runtime11TRTDataTypeIP1TEE"]], "tensorrt_llm::runtime::trtdatatype<t*>::kunderlyingtype (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE15kUnderlyingTypeE"]], "tensorrt_llm::runtime::trtdatatype<t*>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE5valueE"]], "tensorrt_llm::runtime::trtdatatype<bool> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIbEE"]], "tensorrt_llm::runtime::trtdatatype<bool>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIbE5valueE"]], "tensorrt_llm::runtime::trtdatatype<float> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIfEE"]], "tensorrt_llm::runtime::trtdatatype<float>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIfE5valueE"]], "tensorrt_llm::runtime::trtdatatype<half> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeI4halfEE"]], "tensorrt_llm::runtime::trtdatatype<half>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeI4halfE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::int32_t> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::int32_t>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::int64_t> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::int64_t>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::int8_t> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::int8_t>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::uint32_t> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::uint32_t>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::uint64_t> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::uint64_t>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::uint8_t> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::uint8_t>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<void*> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIPvEE"]], "tensorrt_llm::runtime::trtdatatype<void*>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIPvE5valueE"]], "tensorrt_llm::runtime::tllmlogger (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime10TllmLoggerE"]], "tensorrt_llm::runtime::tllmlogger::getlevel (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8getLevelEv"]], "tensorrt_llm::runtime::tllmlogger::log (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE"]], "tensorrt_llm::runtime::tllmlogger::setlevel (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8setLevelE8Severity"]], "tensorrt_llm::runtime::tokenidtype (c++ 
type)": [[2, "_CPPv4N12tensorrt_llm7runtime11TokenIdTypeE"]], "tensorrt_llm::runtime::worldconfig (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfigE"]], "tensorrt_llm::runtime::worldconfig::worldconfig (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::worldconfig::getdevice (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig9getDeviceEv"]], "tensorrt_llm::runtime::worldconfig::getgpuspernode (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig14getGpusPerNodeEv"]], "tensorrt_llm::runtime::worldconfig::getlastrank (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig11getLastRankEv"]], "tensorrt_llm::runtime::worldconfig::getpipelineparallelgroup (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig24getPipelineParallelGroupEv"]], "tensorrt_llm::runtime::worldconfig::getpipelineparallelrank (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig23getPipelineParallelRankEv"]], "tensorrt_llm::runtime::worldconfig::getpipelineparallelism (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig22getPipelineParallelismEv"]], "tensorrt_llm::runtime::worldconfig::getrank (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getRankEv"]], "tensorrt_llm::runtime::worldconfig::getsize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getSizeEv"]], "tensorrt_llm::runtime::worldconfig::gettensorparallelrank (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig21getTensorParallelRankEv"]], "tensorrt_llm::runtime::worldconfig::gettensorparallelism (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig20getTensorParallelismEv"]], "tensorrt_llm::runtime::worldconfig::isfirstpipelineparallelrank (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig27isFirstPipelineParallelRankEv"]], "tensorrt_llm::runtime::worldconfig::islastpipelineparallelrank (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig26isLastPipelineParallelRankEv"]], "tensorrt_llm::runtime::worldconfig::ispipelineparallel (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig18isPipelineParallelEv"]], "tensorrt_llm::runtime::worldconfig::istensorparallel (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig16isTensorParallelEv"]], "tensorrt_llm::runtime::worldconfig::kdefaultgpuspernode (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig19kDefaultGpusPerNodeE"]], "tensorrt_llm::runtime::worldconfig::mgpuspernode (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig12mGpusPerNodeE"]], "tensorrt_llm::runtime::worldconfig::mpipelineparallelism (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig20mPipelineParallelismE"]], "tensorrt_llm::runtime::worldconfig::mrank (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig5mRankE"]], "tensorrt_llm::runtime::worldconfig::mtensorparallelism (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig18mTensorParallelismE"]], "tensorrt_llm::runtime::worldconfig::mpi (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE"], [2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiERN8nvinfer17ILoggerE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE"]], "tensorrt_llm::runtime::worldconfig::validconfig (c++ function)": [[2, 
"_CPPv4N12tensorrt_llm7runtime11WorldConfig11validConfigERN8nvinfer17ILoggerE8SizeType8SizeType"]], "tensorrt_llm::runtime::buffercast (c++ function)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer"], [2, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer"]], "tensorrt_llm::runtime::constpointercast (c++ function)": [[2, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE"], [2, "_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE"]], "tensorrt_llm::runtime::decoder (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoderE"]], "tensorrt_llm::runtime::decoder::input (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder5InputE"]], "tensorrt_llm::runtime::decoder::input::input (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder5Input5InputE9TensorPtr"]], "tensorrt_llm::runtime::decoder::input::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder5Input9TensorPtrE"]], "tensorrt_llm::runtime::decoder::input::cacheindirection (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder5Input16cacheIndirectionE"]], "tensorrt_llm::runtime::decoder::input::logits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder5Input6logitsE"]], "tensorrt_llm::runtime::decoder::output (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder6OutputE"]], "tensorrt_llm::runtime::decoder::output::output (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder6Output6OutputEv"]], "tensorrt_llm::runtime::decoder::output::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder6Output9TensorPtrE"]], "tensorrt_llm::runtime::decoder::output::cacheindirection (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder6Output16cacheIndirectionE"]], "tensorrt_llm::runtime::decoder::output::sequencelengths (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder6Output15sequenceLengthsE"]], "tensorrt_llm::runtime::decoder_batch (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batchE"]], "tensorrt_llm::runtime::decoder_batch::input (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5InputE"]], "tensorrt_llm::runtime::decoder_batch::input::input (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEE"], [2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEERKNSt6vectorIbEE"], [2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEE"], [2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEERKNSt6vectorIbEE"]], "tensorrt_llm::runtime::decoder_batch::input::tensorconstptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input14TensorConstPtrE"]], "tensorrt_llm::runtime::decoder_batch::input::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input9TensorPtrE"]], "tensorrt_llm::runtime::decoder_batch::input::active (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input6activeE"]], "tensorrt_llm::runtime::decoder_batch::input::cacheindirection (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input16cacheIndirectionE"]], "tensorrt_llm::runtime::decoder_batch::input::logits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input6logitsE"]], "tensorrt_llm::runtime::decoder_batch::output (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch6OutputE"]], 
"tensorrt_llm::runtime::decoder_batch::request (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7RequestE"]], "tensorrt_llm::runtime::decoder_batch::request::bufferptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request9BufferPtrE"]], "tensorrt_llm::runtime::decoder_batch::request::consttensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request14ConstTensorPtrE"]], "tensorrt_llm::runtime::decoder_batch::request::request (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14ConstTensorPtr8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE"]], "tensorrt_llm::runtime::decoder_batch::request::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request9TensorPtrE"]], "tensorrt_llm::runtime::decoder_batch::request::badwordslist (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12badWordsListE"]], "tensorrt_llm::runtime::decoder_batch::request::computecumlogprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request18computeCumLogProbsE"]], "tensorrt_llm::runtime::decoder_batch::request::computelogprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request15computeLogProbsE"]], "tensorrt_llm::runtime::decoder_batch::request::drafttokens (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request11draftTokensE"]], "tensorrt_llm::runtime::decoder_batch::request::embeddingbias (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13embeddingBiasE"]], "tensorrt_llm::runtime::decoder_batch::request::endid (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request5endIdE"]], "tensorrt_llm::runtime::decoder_batch::request::generatedtokensperstep (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13decoder_batch7Request22generatedTokensPerStepEv"]], "tensorrt_llm::runtime::decoder_batch::request::ids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request3idsE"]], "tensorrt_llm::runtime::decoder_batch::request::inputlen (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request8inputLenE"]], "tensorrt_llm::runtime::decoder_batch::request::maxnewtokens (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12maxNewTokensE"]], "tensorrt_llm::runtime::decoder_batch::request::stopwordslist (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13stopWordsListE"]], "tensorrt_llm::runtime::decoder_batch::token (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5TokenE"]], "tensorrt_llm::runtime::decoder_batch::token::token (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5TokenERR9CudaEventRKNSt6vectorIbEE"]], "tensorrt_llm::runtime::decoder_batch::token::active (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token6activeE"]], "tensorrt_llm::runtime::decoder_batch::token::event (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5eventE"]], "tensorrt_llm::runtime::operator<< (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer"], [2, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor"], [2, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE"]], "tensorrt_llm::runtime::setpeeraccess (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13setPeerAccessE11WorldConfigb"]], "tensorrt_llm::runtime::utils (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime5utilsE"]], "tensorrt_llm::runtime::utils::loadengine 
(c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime5utils10loadEngineERKNSt6stringE"]], "auto (tensorrt_llm.functional.allreducestrategy attribute)": [[15, "tensorrt_llm.functional.AllReduceStrategy.AUTO"]], "allreducestrategy (class in tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.AllReduceStrategy"]], "attentionmasktype (class in tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.AttentionMaskType"]], "dimrange (class in tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.DimRange"]], "fusedgatedmlp (tensorrt_llm.functional.mlptype attribute)": [[15, "tensorrt_llm.functional.MLPType.FusedGatedMLP"]], "gatedmlp (tensorrt_llm.functional.mlptype attribute)": [[15, "tensorrt_llm.functional.MLPType.GatedMLP"]], "groupnorm (tensorrt_llm.functional.layernormtype attribute)": [[15, "tensorrt_llm.functional.LayerNormType.GroupNorm"]], "layernorm (tensorrt_llm.functional.layernormtype attribute)": [[15, "tensorrt_llm.functional.LayerNormType.LayerNorm"]], "layernormpositiontype (class in tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.LayerNormPositionType"]], "layernormtype (class in tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.LayerNormType"]], "mlp (tensorrt_llm.functional.mlptype attribute)": [[15, "tensorrt_llm.functional.MLPType.MLP"]], "mlptype (class in tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.MLPType"]], "oneshot (tensorrt_llm.functional.allreducestrategy attribute)": [[15, "tensorrt_llm.functional.AllReduceStrategy.ONESHOT"]], "positionembeddingtype (class in tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.PositionEmbeddingType"]], "ring (tensorrt_llm.functional.allreducestrategy attribute)": [[15, "tensorrt_llm.functional.AllReduceStrategy.RING"]], "rmsnorm (tensorrt_llm.functional.layernormtype attribute)": [[15, "tensorrt_llm.functional.LayerNormType.RmsNorm"]], "rotaryscalingtype (class in tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.RotaryScalingType"]], "twoshot (tensorrt_llm.functional.allreducestrategy attribute)": [[15, "tensorrt_llm.functional.AllReduceStrategy.TWOSHOT"]], "tensor (class in tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.Tensor"]], "abs() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.abs"]], "abs() (tensorrt_llm.functional.tensor method)": [[15, "tensorrt_llm.functional.Tensor.abs"]], "activation() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.activation"]], "add() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.add"]], "alibi (tensorrt_llm.functional.positionembeddingtype attribute)": [[15, "tensorrt_llm.functional.PositionEmbeddingType.alibi"]], "alibi_with_scale (tensorrt_llm.functional.positionembeddingtype attribute)": [[15, "tensorrt_llm.functional.PositionEmbeddingType.alibi_with_scale"]], "allgather() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.allgather"]], "allreduce() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.allreduce"]], "arange() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.arange"]], "argmax() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.argmax"]], "assertion() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.assertion"]], "avg_pool2d() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.avg_pool2d"]], "bert_attention() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.bert_attention"]], "bidirectional 
(tensorrt_llm.functional.attentionmasktype attribute)": [[15, "tensorrt_llm.functional.AttentionMaskType.bidirectional"]], "bidirectionalglm (tensorrt_llm.functional.attentionmasktype attribute)": [[15, "tensorrt_llm.functional.AttentionMaskType.bidirectionalglm"]], "broadcast_helper() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.broadcast_helper"]], "cast() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.cast"]], "cast() (tensorrt_llm.functional.tensor method)": [[15, "tensorrt_llm.functional.Tensor.cast"]], "causal (tensorrt_llm.functional.attentionmasktype attribute)": [[15, "tensorrt_llm.functional.AttentionMaskType.causal"]], "chatglm (tensorrt_llm.functional.positionembeddingtype attribute)": [[15, "tensorrt_llm.functional.PositionEmbeddingType.chatglm"]], "choices() (tensorrt_llm.functional.positionembeddingtype static method)": [[15, "tensorrt_llm.functional.PositionEmbeddingType.choices"]], "chunk() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.chunk"]], "clip() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.clip"]], "concat() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.concat"]], "constant() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.constant"]], "constant_to_tensor_() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.constant_to_tensor_"]], "conv2d() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.conv2d"]], "conv_transpose2d() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.conv_transpose2d"]], "cos() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.cos"]], "div() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.div"]], "dtype (tensorrt_llm.functional.tensor property)": [[15, "tensorrt_llm.functional.Tensor.dtype"]], "dynamic (tensorrt_llm.functional.rotaryscalingtype attribute)": [[15, "tensorrt_llm.functional.RotaryScalingType.dynamic"]], "einsum() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.einsum"]], "elementwise_binary() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.elementwise_binary"]], "embedding() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.embedding"]], "eq() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.eq"]], "exp() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.exp"]], "expand() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.expand"]], "expand_dims() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.expand_dims"]], "expand_dims_like() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.expand_dims_like"]], "expand_mask() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.expand_mask"]], "flip() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.flip"]], "gather() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.gather"]], "gather_last_token_logits() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.gather_last_token_logits"]], "geglu() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.geglu"]], "gelu() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.gelu"]], "generate_alibi_biases() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.generate_alibi_biases"]], "generate_alibi_slopes() (in module tensorrt_llm.functional)": 
[[15, "tensorrt_llm.functional.generate_alibi_slopes"]], "get_parent() (tensorrt_llm.functional.tensor method)": [[15, "tensorrt_llm.functional.Tensor.get_parent"]], "get_users() (tensorrt_llm.functional.tensor method)": [[15, "tensorrt_llm.functional.Tensor.get_users"]], "gpt_attention() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.gpt_attention"]], "group_norm() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.group_norm"]], "gt() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.gt"]], "identity() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.identity"]], "index_select() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.index_select"]], "interpolate() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.interpolate"]], "is_alibi() (tensorrt_llm.functional.positionembeddingtype method)": [[15, "tensorrt_llm.functional.PositionEmbeddingType.is_alibi"]], "is_dynamic() (tensorrt_llm.functional.tensor method)": [[15, "tensorrt_llm.functional.Tensor.is_dynamic"]], "is_gated_activation() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.is_gated_activation"]], "is_rope() (tensorrt_llm.functional.positionembeddingtype method)": [[15, "tensorrt_llm.functional.PositionEmbeddingType.is_rope"]], "is_trt_wrapper() (tensorrt_llm.functional.tensor method)": [[15, "tensorrt_llm.functional.Tensor.is_trt_wrapper"]], "layer_norm() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.layer_norm"]], "learned_absolute (tensorrt_llm.functional.positionembeddingtype attribute)": [[15, "tensorrt_llm.functional.PositionEmbeddingType.learned_absolute"]], "linear (tensorrt_llm.functional.rotaryscalingtype attribute)": [[15, "tensorrt_llm.functional.RotaryScalingType.linear"]], "location (tensorrt_llm.functional.tensor property)": [[15, "tensorrt_llm.functional.Tensor.location"]], "lora_plugin() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.lora_plugin"]], "lt() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.lt"]], "mark_output() (tensorrt_llm.functional.tensor method)": [[15, "tensorrt_llm.functional.Tensor.mark_output"]], "matmul() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.matmul"]], "max() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.max"]], "max() (tensorrt_llm.functional.tensor method)": [[15, "tensorrt_llm.functional.Tensor.max"]], "maximum() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.maximum"]], "mean() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.mean"]], "mean() (tensorrt_llm.functional.tensor method)": [[15, "tensorrt_llm.functional.Tensor.mean"]], "minimum() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.minimum"]], "module": [[15, "module-tensorrt_llm"], [15, "module-tensorrt_llm.functional"], [16, "module-tensorrt_llm"], [16, "module-tensorrt_llm.layers.activation"], [16, "module-tensorrt_llm.layers.attention"], [16, "module-tensorrt_llm.layers.cast"], [16, "module-tensorrt_llm.layers.conv"], [16, "module-tensorrt_llm.layers.embedding"], [16, "module-tensorrt_llm.layers.linear"], [16, "module-tensorrt_llm.layers.mlp"], [16, "module-tensorrt_llm.layers.normalization"], [16, "module-tensorrt_llm.layers.pooling"], [17, "module-tensorrt_llm"], [17, "module-tensorrt_llm.models"], [18, "module-tensorrt_llm"], [18, "module-tensorrt_llm.plugin"], [19, "module-tensorrt_llm"], [19, 
"module-tensorrt_llm.quantization"], [20, "module-tensorrt_llm"], [20, "module-tensorrt_llm.runtime"]], "mul() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.mul"]], "name (tensorrt_llm.functional.tensor property)": [[15, "tensorrt_llm.functional.Tensor.name"]], "ndim() (tensorrt_llm.functional.tensor method)": [[15, "tensorrt_llm.functional.Tensor.ndim"]], "network (tensorrt_llm.functional.tensor property)": [[15, "tensorrt_llm.functional.Tensor.network"]], "non_gated_version() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.non_gated_version"]], "none (tensorrt_llm.functional.rotaryscalingtype attribute)": [[15, "tensorrt_llm.functional.RotaryScalingType.none"]], "op_and() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.op_and"]], "op_or() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.op_or"]], "outer() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.outer"]], "padding (tensorrt_llm.functional.attentionmasktype attribute)": [[15, "tensorrt_llm.functional.AttentionMaskType.padding"]], "permute() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.permute"]], "permute() (tensorrt_llm.functional.tensor method)": [[15, "tensorrt_llm.functional.Tensor.permute"]], "post_layernorm (tensorrt_llm.functional.layernormpositiontype attribute)": [[15, "tensorrt_llm.functional.LayerNormPositionType.post_layernorm"]], "pow() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.pow"]], "pre_layernorm (tensorrt_llm.functional.layernormpositiontype attribute)": [[15, "tensorrt_llm.functional.LayerNormPositionType.pre_layernorm"]], "rank() (tensorrt_llm.functional.tensor method)": [[15, "tensorrt_llm.functional.Tensor.rank"]], "recv() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.recv"]], "relative (tensorrt_llm.functional.positionembeddingtype attribute)": [[15, "tensorrt_llm.functional.PositionEmbeddingType.relative"]], "relu() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.relu"]], "repeat_interleave() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.repeat_interleave"]], "replace_all_uses_with() (tensorrt_llm.functional.tensor method)": [[15, "tensorrt_llm.functional.Tensor.replace_all_uses_with"]], "rms_norm() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.rms_norm"]], "rope_gpt_neox (tensorrt_llm.functional.positionembeddingtype attribute)": [[15, "tensorrt_llm.functional.PositionEmbeddingType.rope_gpt_neox"]], "rope_gptj (tensorrt_llm.functional.positionembeddingtype attribute)": [[15, "tensorrt_llm.functional.PositionEmbeddingType.rope_gptj"]], "round() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.round"]], "select() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.select"]], "send() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.send"]], "shape (tensorrt_llm.functional.tensor property)": [[15, "tensorrt_llm.functional.Tensor.shape"]], "shape() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.shape"]], "sigmoid() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.sigmoid"]], "silu() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.silu"]], "sin() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.sin"]], "size() (tensorrt_llm.functional.tensor method)": [[15, "tensorrt_llm.functional.Tensor.size"]], "slice() (in module 
tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.slice"]], "softmax() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.softmax"]], "softplus() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.softplus"]], "split() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.split"]], "split() (tensorrt_llm.functional.tensor method)": [[15, "tensorrt_llm.functional.Tensor.split"]], "sqrt() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.sqrt"]], "sqrt() (tensorrt_llm.functional.tensor method)": [[15, "tensorrt_llm.functional.Tensor.sqrt"]], "squared_relu() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.squared_relu"]], "sub() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.sub"]], "swiglu() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.swiglu"]], "tanh() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.tanh"]], "tensorrt_llm": [[15, "module-tensorrt_llm"], [16, "module-tensorrt_llm"], [17, "module-tensorrt_llm"], [18, "module-tensorrt_llm"], [19, "module-tensorrt_llm"], [20, "module-tensorrt_llm"]], "tensorrt_llm.functional": [[15, "module-tensorrt_llm.functional"]], "transpose() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.transpose"]], "transpose() (tensorrt_llm.functional.tensor method)": [[15, "tensorrt_llm.functional.Tensor.transpose"]], "unary() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.unary"]], "unsqueeze() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.unsqueeze"]], "view() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.view"]], "view() (tensorrt_llm.functional.tensor method)": [[15, "tensorrt_llm.functional.Tensor.view"]], "where() (in module tensorrt_llm.functional)": [[15, "tensorrt_llm.functional.where"]], "attention (class in tensorrt_llm.layers.attention)": [[16, "tensorrt_llm.layers.attention.Attention"]], "attentionparams (class in tensorrt_llm.layers.attention)": [[16, "tensorrt_llm.layers.attention.AttentionParams"]], "avgpool2d (class in tensorrt_llm.layers.pooling)": [[16, "tensorrt_llm.layers.pooling.AvgPool2d"]], "bertattention (class in tensorrt_llm.layers.attention)": [[16, "tensorrt_llm.layers.attention.BertAttention"]], "cast (class in tensorrt_llm.layers.cast)": [[16, "tensorrt_llm.layers.cast.Cast"]], "columnlinear (in module tensorrt_llm.layers.linear)": [[16, "tensorrt_llm.layers.linear.ColumnLinear"]], "conv2d (class in tensorrt_llm.layers.conv)": [[16, "tensorrt_llm.layers.conv.Conv2d"]], "convtranspose2d (class in tensorrt_llm.layers.conv)": [[16, "tensorrt_llm.layers.conv.ConvTranspose2d"]], "embedding (class in tensorrt_llm.layers.embedding)": [[16, "tensorrt_llm.layers.embedding.Embedding"]], "fusedgatedmlp (class in tensorrt_llm.layers.mlp)": [[16, "tensorrt_llm.layers.mlp.FusedGatedMLP"]], "gatedmlp (class in tensorrt_llm.layers.mlp)": [[16, "tensorrt_llm.layers.mlp.GatedMLP"]], "groupnorm (class in tensorrt_llm.layers.normalization)": [[16, "tensorrt_llm.layers.normalization.GroupNorm"]], "keyvaluecacheparams (class in tensorrt_llm.layers.attention)": [[16, "tensorrt_llm.layers.attention.KeyValueCacheParams"]], "layernorm (class in tensorrt_llm.layers.normalization)": [[16, "tensorrt_llm.layers.normalization.LayerNorm"]], "linear (class in tensorrt_llm.layers.linear)": [[16, "tensorrt_llm.layers.linear.Linear"]], "mlp (class in tensorrt_llm.layers.mlp)": [[16, "tensorrt_llm.layers.mlp.MLP"]], 
"mish (class in tensorrt_llm.layers.activation)": [[16, "tensorrt_llm.layers.activation.Mish"]], "prompttuningembedding (class in tensorrt_llm.layers.embedding)": [[16, "tensorrt_llm.layers.embedding.PromptTuningEmbedding"]], "rmsnorm (class in tensorrt_llm.layers.normalization)": [[16, "tensorrt_llm.layers.normalization.RmsNorm"]], "ropeembeddingutils (class in tensorrt_llm.layers.attention)": [[16, "tensorrt_llm.layers.attention.RopeEmbeddingUtils"]], "rowlinear (class in tensorrt_llm.layers.linear)": [[16, "tensorrt_llm.layers.linear.RowLinear"]], "apply_rotary_pos_emb() (tensorrt_llm.layers.attention.ropeembeddingutils static method)": [[16, "tensorrt_llm.layers.attention.RopeEmbeddingUtils.apply_rotary_pos_emb"]], "apply_rotary_pos_emb_chatglm() (tensorrt_llm.layers.attention.ropeembeddingutils static method)": [[16, "tensorrt_llm.layers.attention.RopeEmbeddingUtils.apply_rotary_pos_emb_chatglm"]], "create_sinusoidal_positions() (tensorrt_llm.layers.attention.ropeembeddingutils static method)": [[16, "tensorrt_llm.layers.attention.RopeEmbeddingUtils.create_sinusoidal_positions"]], "fill_none_tensor_list() (tensorrt_llm.layers.attention.keyvaluecacheparams method)": [[16, "tensorrt_llm.layers.attention.KeyValueCacheParams.fill_none_tensor_list"]], "forward() (tensorrt_llm.layers.activation.mish method)": [[16, "tensorrt_llm.layers.activation.Mish.forward"]], "forward() (tensorrt_llm.layers.attention.attention method)": [[16, "tensorrt_llm.layers.attention.Attention.forward"]], "forward() (tensorrt_llm.layers.attention.bertattention method)": [[16, "tensorrt_llm.layers.attention.BertAttention.forward"]], "forward() (tensorrt_llm.layers.cast.cast method)": [[16, "tensorrt_llm.layers.cast.Cast.forward"]], "forward() (tensorrt_llm.layers.conv.conv2d method)": [[16, "tensorrt_llm.layers.conv.Conv2d.forward"]], "forward() (tensorrt_llm.layers.conv.convtranspose2d method)": [[16, "tensorrt_llm.layers.conv.ConvTranspose2d.forward"]], "forward() (tensorrt_llm.layers.embedding.embedding method)": [[16, "tensorrt_llm.layers.embedding.Embedding.forward"]], "forward() (tensorrt_llm.layers.embedding.prompttuningembedding method)": [[16, "tensorrt_llm.layers.embedding.PromptTuningEmbedding.forward"]], "forward() (tensorrt_llm.layers.linear.linear method)": [[16, "tensorrt_llm.layers.linear.Linear.forward"]], "forward() (tensorrt_llm.layers.linear.rowlinear method)": [[16, "tensorrt_llm.layers.linear.RowLinear.forward"]], "forward() (tensorrt_llm.layers.mlp.fusedgatedmlp method)": [[16, "tensorrt_llm.layers.mlp.FusedGatedMLP.forward"]], "forward() (tensorrt_llm.layers.mlp.gatedmlp method)": [[16, "tensorrt_llm.layers.mlp.GatedMLP.forward"]], "forward() (tensorrt_llm.layers.mlp.mlp method)": [[16, "tensorrt_llm.layers.mlp.MLP.forward"]], "forward() (tensorrt_llm.layers.normalization.groupnorm method)": [[16, "tensorrt_llm.layers.normalization.GroupNorm.forward"]], "forward() (tensorrt_llm.layers.normalization.layernorm method)": [[16, "tensorrt_llm.layers.normalization.LayerNorm.forward"]], "forward() (tensorrt_llm.layers.normalization.rmsnorm method)": [[16, "tensorrt_llm.layers.normalization.RmsNorm.forward"]], "forward() (tensorrt_llm.layers.pooling.avgpool2d method)": [[16, "tensorrt_llm.layers.pooling.AvgPool2d.forward"]], "get_first_kv_cache_block_pointers() (tensorrt_llm.layers.attention.keyvaluecacheparams method)": [[16, "tensorrt_llm.layers.attention.KeyValueCacheParams.get_first_kv_cache_block_pointers"]], "get_first_past_key_value() (tensorrt_llm.layers.attention.keyvaluecacheparams 
method)": [[16, "tensorrt_llm.layers.attention.KeyValueCacheParams.get_first_past_key_value"]], "is_valid() (tensorrt_llm.layers.attention.attentionparams method)": [[16, "tensorrt_llm.layers.attention.AttentionParams.is_valid"]], "is_valid() (tensorrt_llm.layers.attention.keyvaluecacheparams method)": [[16, "tensorrt_llm.layers.attention.KeyValueCacheParams.is_valid"]], "is_valid_cross_attn() (tensorrt_llm.layers.attention.attentionparams method)": [[16, "tensorrt_llm.layers.attention.AttentionParams.is_valid_cross_attn"]], "multiply_gather() (tensorrt_llm.layers.linear.linear method)": [[16, "tensorrt_llm.layers.linear.Linear.multiply_gather"]], "multiply_reduce() (tensorrt_llm.layers.linear.rowlinear method)": [[16, "tensorrt_llm.layers.linear.RowLinear.multiply_reduce"]], "rotate_every_two() (tensorrt_llm.layers.attention.ropeembeddingutils static method)": [[16, "tensorrt_llm.layers.attention.RopeEmbeddingUtils.rotate_every_two"]], "rotate_half() (tensorrt_llm.layers.attention.ropeembeddingutils static method)": [[16, "tensorrt_llm.layers.attention.RopeEmbeddingUtils.rotate_half"]], "tensorrt_llm.layers.activation": [[16, "module-tensorrt_llm.layers.activation"]], "tensorrt_llm.layers.attention": [[16, "module-tensorrt_llm.layers.attention"]], "tensorrt_llm.layers.cast": [[16, "module-tensorrt_llm.layers.cast"]], "tensorrt_llm.layers.conv": [[16, "module-tensorrt_llm.layers.conv"]], "tensorrt_llm.layers.embedding": [[16, "module-tensorrt_llm.layers.embedding"]], "tensorrt_llm.layers.linear": [[16, "module-tensorrt_llm.layers.linear"]], "tensorrt_llm.layers.mlp": [[16, "module-tensorrt_llm.layers.mlp"]], "tensorrt_llm.layers.normalization": [[16, "module-tensorrt_llm.layers.normalization"]], "tensorrt_llm.layers.pooling": [[16, "module-tensorrt_llm.layers.pooling"]], "baichuanforcausallm (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.BaichuanForCausalLM"]], "bertforquestionanswering (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.BertForQuestionAnswering"]], "bertmodel (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.BertModel"]], "bloomforcausallm (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.BloomForCausalLM"]], "bloommodel (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.BloomModel"]], "chatglmheadmodel (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.ChatGLMHeadModel"]], "chatglmmodel (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.ChatGLMModel"]], "decodermodel (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.DecoderModel"]], "encodermodel (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.EncoderModel"]], "falconforcausallm (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.FalconForCausalLM"]], "falconmodel (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.FalconModel"]], "gptjforcausallm (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.GPTJForCausalLM"]], "gptjmodel (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.GPTJModel"]], "gptlmheadmodel (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.GPTLMHeadModel"]], "gptmodel (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.GPTModel"]], "gptneoxforcausallm (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.GPTNeoXForCausalLM"]], "gptneoxmodel (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.GPTNeoXModel"]], "llamaforcausallm (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.LLaMAForCausalLM"]], "llamamodel (class in tensorrt_llm.models)": 
[[17, "tensorrt_llm.models.LLaMAModel"]], "optlmheadmodel (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.OPTLMHeadModel"]], "optmodel (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.OPTModel"]], "qwenforcausallm (class in tensorrt_llm.models)": [[17, "tensorrt_llm.models.QWenForCausalLM"]], "forward() (tensorrt_llm.models.baichuanforcausallm method)": [[17, "tensorrt_llm.models.BaichuanForCausalLM.forward"]], "forward() (tensorrt_llm.models.bertforquestionanswering method)": [[17, "tensorrt_llm.models.BertForQuestionAnswering.forward"]], "forward() (tensorrt_llm.models.bertmodel method)": [[17, "tensorrt_llm.models.BertModel.forward"]], "forward() (tensorrt_llm.models.bloomforcausallm method)": [[17, "tensorrt_llm.models.BloomForCausalLM.forward"]], "forward() (tensorrt_llm.models.bloommodel method)": [[17, "tensorrt_llm.models.BloomModel.forward"]], "forward() (tensorrt_llm.models.chatglmheadmodel method)": [[17, "tensorrt_llm.models.ChatGLMHeadModel.forward"]], "forward() (tensorrt_llm.models.chatglmmodel method)": [[17, "tensorrt_llm.models.ChatGLMModel.forward"]], "forward() (tensorrt_llm.models.decodermodel method)": [[17, "tensorrt_llm.models.DecoderModel.forward"]], "forward() (tensorrt_llm.models.encodermodel method)": [[17, "tensorrt_llm.models.EncoderModel.forward"]], "forward() (tensorrt_llm.models.falconforcausallm method)": [[17, "tensorrt_llm.models.FalconForCausalLM.forward"]], "forward() (tensorrt_llm.models.falconmodel method)": [[17, "tensorrt_llm.models.FalconModel.forward"]], "forward() (tensorrt_llm.models.gptjforcausallm method)": [[17, "tensorrt_llm.models.GPTJForCausalLM.forward"]], "forward() (tensorrt_llm.models.gptjmodel method)": [[17, "tensorrt_llm.models.GPTJModel.forward"]], "forward() (tensorrt_llm.models.gptlmheadmodel method)": [[17, "tensorrt_llm.models.GPTLMHeadModel.forward"]], "forward() (tensorrt_llm.models.gptmodel method)": [[17, "tensorrt_llm.models.GPTModel.forward"]], "forward() (tensorrt_llm.models.gptneoxforcausallm method)": [[17, "tensorrt_llm.models.GPTNeoXForCausalLM.forward"]], "forward() (tensorrt_llm.models.gptneoxmodel method)": [[17, "tensorrt_llm.models.GPTNeoXModel.forward"]], "forward() (tensorrt_llm.models.llamaforcausallm method)": [[17, "tensorrt_llm.models.LLaMAForCausalLM.forward"]], "forward() (tensorrt_llm.models.llamamodel method)": [[17, "tensorrt_llm.models.LLaMAModel.forward"]], "forward() (tensorrt_llm.models.optlmheadmodel method)": [[17, "tensorrt_llm.models.OPTLMHeadModel.forward"]], "forward() (tensorrt_llm.models.optmodel method)": [[17, "tensorrt_llm.models.OPTModel.forward"]], "forward() (tensorrt_llm.models.qwenforcausallm method)": [[17, "tensorrt_llm.models.QWenForCausalLM.forward"]], "prepare_inputs() (tensorrt_llm.models.baichuanforcausallm method)": [[17, "tensorrt_llm.models.BaichuanForCausalLM.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.bloomforcausallm method)": [[17, "tensorrt_llm.models.BloomForCausalLM.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.chatglmheadmodel method)": [[17, "tensorrt_llm.models.ChatGLMHeadModel.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.decodermodel method)": [[17, "tensorrt_llm.models.DecoderModel.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.encodermodel method)": [[17, "tensorrt_llm.models.EncoderModel.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.falconforcausallm method)": [[17, "tensorrt_llm.models.FalconForCausalLM.prepare_inputs"]], "prepare_inputs() 
(tensorrt_llm.models.gptjforcausallm method)": [[17, "tensorrt_llm.models.GPTJForCausalLM.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.gptlmheadmodel method)": [[17, "tensorrt_llm.models.GPTLMHeadModel.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.gptneoxforcausallm method)": [[17, "tensorrt_llm.models.GPTNeoXForCausalLM.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.llamaforcausallm method)": [[17, "tensorrt_llm.models.LLaMAForCausalLM.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.optlmheadmodel method)": [[17, "tensorrt_llm.models.OPTLMHeadModel.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.qwenforcausallm method)": [[17, "tensorrt_llm.models.QWenForCausalLM.prepare_inputs"]], "quantize_model() (in module tensorrt_llm.models)": [[17, "tensorrt_llm.models.quantize_model"]], "tensorrt_llm.models": [[17, "module-tensorrt_llm.models"]], "tensorrt_llm.plugin": [[18, "module-tensorrt_llm.plugin"]], "quantmode (class in tensorrt_llm.quantization)": [[19, "tensorrt_llm.quantization.QuantMode"]], "tensorrt_llm.quantization": [[19, "module-tensorrt_llm.quantization"]], "chatglmgenerationsession (class in tensorrt_llm.runtime)": [[20, "tensorrt_llm.runtime.ChatGLMGenerationSession"]], "generationsequence (class in tensorrt_llm.runtime)": [[20, "tensorrt_llm.runtime.GenerationSequence"]], "generationsession (class in tensorrt_llm.runtime)": [[20, "tensorrt_llm.runtime.GenerationSession"]], "kvcachemanager (class in tensorrt_llm.runtime)": [[20, "tensorrt_llm.runtime.KVCacheManager"]], "modelconfig (class in tensorrt_llm.runtime)": [[20, "tensorrt_llm.runtime.ModelConfig"]], "modelrunner (class in tensorrt_llm.runtime)": [[20, "tensorrt_llm.runtime.ModelRunner"]], "session (class in tensorrt_llm.runtime)": [[20, "tensorrt_llm.runtime.Session"]], "tensorinfo (class in tensorrt_llm.runtime)": [[20, "tensorrt_llm.runtime.TensorInfo"]], "add_sequence() (tensorrt_llm.runtime.kvcachemanager method)": [[20, "tensorrt_llm.runtime.KVCacheManager.add_sequence"]], "batch_size (tensorrt_llm.runtime.generationsession attribute)": [[20, "tensorrt_llm.runtime.GenerationSession.batch_size"]], "buffer_allocated (tensorrt_llm.runtime.generationsession attribute)": [[20, "tensorrt_llm.runtime.GenerationSession.buffer_allocated"]], "context (tensorrt_llm.runtime.session property)": [[20, "tensorrt_llm.runtime.Session.context"]], "cross_attention (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.cross_attention"]], "cross_attention (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.cross_attention"]], "cuda_graph_mode (tensorrt_llm.runtime.generationsession attribute)": [[20, "tensorrt_llm.runtime.GenerationSession.cuda_graph_mode"]], "cuda_stream_guard() (tensorrt_llm.runtime.generationsession method)": [[20, "tensorrt_llm.runtime.GenerationSession.cuda_stream_guard"]], "debug_mode (tensorrt_llm.runtime.generationsession attribute)": [[20, "tensorrt_llm.runtime.GenerationSession.debug_mode"]], "debug_tensors_to_save (tensorrt_llm.runtime.generationsession attribute)": [[20, "tensorrt_llm.runtime.GenerationSession.debug_tensors_to_save"]], "decode() (tensorrt_llm.runtime.generationsession method)": [[20, "tensorrt_llm.runtime.GenerationSession.decode"]], "decode_batch() (tensorrt_llm.runtime.generationsession method)": [[20, "tensorrt_llm.runtime.GenerationSession.decode_batch"]], "decode_regular() (tensorrt_llm.runtime.generationsession method)": [[20, 
"tensorrt_llm.runtime.GenerationSession.decode_regular"]], "decode_stream() (tensorrt_llm.runtime.generationsession method)": [[20, "tensorrt_llm.runtime.GenerationSession.decode_stream"]], "device (tensorrt_llm.runtime.generationsession attribute)": [[20, "tensorrt_llm.runtime.GenerationSession.device"]], "dtype (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.dtype"]], "dtype (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.dtype"]], "dtype (tensorrt_llm.runtime.tensorinfo attribute)": [[20, "tensorrt_llm.runtime.TensorInfo.dtype"]], "engine (tensorrt_llm.runtime.session property)": [[20, "tensorrt_llm.runtime.Session.engine"]], "finalize_decoder() (tensorrt_llm.runtime.generationsession method)": [[20, "tensorrt_llm.runtime.GenerationSession.finalize_decoder"]], "first_layer (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.first_layer"]], "from_dir() (tensorrt_llm.runtime.modelrunner class method)": [[20, "tensorrt_llm.runtime.ModelRunner.from_dir"]], "from_engine() (tensorrt_llm.runtime.session static method)": [[20, "tensorrt_llm.runtime.Session.from_engine"]], "from_serialized_engine() (tensorrt_llm.runtime.session static method)": [[20, "tensorrt_llm.runtime.Session.from_serialized_engine"]], "gather_all_token_logits (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.gather_all_token_logits"]], "gather_all_token_logits (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.gather_all_token_logits"]], "generate() (tensorrt_llm.runtime.modelrunner method)": [[20, "tensorrt_llm.runtime.ModelRunner.generate"]], "get_batch_idx() (tensorrt_llm.runtime.generationsequence method)": [[20, "tensorrt_llm.runtime.GenerationSequence.get_batch_idx"]], "get_pointer_arrays() (tensorrt_llm.runtime.kvcachemanager method)": [[20, "tensorrt_llm.runtime.KVCacheManager.get_pointer_arrays"]], "get_seq_idx() (tensorrt_llm.runtime.generationsequence method)": [[20, "tensorrt_llm.runtime.GenerationSequence.get_seq_idx"]], "gpt_attention_plugin (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.gpt_attention_plugin"]], "handle_per_step() (tensorrt_llm.runtime.generationsession method)": [[20, "tensorrt_llm.runtime.GenerationSession.handle_per_step"]], "has_position_embedding (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.has_position_embedding"]], "has_position_embedding (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.has_position_embedding"]], "has_token_type_embedding (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.has_token_type_embedding"]], "has_token_type_embedding (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.has_token_type_embedding"]], "head_size (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.head_size"]], "head_size (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.head_size"]], "hidden_size (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.hidden_size"]], "hidden_size (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.hidden_size"]], "infer_shapes() (tensorrt_llm.runtime.session method)": [[20, 
"tensorrt_llm.runtime.Session.infer_shapes"]], "last_layer (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.last_layer"]], "lora_plugin (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.lora_plugin"]], "mapping (tensorrt_llm.runtime.generationsession attribute)": [[20, "tensorrt_llm.runtime.GenerationSession.mapping"]], "max_prompt_embedding_table_size (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.max_prompt_embedding_table_size"]], "model_name (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.model_name"]], "name (tensorrt_llm.runtime.tensorinfo attribute)": [[20, "tensorrt_llm.runtime.TensorInfo.name"]], "num_heads (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.num_heads"]], "num_heads (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.num_heads"]], "num_heads_kv (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.num_heads_kv"]], "num_kv_heads (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.num_kv_heads"]], "num_layers (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.num_layers"]], "num_layers (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.num_layers"]], "paged_kv_cache (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.paged_kv_cache"]], "paged_kv_cache (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.paged_kv_cache"]], "pp_communicate_final_output_ids() (tensorrt_llm.runtime.generationsession method)": [[20, "tensorrt_llm.runtime.GenerationSession.pp_communicate_final_output_ids"]], "pp_communicate_new_tokens() (tensorrt_llm.runtime.generationsession method)": [[20, "tensorrt_llm.runtime.GenerationSession.pp_communicate_new_tokens"]], "quant_mode (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.quant_mode"]], "quant_mode (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.quant_mode"]], "remove_input_padding (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.remove_input_padding"]], "remove_input_padding (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.remove_input_padding"]], "remove_input_padding (tensorrt_llm.runtime.modelrunner property)": [[20, "tensorrt_llm.runtime.ModelRunner.remove_input_padding"]], "run() (tensorrt_llm.runtime.session method)": [[20, "tensorrt_llm.runtime.Session.run"]], "runtime (tensorrt_llm.runtime.generationsession attribute)": [[20, "tensorrt_llm.runtime.GenerationSession.runtime"]], "runtime (tensorrt_llm.runtime.session property)": [[20, "tensorrt_llm.runtime.Session.runtime"]], "set_shapes() (tensorrt_llm.runtime.session method)": [[20, "tensorrt_llm.runtime.Session.set_shapes"]], "setup() (tensorrt_llm.runtime.generationsession method)": [[20, "tensorrt_llm.runtime.GenerationSession.setup"]], "shape (tensorrt_llm.runtime.tensorinfo attribute)": [[20, "tensorrt_llm.runtime.TensorInfo.shape"]], "step() (tensorrt_llm.runtime.kvcachemanager method)": [[20, "tensorrt_llm.runtime.KVCacheManager.step"]], "tensorrt_llm.runtime": [[20, "module-tensorrt_llm.runtime"]], "to_word_list_format() 
(in module tensorrt_llm.runtime)": [[20, "tensorrt_llm.runtime.to_word_list_format"]], "tokens_per_block (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.tokens_per_block"]], "tokens_per_block (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.tokens_per_block"]], "use_custom_all_reduce (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.use_custom_all_reduce"]], "use_custom_all_reduce (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.use_custom_all_reduce"]], "use_gpt_attention_plugin (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.use_gpt_attention_plugin"]], "use_lora_plugin (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.use_lora_plugin"]], "vocab_size (tensorrt_llm.runtime.generationsession property)": [[20, "tensorrt_llm.runtime.GenerationSession.vocab_size"]], "vocab_size (tensorrt_llm.runtime.modelconfig attribute)": [[20, "tensorrt_llm.runtime.ModelConfig.vocab_size"]]}}) |