mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-09 12:41:52 +08:00
1 line
353 KiB
JavaScript
1 line
353 KiB
JavaScript
Search.setIndex({"docnames": ["2023-05-17-how-to-add-a-new-model", "2023-05-19-how-to-debug", "_cpp_gen/runtime", "architecture", "batch_manager", "gpt_attention", "gpt_runtime", "graph-rewriting", "index", "installation", "performance", "precision", "python-api/tensorrt_llm.functional", "python-api/tensorrt_llm.layers", "python-api/tensorrt_llm.models", "python-api/tensorrt_llm.plugin", "python-api/tensorrt_llm.quantization", "python-api/tensorrt_llm.runtime"], "filenames": ["2023-05-17-how-to-add-a-new-model.md", "2023-05-19-how-to-debug.md", "_cpp_gen/runtime.rst", "architecture.md", "batch_manager.md", "gpt_attention.md", "gpt_runtime.md", "graph-rewriting.md", "index.rst", "installation.md", "performance.md", "precision.md", "python-api/tensorrt_llm.functional.rst", "python-api/tensorrt_llm.layers.rst", "python-api/tensorrt_llm.models.rst", "python-api/tensorrt_llm.plugin.rst", "python-api/tensorrt_llm.quantization.rst", "python-api/tensorrt_llm.runtime.rst"], "titles": ["How to add a new model", "How to debug", "Runtime", "TensorRT-LLM Architecture", "The Batch Manager in TensorRT-LLM", "Multi-head, Multi-query and Group-query Attention", "C++ GPT Runtime", "Graph Rewriting Module", "Welcome to TensorRT-LLM\u2019s documentation!", "Build From Sources", "Performance of TensorRT-LLM", "Numerical Precision", "Functionals", "Layers", "Models", "Plugin", "Quantization", "Runtime"], "terms": {"thi": [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 17], "document": [0, 1, 3, 5, 6, 9, 10, 11, 12], "describ": [0, 1, 3, 5, 6, 10, 11, 12], "tensorrt": [0, 1, 2, 5, 6, 7, 11, 12, 14, 17], "llm": [0, 1, 5, 6, 7, 11, 12], "what": 0, "provid": [0, 2, 3, 4, 5, 6, 7, 9, 10, 12], "low": [0, 5], "level": [0, 2, 3, 4], "function": [0, 1, 2, 3, 4, 5, 6, 8, 11, 17], "concat": [0, 12], "sum": [0, 6, 7, 12], "etc": [0, 2], "basic": 0, "layer": [0, 1, 2, 3, 5, 6, 7, 8, 11, 12], "linear": [0, 3, 11, 12], "layernorm": [0, 12, 13, 14], "high": [0, 3], "mlp": [0, 1, 3, 12], "attent": [0, 3, 6, 8, 12], "develop": [0, 3, 6, 9, 12], "need": [0, 3, 4, 5, 6, 7, 9, 12, 17], "implement": [0, 3, 4, 5, 6, 10, 11, 12], "creat": [0, 2, 3, 4, 6, 7, 12, 17], "directori": [0, 3, 9], "tensorrt_llm": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 16, 17], "e": [0, 2, 5, 6, 7, 11, 12], "g": 0, "bloom": [0, 6, 11], "write": 0, "py": [0, 1, 3, 7, 9, 12], "It": [0, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12], "": [0, 3, 4, 6, 7, 9, 10, 11, 12], "option": [0, 2, 6, 7, 9, 12], "us": [0, 1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 14, 17], "usual": [1, 3, 12], "we": [1, 6, 7, 9, 12], "want": [1, 12], "print": [1, 2], "intermedi": [1, 3, 5], "tensor": [1, 2, 3, 4, 6, 10, 11, 12, 13, 14, 17], "valu": [1, 2, 3, 4, 5, 6, 11, 12, 14, 16, 17], "when": [1, 3, 4, 5, 6, 9, 11, 12, 13, 14, 17], "obei": 1, "defin": [1, 3, 4, 5, 6, 7, 11, 12], "run": [1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 17], "paradigm": 1, "should": [1, 2, 7, 9, 10, 12, 17], "mark": [1, 2, 4, 6, 7, 12], "interest": 1, "network": [1, 3, 5, 7, 11, 12], "output": [1, 2, 4, 5, 7, 10, 12, 13, 17], "Then": [1, 12, 13], "runtim": [1, 5, 8, 12], "regist": [1, 4], "register_network_output": 1, "api": [1, 3, 6, 9, 12], "class": [1, 2, 3, 6, 7, 12, 13, 14, 16, 17], "modul": [1, 3, 5, 6, 8, 9, 13, 14], "def": [1, 3, 7], "__init__": [1, 3, 7], "self": [1, 3, 5, 7, 12, 14, 17], "hidden_s": [1, 7, 12, 13, 14, 17], "ffn_hidden_s": [1, 13, 14], "bia": [1, 3, 6, 12, 13, 14], "true": [1, 2, 4, 6, 7, 12, 13, 14, 17], "tp_group": [1, 12, 13, 14], "none": [1, 7, 12, 13, 14, 17], "tp_size": [1, 12, 13, 14], "1": [1, 2, 4, 5, 6, 7, 9, 10, 12, 13, 14, 17], "super": [1, 7], "fc": [1, 3], "columnlinear": [1, 13], "gather_output": [1, 13], "fals": [1, 2, 5, 6, 7, 12, 13, 14, 17], "proj": 1, "rowlinear": [1, 13], "forward": [1, 2, 7, 13, 14], "hidden_st": [1, 12, 13, 14, 17], "inter": 1, "relu": [1, 3, 12, 14], "here": [1, 3, 7, 9, 11], "after": [1, 2, 3, 4, 5, 6, 7, 12], "return": [1, 2, 3, 4, 7, 12, 14, 17], "k": [1, 3, 5, 6, 11, 12], "v": [1, 2, 5, 6, 11, 12], "gm": 1, "named_network_output": 1, "net": 1, "_mark_output": 1, "dtype": [1, 2, 3, 7, 12, 13, 14, 17], "kei": [1, 3, 10, 17], "i": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 17], "full": [1, 4, 6], "exampl": [1, 3, 4, 5, 6, 7, 9, 11, 12, 17], "an": [1, 2, 3, 4, 5, 6, 7, 9, 11, 12, 16, 17], "gpt": [1, 2, 3, 4, 5, 8, 10, 11, 12], "In": [1, 5, 7, 9, 11, 12], "residu": 1, "attention_output": 1, "data": [1, 2, 3, 10, 12], "post_layernorm": [1, 12], "mlp_output": 1, "build": [1, 3, 6, 7, 8], "net_guard": [1, 7], "set_named_paramet": 1, "tensorrt_llm_gpt": [1, 3], "named_paramet": 1, "input": [1, 2, 3, 4, 7, 10, 12, 13, 14, 17], "prepare_input": [1, 14], "arg": [1, 7], "max_batch_s": [1, 5, 12, 14], "max_input_len": [1, 14], "max_output_len": 1, "max_beam_width": [1, 5, 12, 14], "trt": [1, 3, 7, 12, 14, 17], "str_dtype_to_trt": 1, "engin": [1, 3, 4, 6, 7, 12, 17], "rm": [1, 9, 12], "rf": 1, "gpt2": [1, 6], "git": [1, 9], "clone": [1, 9], "http": [1, 5, 9, 11, 12], "huggingfac": [1, 3], "co": [1, 12], "medium": 1, "pushd": 1, "pytorch_model": 1, "bin": 1, "safetensor": 1, "wget": 1, "q": [1, 5, 6, 12], "resolv": 1, "main": [1, 4, 6, 12], "popd": 1, "python3": [1, 9], "hf_gpt_convert": 1, "o": [1, 7], "c": [1, 2, 3, 4, 5, 7], "parallel": [1, 3, 4, 6, 10, 12, 13], "storag": 1, "type": [1, 2, 3, 4, 5, 6, 7, 11, 12, 17], "float16": [1, 7], "model_dir": 1, "gpu": [1, 2, 5, 6, 9, 12, 17], "use_gpt_attention_plugin": [1, 17], "open": [1, 6], "mode": [1, 3, 4, 5, 7, 11, 12, 13, 14, 17], "decod": [1, 2, 5, 6, 17], "generationsess": [1, 17], "model_config": [1, 17], "engine_buff": [1, 17], "runtime_map": 1, "debug_mod": [1, 17], "gener": [1, 2, 3, 4, 12, 17], "info": 1, "step": [1, 2, 3, 5, 6, 7, 12, 17], "0": [1, 2, 3, 5, 6, 7, 12, 13, 14, 17], "ctx_shape": 1, "ctx_buffer": 1, "_get_context_shape_buff": 1, "input_id": [1, 14, 17], "max_input_length": 1, "input_length": [1, 12, 13, 14], "position_id": [1, 14], "last_token_id": [1, 12, 14], "attention_mask": [1, 13, 14, 17], "this_src_cache_indirect": 1, "_set_shap": 1, "context": [1, 6, 12, 17], "_set_buff": 1, "debug_buff": 1, "stream": [1, 2, 3, 4, 6, 10, 17], "torch": [1, 9, 12, 17], "cuda": [1, 2, 3, 5, 6, 9, 10, 17], "current_stream": 1, "cuda_stream": 1, "ok": 1, "_run": 1, "rais": 1, "runtimeerror": 1, "fail": [1, 17], "synchron": [1, 2, 3, 12], "6": [1, 6, 10, 12], "max_new_token": [1, 14, 17], "next_step_shap": 1, "next_step_buff": [1, 17], "_get_next_step_shape_buff": 1, "batch_siz": [1, 5, 7, 12, 17], "scfg": [1, 17], "num_beam": [1, 17], "next_src_cache_indirect": 1, "next_context": 1, "see": [1, 2, 3, 4, 5, 6, 9, 11, 12], "python": [1, 3, 6, 7, 9, 11], "8": [1, 2, 5, 10, 11, 12], "dict_kei": 1, "logit": [1, 2, 6, 12], "cache_indirect": [1, 5, 12, 13, 17], "past_key_0": 1, "past_value_0": 1, "present_key_0": 1, "present_value_0": 1, "past_key_1": 1, "past_value_1": 1, "present_key_1": 1, "present_value_1": 1, "past_key_2": 1, "past_value_2": 1, "present_key_2": 1, "present_value_2": 1, "past_key_3": 1, "past_value_3": 1, "present_key_3": 1, "present_value_3": 1, "past_key_4": 1, "past_value_4": 1, "present_key_4": 1, "present_value_4": 1, "past_key_5": 1, "past_value_5": 1, "present_key_5": 1, "present_value_5": 1, "past_key_6": 1, "past_value_6": 1, "present_key_6": 1, "present_value_6": 1, "past_key_7": 1, "past_value_7": 1, "present_key_7": 1, "present_value_7": 1, "past_key_8": 1, "past_value_8": 1, "present_key_8": 1, "present_value_8": 1, "past_key_9": 1, "past_value_9": 1, "present_key_9": 1, "present_value_9": 1, "past_key_10": 1, "past_value_10": 1, "present_key_10": 1, "present_value_10": 1, "past_key_11": 1, "past_value_11": 1, "present_key_11": 1, "present_value_11": 1, "past_key_12": 1, "past_value_12": 1, "present_key_12": 1, "present_value_12": 1, "past_key_13": 1, "past_value_13": 1, "present_key_13": 1, "present_value_13": 1, "past_key_14": 1, "past_value_14": 1, "present_key_14": 1, "present_value_14": 1, "past_key_15": 1, "past_value_15": 1, "present_key_15": 1, "present_value_15": 1, "past_key_16": 1, "past_value_16": 1, "present_key_16": 1, "present_value_16": 1, "past_key_17": 1, "past_value_17": 1, "present_key_17": 1, "present_value_17": 1, "past_key_18": 1, "past_value_18": 1, "present_key_18": 1, "present_value_18": 1, "past_key_19": 1, "past_value_19": 1, "present_key_19": 1, "present_value_19": 1, "past_key_20": 1, "past_value_20": 1, "present_key_20": 1, "present_value_20": 1, "past_key_21": 1, "past_value_21": 1, "present_key_21": 1, "present_value_21": 1, "past_key_22": 1, "past_value_22": 1, "present_key_22": 1, "present_value_22": 1, "past_key_23": 1, "past_value_23": 1, "present_key_23": 1, "present_value_23": 1, "sequence_length": [1, 12, 13, 17], "past_key_value_length": [1, 12, 13], "2": [1, 2, 5, 6, 7, 10, 12, 14, 17], "3": [1, 5, 6, 7, 10, 12], "4": [1, 6, 7, 10, 11, 12], "5": [1, 6, 10, 12], "7": [1, 6, 10, 12], "9": [1, 6, 10, 12], "10": [1, 10], "11": [1, 12], "12": [1, 10, 12], "13": [1, 12], "14": [1, 10], "15": 1, "16": [1, 10, 11], "17": 1, "18": 1, "19": 1, "20": 1, "21": 1, "22": [1, 12], "23": 1, "0295": 1, "0256": 1, "0780": 1, "0562": 1, "0241": 1, "0273": 1, "0089": 1, "5882": 1, "1989": 1, "0464": 1, "6305": 1, "5967": 1, "8793": 1, "1056": 1, "7083": 1, "0889": 1, "0714": 1, "2931": 1, "1209": 1, "0886": 1, "5927": 1, "1048": 1, "3437": 1, "1085": 1, "0752": 1, "0739": 1, "6156": 1, "3454": 1, "3014": 1, "2653": 1, "7126": 1, "9685": 1, "1145": 1, "0084": 1, "9521": 1, "1425": 1, "devic": [1, 2, 12, 17], "2129": 1, "5879": 1, "8172": 1, "7892": 1, "6887": 1, "6063": 1, "4184": 1, "0066": 1, "3895": 1, "9023": 1, "0686": 1, "2831": 1, "7935": 1, "5085": 1, "1696": 1, "5839": 1, "1375": 1, "0078": 1, "0810": 1, "1262": 1, "6260": 1, "1065": 1, "0529": 1, "7143": 1, "3322": 1, "8835": 1, "3427": 1, "8159": 1, "0622": 1, "2327": 1, "2217": 1, "2057": 1, "1475": 1, "3545": 1, "1673": 1, "1131": 1, "1268": 1, "1570": 1, "3972": 1, "8213": 1, "3282": 1, "8672": 1, "born": 1, "north": 1, "east": 1, "franc": 1, "soyer": 1, "train": [1, 3], "chef": 1, "befor": [1, 2, 3, 4, 5, 7, 9, 12, 17], "move": 1, "london": 1, "earli": 1, "If": [1, 3, 5, 6, 7, 9, 12], "you": [1, 3, 5, 6, 7, 9, 12], "plugin": [1, 5, 6, 7, 8, 9, 11, 12], "can": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 14, 17], "set": [1, 2, 4, 5, 6, 7, 12, 13, 17], "environ": [1, 4, 6, 9], "variabl": [1, 4, 6], "cuda_launch_block": 1, "so": [1, 2, 5, 7, 9, 12], "kernel": [1, 2, 3, 5, 6, 10, 12], "ar": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 17], "launch": [1, 2, 3, 4, 6], "statu": 1, "check": [1, 6, 12], "immedi": [1, 5], "memori": [1, 2, 3, 4, 5, 6, 12, 17], "make": [1, 3, 5, 7, 12], "sure": [1, 12], "respect": [1, 11, 12], "time": [1, 2, 3, 4, 9, 12, 17], "shape": [1, 2, 3, 5, 6, 7, 11, 12, 14, 17], "thei": [1, 3, 5, 6, 9, 11, 12], "resid": 1, "correct": [1, 5], "cpu": [1, 2, 3, 12], "namespac": [2, 4, 6], "includ": [2, 3, 4, 5, 6, 9, 10, 11], "A": [2, 3, 4, 5, 6, 12], "helper": [2, 12], "manag": [2, 3, 5, 6, 8, 17], "host": [2, 9, 12], "public": 2, "ibufferptr": 2, "uniqueptr": 2, "itensorptr": 2, "cudastreamptr": 2, "std": [2, 4, 6], "shared_ptr": [2, 4], "explicit": [2, 6, 12], "construct": [2, 3], "paramet": [2, 3, 4, 5, 12, 13], "The": [2, 3, 5, 7, 8, 9, 10, 12, 13], "all": [2, 3, 4, 5, 6, 7, 9, 11, 12, 13, 17], "oper": [2, 3, 5, 6, 7, 10, 12], "alloc": [2, 4, 5, 6, 12, 17], "de": 2, "copi": [2, 4, 6, 9, 12, 17], "size_t": [2, 6], "size": [2, 5, 6, 10, 12, 13, 14], "nvinfer1": 2, "datatyp": [2, 3, 6, 12, 14, 17], "kbyte_typ": 2, "const": [2, 4, 6], "given": [2, 4, 6, 11, 12, 14, 17], "dim": [2, 12], "dimens": [2, 5, 6, 12, 13, 14], "memorytyp": 2, "inlin": 2, "emptybuff": 2, "empti": [2, 4], "mai": [2, 3, 4, 5, 6, 9, 12], "resiz": 2, "later": [2, 3], "emptytensor": 2, "reshap": 2, "void": [2, 3, 4, 6], "setzero": 2, "buffer": [2, 6, 12], "content": [2, 12], "zero": [2, 6, 11, 12, 13, 14], "src": [2, 3, 12], "dst": 2, "srctype": 2, "dsttype": 2, "copyfrom": 2, "new": [2, 4, 5, 6, 7, 8, 12, 17], "potenti": [2, 4], "differ": [2, 3, 5, 6, 10, 11, 12], "templat": [2, 3], "typenam": [2, 3], "t": [2, 3, 5, 12], "vector": [2, 6, 12], "getstream": 2, "get": [2, 6, 7, 9, 12, 17], "underli": 2, "static": [2, 9, 12, 17], "pin": 2, "attribut": [2, 7], "constexpr": 2, "auto": [2, 3, 4, 5, 6, 12], "kuint8": 2, "privat": [2, 6], "member": [2, 3, 6, 7, 12], "mstream": 2, "initmemorypool": 2, "int": [2, 3, 6, 12, 13, 14, 17], "typedef": 2, "sizetyp": 2, "int32_t": [2, 4, 12], "tokenidtyp": 2, "stringptrmap": 2, "unordered_map": 2, "string": [2, 4, 6, 12, 17], "pointer": [2, 4, 6, 12, 17], "cudaevent_t": 2, "unsign": [2, 4], "flag": [2, 5, 6, 12], "cudaeventdisabletim": 2, "event": 2, "destroi": 2, "destructor": [2, 4], "creation": [2, 12], "By": [2, 6, 9], "default": [2, 6, 9, 12, 17], "disabl": [2, 4, 5, 6, 12], "bool": [2, 4, 6, 7, 12, 13, 14, 17], "ownsev": 2, "pass": [2, 4, 5, 6, 7, 12, 13], "exist": [2, 6, 17], "object": [2, 3, 6, 12, 13, 14, 17], "whether": [2, 12, 13], "own": [2, 3, 4, 6, 9], "associ": [2, 4, 6, 12], "element_typ": 2, "remove_pointer_t": 2, "eventptr": 2, "unique_ptr": 2, "delet": 2, "mevent": 2, "mownsev": 2, "cudastreamnonblock": 2, "prioriti": 2, "current": [2, 4, 5, 6, 10, 12, 17], "cudastreamcreatewithflag": 2, "list": [2, 3, 4, 6, 7, 9, 12, 13, 14, 17], "valid": [2, 10, 12], "lower": [2, 6, 7, 12], "number": [2, 3, 4, 5, 6, 10, 11, 12], "repres": [2, 12, 17], "higher": [2, 3, 4, 6], "cudadevicegetstreampriorityrang": 2, "more": [2, 3, 4, 5, 6, 7, 9, 10, 12], "inform": [2, 5, 6], "about": 2, "meaning": 2, "cudastream_t": 2, "ownsstream": 2, "which": [2, 4, 5, 6, 7, 9, 12, 14, 17], "wa": [2, 6, 11, 13], "getdevic": 2, "record": [2, 7], "wait": [2, 4], "streamptr": 2, "mdevic": 2, "mownsstream": 2, "tensorptr": 2, "maxlength": 2, "batchsiz": [2, 6], "endid": [2, 6], "sequencelimitlength": 2, "embeddingbia": 2, "length": [2, 5, 6, 10, 12], "badwordslist": [2, 6], "stopwordslist": [2, 6], "norepeatngrams": 2, "cacheindirect": 2, "sharedptr": 2, "id": [2, 4, 6, 12, 17], "newtoken": 2, "finish": [2, 6, 17], "finishedsum": 2, "logprob": [2, 6], "cumlogprob": 2, "parentid": 2, "beamhypothes": 2, "float": [2, 3, 6, 11, 12, 17], "knegativeinfin": 2, "1e20f": 2, "beamwidth": [2, 6], "maxsequencelength": [2, 6], "releas": [2, 5, 6, 10, 11, 12], "init": [2, 9], "slice": [2, 12], "batchindex": 2, "outputidstgt": 2, "sequencelengthstgt": 2, "normedscor": 2, "minnormedscor": 2, "numbeam": 2, "isdon": 2, "padid": [2, 6], "pack": [2, 6, 12], "embeddingbiasopt": [2, 6], "maxnewtoken": [2, 6], "callback": [2, 6], "contextlogit": [2, 6], "ontokengener": [2, 6], "dynamicdecodelay": 2, "virtual": [2, 13], "igptdecod": 2, "vocabs": [2, 6], "vocabsizepad": [2, 6], "setup": [2, 17], "overrid": 2, "forwardasync": 2, "mmanag": 2, "cudaalloc": 2, "malloc": 2, "mdynamicdecodelay": 2, "subclass": 2, "gathertre": 2, "finaloutputid": 2, "support": [2, 4, 5, 12], "flight": [2, 5], "batch": [2, 8, 10, 12, 17], "maxbatchs": [2, 6], "maxbeamwidth": [2, 4], "call": [2, 3, 4, 5, 6, 7, 12, 17], "newrequest": 2, "batchidx": 2, "decoder_batch": 2, "request": [2, 3, 5, 6, 12], "initi": [2, 6], "newbatch": 2, "tokenptr": 2, "one": [2, 4, 5, 6, 7, 9, 12, 17], "without": [2, 3, 5, 12], "block": [2, 3, 5, 6, 12, 17], "process": [2, 3, 4, 5, 6, 12], "token": [2, 4, 5, 6, 10, 11, 12, 13, 17], "forwardsync": 2, "complet": [2, 4, 6], "thread": [2, 4, 5], "isfinishedsync": 2, "last": [2, 4, 5, 6, 12], "sequenc": [2, 3, 5, 6, 7, 12, 17], "have": [2, 4, 5, 6, 12], "getfinish": 2, "indic": [2, 4, 5, 6, 12, 13], "getoutputid": 2, "maxinputlength": [2, 6], "contain": [2, 3, 4, 5, 6, 7, 11, 12, 14, 17], "pad": [2, 4, 6, 7, 12, 13], "tupl": [2, 12, 13, 17], "getfinaloutputid": 2, "execut": [2, 3, 6, 9, 12, 17], "postprocessrequest": 2, "outputid": 2, "result": [2, 3, 4, 5, 6, 10, 12, 13], "onli": [2, 3, 4, 5, 6, 7, 9, 12, 13, 17], "avail": [2, 3, 7, 9, 11], "getparentid": 2, "parent": 2, "collect": [2, 3, 4, 7, 10, 12], "dure": [2, 3, 5, 6, 7], "beam": [2, 3, 4, 6, 12], "search": [2, 3, 6, 8, 12], "getfinishedbeam": 2, "per": [2, 5, 6, 11, 12], "getoutputlength": 2, "total": [2, 5, 6], "getcumlogprob": 2, "cumul": 2, "log": [2, 6], "probabl": [2, 6], "getnewtoken": 2, "getnbstep": 2, "each": [2, 3, 4, 5, 6, 7, 11, 12, 17], "getnbfinish": 2, "gptdecoderptr": 2, "decodinginputptr": 2, "decodingoutputptr": 2, "gather": [2, 12], "final": [2, 4, 6, 12], "mvocabs": 2, "mvocabsizepad": 2, "mbuffermanag": 2, "mforwardtoken": 2, "mforwardev": 2, "mdecod": 2, "mdecodinginput": 2, "mdecodingoutput": 2, "mjointdecodinginput": 2, "mjointdecodingoutput": 2, "mnbstep": 2, "mfinish": 2, "mfinishedsum": 2, "mmaxnewtoken": 2, "mbeamwidth": 2, "mmaxsequencelength": 2, "mactualbatchs": 2, "name": [2, 4, 6, 7, 12, 17], "precis": [2, 6, 8], "tensorparallel": [2, 6], "pipelineparallel": [2, 6], "modelconfig": [2, 6, 17], "getmodelconfig": 2, "getnam": 2, "getprecis": 2, "gettensorparallel": 2, "getpipelineparallel": 2, "getworlds": 2, "enginefilenam": 2, "model": [2, 4, 5, 8, 10, 11, 12], "pars": 2, "json": [2, 4], "istream": 2, "filesystem": 2, "path": [2, 4, 5, 9, 12], "mname": 2, "mprecis": 2, "mtensorparallel": 2, "mpipelineparallel": 2, "mgptmodelconfig": 2, "enum": 2, "modelvari": 2, "enumer": [2, 12, 16], "kgpt": 2, "kglm": 2, "nblayer": 2, "nbhead": 2, "hiddens": [2, 6], "getvocabs": 2, "noexcept": 2, "getvocabsizepad": 2, "worldsiz": [2, 6], "getnblay": 2, "getnbhead": 2, "getnbkvhead": 2, "setnbkvhead": 2, "nbkvhead": 2, "gethiddens": 2, "getsizeperhead": 2, "getdatatyp": 2, "usegptattentionplugin": [2, 6], "usepackedinput": 2, "inputpack": [2, 6], "usepagedkvcach": 2, "pagedkvcach": [2, 6], "gettokensperblock": 2, "settokensperblock": 2, "tokensperblock": [2, 6], "quantmod": [2, 5, 6, 12, 13, 14, 16, 17], "getquantmod": 2, "setquantmod": 2, "supportsinflightbatch": 2, "getmaxbatchs": 2, "setmaxbatchs": 2, "getmaxinputlen": 2, "setmaxinputlen": 2, "maxinputlen": [2, 6], "getmaxoutputlen": 2, "setmaxoutputlen": 2, "maxoutputlen": [2, 6], "getmaxnumtoken": 2, "setmaxnumtoken": 2, "maxnumtoken": 2, "computecontextlogit": 2, "getmodelvari": 2, "setmodelvari": 2, "usecustomallreduc": 2, "customallreduc": 2, "mnblayer": 2, "mnbhead": 2, "mnbkvhead": 2, "mhiddens": 2, "mdatatyp": 2, "musegptattentionplugin": 2, "minputpack": 2, "mpagedkvcach": 2, "mtokensperblock": 2, "mquantmod": 2, "mmaxbatchs": 2, "mmaxinputlen": 2, "mmaxoutputlen": 2, "mmaxnumtoken": 2, "mcomputecontextlogit": 2, "mmodelvari": 2, "musecustomallreduc": 2, "batch_manag": [2, 4], "kv_cache_manag": 2, "loggerptr": 2, "ilogg": 2, "enginebuff": [2, 6], "engines": [2, 6], "logger": [2, 6], "nullptr": 2, "uint8_t": [2, 6], "enginefil": 2, "getlogg": 2, "getbuffermanag": 2, "getworldconfig": 2, "iscudagraphmod": 2, "setcudagraphmod": 2, "decoderperrequest": [2, 6], "maxtokensinpagedkvcach": [2, 6], "nullopt": 2, "nummicrobatch": 2, "width": [2, 4, 5, 6], "smaller": [2, 12], "than": [2, 5, 6, 7, 12], "devid": 2, "micro": 2, "kvcachemanag": [2, 5, 17], "generatesinglebatch": 2, "generatemultibatch": 2, "createcontext": 2, "createbuff": 2, "createdecod": 2, "logitstyp": 2, "createkvcachemanag": 2, "createcustomallreduceworkspac": 2, "decoderstepasync": 2, "decoderstep": 2, "microbatchid": 2, "pp": [2, 6], "rank": [2, 4, 5, 6, 12], "receiv": [2, 4, 12], "other": [2, 3, 5, 6, 9, 12], "shouldstopsync": 2, "shouldstop": 2, "finalizeoutputid": 2, "send": [2, 3, 12], "them": [2, 7], "first": [2, 3, 5, 6, 7, 9, 10, 12], "asynchron": 2, "requir": [2, 3, 5, 6, 9, 12], "access": [2, 4], "kvcacheaddsequ": 2, "initnewtoken": 2, "mmodelconfig": 2, "mworldconfig": 2, "ncclcommun": 2, "mpipelinecomm": 2, "mcommstream": 2, "mcommev": 2, "mdecodermaxsequencelength": 2, "mlogger": 2, "tllmruntim": [2, 6], "mruntim": 2, "mnummicrobatch": 2, "runtimebuff": 2, "mbuffer": 2, "mkvcachemanag": 2, "mreceivedev": 2, "mcudagraphmod": 2, "arrai": [2, 12, 17], "cudagraphexecutor": 2, "mcudagraphinst": 2, "hasinst": 2, "clear": [2, 17], "preparenextgraph": 2, "nextcontextid": 2, "cudagraphexecptr": 2, "cudagraphexec_t": 2, "cudagraph_t": 2, "graph": [2, 3, 8, 12], "updat": [2, 3, 9], "uploadtostream": 2, "minstanc": 2, "util": [2, 3, 4, 6], "loadengin": 2, "enginepath": 2, "struct": 2, "memorytypestr": 2, "kgpu": 2, "kcpu": 2, "kpin": 2, "cppdatatyp": 2, "kfloat": [2, 3], "khalf": 2, "half": [2, 3, 12, 14], "kint8": 2, "int8_t": 2, "kint32": 2, "kint64": 2, "int64_t": 2, "uint32_t": 2, "uint64_t": [2, 4], "kunsign": 2, "kbool": 2, "trtdatatyp": 2, "bufferdatatyp": 2, "pointerelementtyp": 2, "remove_reference_t": 2, "remove_const_t": 2, "constpointercast": 2, "ptr": [2, 12], "d": [2, 4, 12], "buffercast": 2, "ostream": 2, "wrapper": [2, 7], "around": 2, "_unsign": 2, "ispoint": 2, "isunsign": 2, "getsiz": 2, "ktrtpointertyp": 2, "munsign": 2, "mpointer": 2, "bufferrang": 2, "value_typ": 2, "size_typ": 2, "refer": [2, 3, 7, 10, 12], "const_refer": 2, "const_point": 2, "iter": [2, 3, 4, 17], "const_iter": 2, "begin": 2, "end": [2, 3, 4, 5, 6, 10, 12], "cbegin": 2, "cend": 2, "index": [2, 6, 8, 12], "mdata": 2, "msize": 2, "kdatatyp": 2, "kisunsign": 2, "kispoint": 2, "For": [2, 3, 4, 5, 6, 7, 9, 12, 17], "convert": [2, 3], "uniqueconstptr": 2, "sharedconstptr": 2, "element": [2, 5, 6, 11, 12], "getsizeinbyt": 2, "byte": [2, 6], "getcapac": 2, "capac": 2, "getmemorytyp": 2, "newsiz": 2, "op": [2, 7, 12], "equal": [2, 6, 12], "reset": [2, 6, 17], "Not": 2, "allow": [2, 4, 6], "offset": [2, 11, 12], "view": [2, 12], "same": [2, 3, 4, 6, 7, 12, 17], "tconstptr": 2, "enable_if_t": 2, "is_const_v": 2, "independ": 2, "wrap": [2, 3, 12], "cannot": [2, 3, 6, 12], "beyond": 2, "determin": [2, 5, 11, 12, 14], "protect": 2, "tobyt": 2, "kunderlyingtyp": 2, "actual": [2, 7, 12], "base": [2, 3, 6, 12, 13, 14, 16, 17], "activ": [2, 3, 4, 6, 7, 11, 12], "also": [2, 3, 4, 5, 6, 7, 9, 11, 12], "reshapebuff": 2, "latest": [2, 9], "sequencelength": 2, "its": [2, 3, 4, 5, 6, 7, 12], "getshap": 2, "volum": [2, 9], "squeez": 2, "remov": [2, 3, 5, 6, 7, 9, 12], "unit": [2, 9], "from": [2, 3, 4, 5, 6, 7, 8, 12, 17], "nbdim": 2, "volumenonneg": 2, "throw": 2, "w": [2, 11], "r": 2, "makeshap": 2, "initializer_list": 2, "conveni": 2, "tostr": 2, "setpeeraccess": 2, "enabl": [2, 3, 4, 5, 6, 7, 9, 11, 12, 13], "ipcmemori": [2, 12], "buffers": 2, "getcommptrstensor": 2, "flags_siz": 2, "max_all_reduce_block": 2, "sizeof": 2, "allocateipcmemori": 2, "destroyipcmemori": 2, "mcommptr": 2, "mbuffers": 2, "mbufferptr": 2, "difftyp": 2, "ptrdiff_t": 2, "getgpu": 2, "getcpu": 2, "getpin": 2, "getgpudiff": 2, "getcpudiff": 2, "getpinneddiff": 2, "dealloc": 2, "getinst": 2, "bytestostr": 2, "mgpu": 2, "mcpu": 2, "mpin": 2, "mgpudiff": 2, "mcpudiff": 2, "mpinneddiff": 2, "thread_loc": 2, "optvec": 2, "floattyp": 2, "temperatur": [2, 6, 17], "minlength": [2, 6], "repetitionpenalti": [2, 6], "presencepenalti": [2, 6], "topk": [2, 6], "topp": [2, 6], "long": 2, "randomse": [2, 6], "toppdecai": [2, 6], "toppmin": [2, 6], "toppresetid": [2, 6], "beamsearchdiversityr": [2, 6], "lengthpenalti": [2, 6], "sever": [2, 7, 12, 17], "asciichar": 2, "msg": 2, "getlevel": 2, "setlevel": 2, "gpuspernod": [2, 6], "kdefaultgpuspernod": 2, "istensorparallel": 2, "ispipelineparallel": 2, "getrank": 2, "getgpuspernod": 2, "getpipelineparallelrank": 2, "gettensorparallelrank": 2, "isfirstpipelineparallelrank": 2, "islastpipelineparallelrank": 2, "getpipelineparallelgroup": 2, "validconfig": 2, "mpi": [2, 3, 4, 6], "mrank": 2, "mgpuspernod": 2, "toolkit": 3, "assembl": 3, "optim": [3, 5, 6, 12], "solut": 3, "perform": [3, 4, 5, 6, 7, 8, 12], "larg": [3, 5, 10, 12], "languag": [3, 6, 12], "infer": [3, 6, 11, 12, 17], "offer": 3, "effici": [3, 5, 6, 10], "nvidia": [3, 9, 12], "compon": [3, 4, 5], "those": [3, 5, 6, 11, 12], "well": [3, 5, 6, 11], "backend": [3, 4], "triton": 3, "server": 3, "easili": [3, 12], "web": 3, "servic": 3, "configur": [3, 5], "through": [3, 4, 5, 6, 7], "As": [3, 5, 7, 11, 12], "user": [3, 4, 5, 6, 7, 9, 10, 11, 12], "veri": [3, 6], "either": [3, 4, 12], "your": [3, 6, 9], "select": [3, 12], "pre": [3, 5], "onc": [3, 6, 7, 9, 12], "must": [3, 4, 5, 6, 9, 11, 12, 13, 17], "framework": [3, 12], "outsid": 3, "scope": 3, "checkpoint": [3, 11], "download": 3, "variou": [3, 5], "To": [3, 5, 9, 11, 12], "illustr": [3, 7], "point": [3, 4, 5, 6, 10, 11, 12], "lot": [3, 5], "obtain": [3, 4, 12], "hub": 3, "nemo": [3, 11], "pytorch": [3, 7, 12], "equip": 3, "recreat": 3, "wai": [3, 7], "eas": 3, "alreadi": [3, 5, 6, 7], "hand": 3, "standard": [3, 12], "togeth": [3, 5, 6, 11], "along": [3, 5, 12], "extens": 3, "sampl": [3, 5], "top": [3, 5, 6], "p": [3, 6, 12], "exhaust": 3, "found": [3, 6, 7, 9, 11], "recommend": [3, 5, 6, 9], "onlin": [3, 10], "serv": [3, 4], "explain": [3, 6, 11, 12], "mention": [3, 6], "abov": [3, 4, 6, 9], "ha": [3, 4, 5, 6, 9, 11, 12], "built": [3, 6, 9, 12], "power": 3, "represent": [3, 6, 7, 10], "deep": [3, 12], "neural": [3, 7], "becom": [3, 5, 7], "familiar": [3, 6], "core": [3, 7], "concept": 3, "section": [3, 12], "proceed": 3, "further": 3, "builder": 3, "That": [3, 4, 5, 6, 10, 12], "instanc": [3, 4, 6, 7, 12], "create_network": 3, "method": [3, 5, 6, 11], "inetworkdefinit": [3, 7], "popul": [3, 4, 5, 6, 12], "free": 3, "simpl": [3, 6, 7, 9], "insert": [3, 7, 12], "iactivationlay": 3, "act_typ": [3, 12], "activationtyp": [3, 12], "default_trtnet": 3, "add_activ": 3, "trt_tensor": [3, 12], "_create_tensor": 3, "get_output": [3, 7], "even": [3, 4, 5, 6, 12], "easier": 3, "few": [3, 10], "most": [3, 6, 12], "deriv": 3, "partial": 3, "sigmoid": [3, 12], "special": [3, 4, 5], "advanc": [3, 9], "silu": [3, 12], "travers": 3, "transform": [3, 5], "expos": [3, 4, 6], "ilay": [3, 7], "next": [3, 10, 17], "done": [3, 12], "build_engin": 3, "build_serialized_network": 3, "everyth": 3, "work": [3, 4, 5, 6, 7, 9, 11, 12], "expect": [3, 6, 12], "produc": [3, 6, 7, 12], "ihostmemori": 3, "store": [3, 5, 6, 11, 12], "binari": [3, 12], "file": [3, 4, 7], "emb": 3, "known": [3, 4, 5, 12], "reason": [3, 5, 6], "bound": [3, 6, 12, 17], "lead": [3, 4, 7], "code": [3, 4, 5, 6, 7, 9, 11, 12], "like": [3, 4, 5, 6, 7, 11, 12], "two": [3, 4, 5, 6, 7, 10, 12, 17], "out_featur": [3, 13], "in_featur": [3, 13], "fromfil": 3, "note": [3, 4, 6, 7, 11, 12, 17], "refit": 3, "featur": [3, 4, 6, 7], "refit_engin": 3, "One": [3, 12], "techniqu": [3, 4, 5, 11], "improv": [3, 5, 6, 10], "help": [3, 4, 7, 9, 10], "reduc": [3, 4, 5, 9, 12], "amount": [3, 17], "transfer": 3, "between": [3, 5, 6, 12], "dram": 3, "comput": [3, 5, 6, 12], "locat": [3, 6, 7, 9, 12], "multiprocessor": 3, "overhead": 3, "small": [3, 5], "addit": [3, 9, 11, 12], "cost": 3, "classic": 3, "matrix": [3, 5, 6, 12], "multipl": [3, 4, 5, 6, 7, 12], "matmul": [3, 5, 11, 12], "preced": 3, "written": 3, "b": [3, 7, 12], "global": [3, 5], "read": [3, 4, 5, 6], "again": 3, "suboptim": 3, "why": [3, 12], "identifi": [3, 4, 6, 12], "automat": [3, 4, 6, 7, 11, 12], "appli": [3, 5, 7, 11, 12], "With": [3, 5, 6], "instead": [3, 6, 7, 9, 12], "twice": 3, "fuse": [3, 5], "algorithm": [3, 5, 6], "possibl": [3, 6, 9, 10, 12], "almost": 3, "infinit": 3, "some": [3, 6, 7], "involv": [3, 13], "modif": [3, 7], "flash": [3, 5], "multihead": [3, 5, 12], "mani": [3, 4, 5, 12], "arithmet": 3, "bmm": 3, "softmax": [3, 5, 12], "where": [3, 5, 6, 11, 12, 17], "stand": [3, 10], "product": [3, 5, 6, 12], "interleav": [3, 5], "loop": [3, 4, 6], "non": [3, 4, 5, 12], "trivial": 3, "necessarili": 3, "someth": 3, "discov": 3, "might": [3, 6, 9], "polyhedr": 3, "alwai": [3, 5, 6], "risk": [3, 4], "uncommon": 3, "overcom": [3, 5], "inevit": 3, "limit": [3, 5, 6, 7, 12], "mechan": [3, 4], "map": [3, 4, 5, 7, 12, 14, 17], "cpp": [3, 6, 8, 9, 12], "follow": [3, 4, 6, 7, 9, 10, 11, 12], "interfac": 3, "extend": [3, 12], "custom": [3, 6, 12], "guid": [3, 12], "within": [3, 12], "trigger": [3, 5, 6, 7], "encapsul": [3, 5, 6, 12], "fairli": 3, "quantizetensorplugin": 3, "enqueu": [3, 17], "inputdesc": 3, "invokequant": 3, "els": [3, 12], "quantiz": [3, 5, 6, 12, 13, 14, 16, 17], "cu": 3, "quantizedkernel": 3, "grid": 3, "detail": [3, 4, 5, 12], "how": [3, 4, 6, 8, 9, 11], "head": [3, 6, 8, 12], "queri": [3, 6, 8, 12], "group": [3, 6, 8, 11, 12, 13], "role": 3, "load": 3, "drive": 3, "typic": [3, 6, 7], "regress": [3, 4, 5, 6], "charg": [3, 6], "both": [3, 4, 5, 6, 7, 11, 12, 13], "bodi": 3, "design": 3, "singl": [3, 4, 5, 6, 10, 11, 12], "system": [3, 4, 6], "add": [3, 5, 6, 7, 8, 9, 12, 17], "commun": [3, 4, 6], "primit": 3, "nccl": [3, 12], "librari": [3, 6, 9], "presenc": [3, 6], "connect": 3, "nvswitch": 3, "dgx": [3, 6], "ncclplugin": 3, "allreduc": [3, 12], "allgath": [3, 12], "tgt": [3, 12], "recv": [3, 12], "pipelin": [3, 4, 6], "former": 3, "split": [3, 6, 12, 17], "across": [3, 5, 6, 7, 12], "entir": [3, 12], "sibbl": 3, "distribut": [3, 5, 6, 9, 12], "subset": [3, 6, 12], "happen": [3, 6], "boundari": [3, 6], "balanc": [3, 6], "bandwdith": 3, "bandwidth": [3, 6], "incur": 3, "issu": [3, 5], "less": [3, 5, 6, 12], "term": [3, 12], "continu": [3, 4, 5, 6], "throughput": 3, "reli": [4, 7, 11], "aim": 4, "queue": 4, "elimin": 4, "inclus": [4, 12], "newli": [4, 6], "arriv": 4, "via": [4, 12], "hook": 4, "softwar": [4, 5, 6], "client": [4, 9], "text": [4, 6], "interact": 4, "Their": 4, "signatur": [4, 7, 12], "h": [4, 6, 12], "entri": 4, "getinferencerequestscallback": 4, "inferencerequest": 4, "take": [4, 5, 6, 7, 12, 13], "maximum": [4, 5, 6, 12], "accept": [4, 6, 9, 12], "neg": [4, 12], "unbound": 4, "64": [4, 6, 10, 17], "bit": [4, 5, 6, 11], "uniqu": [4, 6, 12], "respons": [4, 12], "deliv": [4, 10], "sendresponsecallback": 4, "conform": 4, "boolean": [4, 6, 12], "error": [4, 6], "messag": [4, 12], "been": [4, 5], "encount": 4, "case": [4, 5, 6, 11, 12], "properli": 4, "handl": [4, 12], "Its": [4, 5, 6, 12], "reject": 4, "ani": [4, 7, 17], "sent": 4, "correspond": [4, 5, 6, 7, 11, 12, 13], "being": [4, 5, 6, 10], "reus": 4, "appear": [4, 5, 6, 12], "third": [4, 6], "argument": [4, 6, 9, 12], "stop": [4, 6, 7], "pollstopsignalcallback": 4, "unordered_set": 4, "ensur": [4, 7], "report": 4, "returnbatchmanagerstatscallback": 4, "packag": 4, "three": [4, 6, 11, 12], "field": [4, 6, 11], "timestamp": 4, "put_tim": 4, "tm": 4, "m": [4, 10, 11, 12], "y": [4, 9, 11], "counter": 4, "count": 4, "integr": 4, "pool": [4, 5, 17], "item": [4, 17], "spawn": 4, "worker": 4, "constructor": [4, 6], "persist": 4, "invok": [4, 6, 7], "start": [4, 7, 12], "intend": 4, "back": 4, "safe": [4, 7], "retir": 4, "notifi": 4, "final_respons": 4, "intern": [4, 5], "state": [4, 5, 6, 7, 12], "relat": [4, 9, 12], "freed": 4, "batchmanag": 4, "pathtotrtengin": 4, "trtgptmodeltyp": 4, "inflightbatch": 4, "schedulerpolici": 4, "schedul": 4, "polici": 4, "below": [4, 5, 6, 7, 10], "maxnumrequest": 4, "getinferencerequestscb": 4, "sendresponsecb": 4, "adjust": 4, "try": 4, "maxim": 4, "aggress": 4, "max_util": 4, "paus": 4, "short": [4, 5, 17], "kv": [4, 6, 12], "cach": [4, 6, 11, 12], "resum": 4, "visibl": [4, 12], "effect": 4, "increas": [4, 6, 12], "latenc": [4, 5, 6], "adopt": [4, 6, 7], "conserv": 4, "approach": [4, 7], "know": 4, "suffici": 4, "worst": 4, "consumpt": [4, 5], "guaranteed_no_evict": 4, "termin": 4, "altern": 4, "requestid": 4, "interpret": 4, "signal": 4, "invoc": 4, "waituntiltermin": 4, "assum": [4, 12], "node": [4, 6, 11, 12], "control": [4, 5, 6, 7, 11, 12], "cuda_visible_devic": 4, "care": 4, "taken": 4, "broadcast": [4, 12], "seen": 4, "hold": [4, 7], "ident": [4, 12], "good": 4, "mha": [5, 12], "multiqueri": 5, "mqa": [5, 12], "gqa": [5, 12], "quick": 5, "remind": 5, "anoth": [5, 7, 12], "articl": 5, "arxiv": [5, 11, 12], "org": [5, 11, 12], "ab": [5, 11, 12], "1911": 5, "02150": 5, "2307": 5, "09288": 5, "variant": [5, 12], "fewer": 5, "gpt_attent": [5, 7, 12], "discuss": 5, "faster": 5, "futur": [5, 9, 11, 12], "remove_input_pad": [5, 12, 13, 17], "shorter": [5, 6], "max_sequence_length": 5, "excess": 5, "unneed": 5, "surround": 5, "problem": [5, 9], "1d": [5, 12], "context_fmha_typ": 5, "slowest": 5, "footprint": 5, "signific": 5, "quadrat": 5, "depend": [5, 6, 7, 9, 12], "otherwis": [5, 6, 12], "enabled_with_fp32_acc": 5, "accumul": 5, "forc": 5, "fp32": 5, "vanilla": 5, "larger": [5, 12], "2205": 5, "14135": 5, "08691": 5, "extra": 5, "plan": 5, "order": [5, 12], "overal": 5, "mask": [5, 12], "abl": 5, "fly": [5, 11, 12], "do": [5, 7, 12], "dequant": [5, 12], "ia3": 5, "version": [5, 6, 9, 12], "occup": 5, "multi_block_mod": [5, 12, 13], "test": [5, 6, 9], "scenario": 5, "exact": 5, "definit": [5, 12], "hard": 5, "predict": 5, "rule": 5, "thumb": 5, "worth": 5, "num_head": [5, 12, 14, 17], "processor": 5, "suggest": 5, "evolv": [5, 11], "research": [5, 11], "conduct": 5, "There": [5, 6, 7, 10, 11, 12], "minimum": [5, 12, 17], "heurist": 5, "purpos": [5, 9], "better": 5, "go": 5, "s0": 5, "s1": 5, "s2": 5, "constraint": [5, 12], "relax": 5, "ineffici": 5, "resourc": 5, "common": [5, 12], "past": 5, "acceler": 5, "mean": [5, 6, 12, 17], "monolith": 5, "max_seqlen": [5, 12], "hidden_dim_per_head": [5, 12], "up": 5, "close": 5, "reach": [5, 6], "decompos": 5, "keep": [5, 6, 12], "track": 5, "recycl": 5, "simplifi": [5, 6, 12], "rest": 5, "fp16": 5, "bfloat16": [5, 11], "howev": 5, "kv_cache_quant_mod": [5, 12], "int8_kv_cach": [5, 11], "fp8_kv_cach": [5, 11], "kv_orig_quant_scal": [5, 12], "kv_quant_orig_scal": [5, 12], "reconstruct": [5, 12], "beam_width": [5, 12, 17], "si": 5, "bi": 5, "ti": 5, "integ": [5, 6, 11, 12], "stage": [5, 7], "concaten": [5, 12], "project": [5, 6, 9], "hidden": [5, 6, 12, 13], "3d": [5, 12], "batch_beam_s": [5, 12], "hidden_dim": [5, 12], "multipli": [5, 12], "num_token": [5, 12], "greater": [5, 6, 12], "word": [5, 6, 12, 17], "pseudo": [5, 6, 11, 12], "seq": 5, "context_phas": 5, "generation_phas": 5, "maintain": [5, 6, 11], "homogen": 5, "longer": [5, 6], "justifi": 5, "rotary_embedding_dim": [5, 12], "neox": [5, 6, 11], "j": [5, 6, 10, 11, 12], "form": [5, 12], "position_embedding_typ": [5, 12, 13, 14], "positionembeddingtyp": [5, 12, 13, 14], "rope_gpt_neox": [5, 12, 14], "rope_gptj": [5, 12], "slope": [5, 12], "constant": [5, 12], "f": [5, 6, 12], "q_scale": [5, 12, 13, 14], "sqrt": [5, 12], "head_siz": [5, 12, 17], "On": 5, "style": 5, "broader": 5, "aspect": 5, "encod": [5, 6, 11, 12], "kind": [5, 7], "ad": [5, 6, 7, 9, 12], "accord": [5, 12, 13], "lightweight": 5, "popular": 5, "t5": [5, 6], "famili": 5, "regular": [5, 12], "ahead": 5, "ii": [5, 12], "implicit": [5, 12], "suit": 5, "too": 5, "fit": 5, "turn": 5, "max_dist": [5, 12, 13, 14], "architectur": [6, 8, 9], "compos": 6, "declar": [6, 7], "gptsessiontest": [6, 9], "restrict": [6, 9, 12], "specif": [6, 7, 9, 12], "llama": [6, 11], "experiment": [6, 11], "now": 6, "enc_dec": 6, "folder": [6, 10, 11], "gptsession": 6, "specifi": [6, 7, 9, 12], "gptmodelconfig": 6, "worldconfig": 6, "come": 6, "famou": 6, "mpi_comm_world": 6, "warn": [6, 12], "descript": [6, 12], "compil": [6, 9, 12], "overload": 6, "getter": 6, "setter": 6, "vocabulari": [6, 13], "numlay": 6, "numhead": 6, "numkvhead": 6, "multi": [6, 8, 9, 12], "page": [6, 8, 12], "relev": 6, "numer": [6, 8], "lmm": 6, "thing": 6, "cluster": 6, "collabor": [6, 12], "tp": [6, 10, 12], "nvlink": 6, "consecut": 6, "harder": 6, "guarante": 6, "absenc": 6, "advantag": 6, "interconnect": 6, "a100": 6, "mpi_init": 6, "argc": 6, "argv": 6, "mpi_comm_s": 6, "mpi_comm_rank": 6, "shown": [6, 9, 12], "simplic": 6, "mpirun": 6, "command": [6, 9], "instal": [6, 9], "talk": 6, "administr": 6, "program": 6, "n": [6, 11, 12], "prepar": [6, 11, 12, 14], "four": [6, 7, 13], "longest": [6, 12], "ask": 6, "until": 6, "due": 6, "look": [6, 9], "were": [6, 10], "present": [6, 11], "allfinish": 6, "computelogit": 6, "generatetokensfromlogit": 6, "while": [6, 7, 11], "generationinput": 6, "generationoutput": 6, "mandatori": 6, "aka": [6, 12], "eo": 6, "50": 6, "256": 6, "257": 6, "fill": [6, 12], "slot": 6, "numtoken": 6, "match": [6, 7, 12], "made": 6, "flexibl": [6, 9], "ban": 6, "badwordslength": 6, "stopwordslength": 6, "let": [6, 7, 12], "consid": [6, 10, 12], "second": [6, 12], "row": [6, 11, 12], "exclus": [6, 11], "prefix": [6, 12], "diagram": 6, "inner": [6, 12], "maxseqlength": 6, "previou": 6, "prob": 6, "gather_all_token_logit": [6, 14, 17], "import": 6, "out": [6, 10, 12], "impact": 6, "lm": 6, "just": 6, "caller": 6, "samplingconfig": [6, 17], "except": [6, 12], "0f": 6, "penal": 6, "often": [6, 12], "irrespect": 6, "mutual": [6, 11], "finer": [6, 7], "grain": [6, 7], "random": 6, "seed": 6, "largest": 6, "similar": [6, 7, 12], "decai": 6, "exponenti": 6, "factual": 6, "enhanc": 6, "0e": 6, "influenc": 6, "remain": [6, 7, 12], "greedi": 6, "upper": [6, 12], "divers": 6, "factor": [6, 11, 12], "renam": 6, "beamsearchlengthpenalti": 6, "scalar": [6, 12], "gptdecod": 6, "doe": [6, 12, 17], "directli": [6, 7, 9], "satisfi": 6, "separ": [6, 9, 10, 12], "biggest": 6, "compar": [6, 12], "individu": 6, "behavior": [6, 12], "revisit": 6, "structur": [6, 7], "could": [6, 7], "rebuild": 6, "part": [7, 9, 12], "gw": 7, "manipul": 7, "modifi": 7, "highest": 7, "facilit": 7, "gemm": 7, "smoothquant": 7, "alter": 7, "fusion": [7, 11], "ideal": 7, "condit": [7, 12], "would": 7, "nest": 7, "flow": 7, "scatter": 7, "get_par": [7, 12], "get_us": [7, 12], "consum": [7, 12], "replace_all_uses_with": [7, 12], "replac": [7, 12], "origin": 7, "miss": 7, "especi": 7, "opaqu": 7, "world": [7, 12], "wise": 7, "singleton": [7, 12], "flayerinfomemo": 7, "replace_input_with": 7, "replace_output_uses_with": 7, "redirect": 7, "usag": [7, 12], "consist": [7, 11], "patternrewrit": 7, "match_and_rewrit": 7, "combin": [7, 10, 13], "complex": 7, "patternanalyz": 7, "analysi": 7, "analyz": 7, "rewritepatternmanag": 7, "label": [7, 12], "benefit": 7, "privileg": 7, "analysispatternmanag": 7, "vital": 7, "certain": 7, "manner": 7, "routin": 7, "subtract": 7, "test_graph_rewrit": 7, "naivepatternrewriter_replaceaddwithsub": 7, "replace_add_with_sub": 7, "root_lay": 7, "layertyp": 7, "elementwis": [7, 12], "separate_match_rewrit": 7, "enter": 7, "as_lay": 7, "elementwiseoper": [7, 12], "elementwise_sum": 7, "subgraph": 7, "get_input": 7, "old": 7, "elementwise_sub": 7, "dangl": 7, "prune": [7, 12], "explicitli": 7, "skip": 7, "mark_as_remov": 7, "deal": 7, "rather": 7, "unnecessari": 7, "share": [7, 9, 13], "nearli": 7, "never": 7, "depriv": 7, "phase": [7, 12], "sinc": [7, 9], "commonli": 7, "gptattentionpluginremovepaddingrewritepass": 7, "gpt_attention_plugin_remove_pad": 7, "plugin_v2": 7, "plugin_namespac": 7, "plugin_typ": 7, "gptattent": 7, "flayer": 7, "assert": [7, 12], "although": 7, "black": 7, "box": 7, "tensor_input": 7, "extern": [7, 17], "in_len": 7, "new_input": 7, "clone_input": 7, "arglist": 7, "new_out": 7, "replace_outputs_uses_with": 7, "quit": 7, "focu": 7, "u": 7, "real": [7, 9], "pleas": 7, "fuseattentionwithbiaspass": 7, "graph_rewrit": 7, "sourc": [8, 12, 13, 14, 16, 17], "debug": 8, "rewrit": [8, 12], "qunat": 8, "instruct": 9, "polygraphi": 9, "repositori": 9, "docker": 9, "platform": 9, "lf": 9, "apt": 9, "github": [9, 12], "com": [9, 12], "cd": 9, "submodul": 9, "recurs": 9, "pull": 9, "imag": 9, "release_build": 9, "cuda_arch": 9, "cmake": 9, "format": [9, 17], "ada": [9, 10], "hopper": [9, 10], "89": 9, "90": 9, "release_run": 9, "local_us": 9, "switch": 9, "local": 9, "account": 9, "root": [9, 12], "insid": [9, 12], "app": 9, "tag": 9, "devel": 9, "who": 9, "prefer": 9, "shell": 9, "target": 9, "dockerfil": 9, "ipc": 9, "ulimit": 9, "memlock": 9, "stack": 9, "67108864": 9, "pwd": 9, "workdir": 9, "script": [9, 11], "build_wheel": 9, "trt_root": 9, "usr": 9, "deploi": 9, "pip": 9, "whl": 9, "increment": 9, "clean": 9, "semicolon": 9, "amper": [9, 10], "cuda_architectur": 9, "80": 9, "86": 9, "cmakelist": 9, "txt": 9, "cpp_onli": 9, "particularli": 9, "avoid": 9, "introduc": [9, 11], "particular": 9, "dual": 9, "abi": 9, "gcc": 9, "overridden": 9, "build_dir": 9, "choos": 9, "against": 9, "These": 9, "libtensorrt_llm": 9, "libtensorrt_llm_stat": 9, "libnvinfer_plugin_tensorrt_llm": 9, "under": 9, "chang": [9, 12, 17], "summar": 10, "measur": 10, "tabl": [10, 12, 13], "observ": 10, "peak": 10, "benchmark": 10, "task": [10, 11, 13, 17], "tok": 10, "6b": 10, "128": [10, 14], "907": 10, "2048": 10, "179": 10, "229": 10, "980": 10, "7b": 10, "193": 10, "367": 10, "058": 10, "32": [10, 11, 14], "230": 10, "70b": 10, "317": 10, "616": 10, "843": 10, "583": 10, "falcon": [10, 11], "180b": 10, "96": 10, "686": 10, "073": 10, "465": 10, "630": 10, "859": 10, "757": 10, "240": 10, "622": 10, "581": 10, "531": 10, "679": 10, "558": 10, "526": 10, "650": 10, "486": 10, "459": 10, "529": 10, "592": 10, "237": 10, "181": 10, "272": 10, "738": 10, "929": 10, "923": 10, "202": 10, "perciev": 10, "1st": [10, 12], "29": 10, "36": 10, "26": 10, "109": 10, "27": 10, "205": 10, "71": 10, "73": 10, "129": 10, "133": 10, "47": 10, "377": 10, "61": 10, "509": 10, "address": 10, "simplest": [10, 12], "ieee": 11, "x": [11, 12, 13], "scale": [11, 12], "satfinit": 11, "fp": 11, "static_cast": 11, "2d": [11, 12], "column": [11, 12], "channel": 11, "mi": 11, "rang": [11, 12, 14], "ni": 11, "2211": [11, 12], "10438": 11, "accuraci": 11, "downstream": 11, "paper": 11, "preprocess": 11, "2210": 11, "17323": 11, "2306": 11, "00978": 11, "weightonlygroupwisequantmatmulplugin": 11, "weight_only_groupwise_quant_matmul": 11, "v2": 11, "sq": 11, "baichuan": 11, "bert": [11, 12], "chatglm": [11, 12], "opt": [11, 12], "santacod": 11, "starcod": 11, "int4_weight": 11, "w4a": 11, "int8_weight": 11, "w8a": 11, "a8": 11, "per_channel": 11, "per_token": 11, "per_group": 11, "fp8_qdq": 11, "allreducestrategi": 12, "intenum": 12, "customallreducekernel": 12, "kept": 12, "sync": [12, 17], "oneshot": 12, "ring": 12, "twoshot": 12, "attentionmasktyp": [12, 13, 14], "bidirect": 12, "causal": [12, 14], "dimrang": 12, "str": [12, 13, 14, 17], "profil": 12, "min": 12, "max": 12, "thu": 12, "param": [12, 17], "ctor": 12, "layernormpositiontyp": [12, 14], "pre_layernorm": [12, 14], "layernormtyp": [12, 14], "groupnorm": [12, 13], "rmsnorm": [12, 13], "alibi": 12, "alibi_with_scal": 12, "choic": 12, "is_alibi": 12, "is_rop": 12, "learned_absolut": [12, 13, 14], "rel": 12, "rotaryscalingtyp": 12, "dynam": [12, 14, 17], "dim_rang": 12, "is_network_input": 12, "tensorloc": 12, "dens": 12, "cast": 12, "properti": [12, 17], "is_dynam": 12, "exclud": 12, "is_trt_wrapp": 12, "itensor": 12, "differenti": 12, "necessari": 12, "inherit": 12, "hierarachi": 12, "physic": 12, "mark_output": 12, "keepdim": 12, "ndim": 12, "permut": 12, "new_tensor": 12, "undefin": 12, "exce": 12, "split_size_or_sect": 12, "transpos": 12, "dim0": 12, "dim1": 12, "zero_is_placehold": 12, "unaryoper": 12, "closur": 12, "round": 12, "exp": 12, "sin": 12, "iunarylay": 12, "unari": 12, "tanh": 12, "left": 12, "right": 12, "expand": 12, "sub": 12, "mul": 12, "prod": 12, "div": 12, "gt": 12, "lt": 12, "op_and": 12, "AND": 12, "op_or": 12, "OR": 12, "eq": 12, "pow": 12, "ielementwiselay": 12, "union": 12, "world_siz": 12, "amongst": 12, "particip": 12, "section_s": 12, "contribut": 12, "doc": 12, "deeplearn": 12, "html": 12, "workspac": [12, 13, 14], "instance_id": [12, 13], "strategi": 12, "replic": 12, "poitner": 12, "barrier": 12, "initil": 12, "arang": 12, "int32": 12, "float32": [12, 13, 14], "ifilllay": 12, "filloper": 12, "linspac": 12, "_str_to_trt_dtype_dict": 12, "_util": 12, "argmax": 12, "onnx": 12, "blob": 12, "md": 12, "reduct": 12, "ye": 12, "avg_pool2d": 12, "kernel_s": [12, 13], "stride": [12, 13], "ceil_mod": [12, 13], "count_include_pad": [12, 13], "bert_attent": 12, "relative_attent": [12, 13, 14], "relative_attention_bia": 12, "1706": 12, "03762": 12, "sum_of_token": 12, "bertattentionplugin": 12, "qkv": 12, "max_seq_len": 12, "embed": 12, "num_bucket": [12, 13, 14], "distanc": 12, "posit": 12, "broadcast_help": 12, "pair": 12, "int8": 12, "127": 12, "chunk": 12, "split_siz": 12, "clip": 12, "alpha": 12, "beta": 12, "inp": 12, "jj": 12, "len": [12, 17], "ndarrai": 12, "iconstantlay": 12, "numpi": 12, "weight": [12, 13], "serial": [12, 17], "constant_to_tensor_": 12, "conv2d": [12, 13], "dilat": [12, 13], "conv_transpose2d": 12, "output_pad": [12, 13], "einsum": 12, "einsum_eq": 12, "ieinsumlay": 12, "summat": 12, "over": 12, "equat": 12, "einstein": 12, "convent": 12, "ascii": 12, "letter": 12, "comma": 12, "subscript": 12, "repeat": 12, "diagon": 12, "ax": 12, "omit": 12, "express": 12, "alphabet": 12, "arrow": 12, "ij": 12, "jk": 12, "ik": 12, "equival": 12, "ellipsi": 12, "place": 12, "syntax": 12, "rubric": 12, "ji": 12, "kj": 12, "dot": 12, "ijk": 12, "ikl": 12, "ijl": 12, "neither": 12, "elementwise_binari": 12, "sharding_dim": [12, 13], "tp_rank": [12, 13], "lookup": [12, 13], "among": 12, "portion": 12, "transposit": 12, "whole": 12, "divid": 12, "default_net": 12, "plugin_config": 12, "lookup_plugin": 12, "igatherlay": 12, "tg_group": 12, "shard": [12, 13], "vocab": 12, "calcul": 12, "expand_shap": 12, "expans": 12, "islicelay": 12, "verifi": 12, "shrink": 12, "behaviour": 12, "subject": 12, "expand_dim": 12, "ishufflelay": 12, "new_shap": 12, "append": 12, "shuffl": 12, "expand_dims_lik": 12, "expand_mask": 12, "tgt_len": 12, "src_seq_len": 12, "tgt_seq_len": 12, "3rd": 12, "2nd": 12, "flip": 12, "revers": 12, "axi": 12, "down": 12, "gatherel": 12, "gather_last_token_logit": 12, "extract": 12, "last_tokens_id": 12, "th": 12, "lenght": 12, "geglu": 12, "gate": 12, "gelu": [12, 14], "halv": 12, "generate_alibi_bias": 12, "key_length": 12, "bias": 12, "05100": 12, "generate_alibi_slop": 12, "alibi_scal": 12, "past_key_valu": [12, 13], "host_past_key_value_length": [12, 13], "context_length": [12, 13, 17], "host_request_typ": [12, 13], "num_kv_head": [12, 13, 14, 17], "hidden_size_per_head": 12, "rotary_embedding_bas": [12, 13], "10000": [12, 13, 14], "rotary_embedding_scale_typ": 12, "rotary_embedding_scal": 12, "rotary_embedding_max_posit": 12, "1024": [12, 13], "max_context_length": [12, 13, 17], "mask_typ": 12, "alibi_slop": 12, "kv_cache_block_point": [12, 13, 14, 17], "do_cross_attent": [12, 13], "cross_qkv": 12, "cross_qkv_length": 12, "encoder_input_length": [12, 13, 17], "host_context_length": [12, 13, 17], "qkv_bia": [12, 14], "still": 12, "progress": 12, "hint": 12, "regard": 12, "merg": 12, "contigu": 12, "max_block": 12, "num_tokens_per_block": 12, "inflight": 12, "rope": 12, "theta": 12, "ignor": 12, "rotari": 12, "fp8": 12, "max_blocks_per_sequ": 12, "cross": 12, "group_norm": 12, "num_group": [12, 13], "ep": [12, 13, 14], "1e": [12, 13, 14], "05": [12, 13, 14], "todo": 12, "index_select": 12, "5th": 12, "interpol": 12, "scale_factor": 12, "nearest": 12, "align_corn": 12, "recompute_scale_factor": 12, "antialia": 12, "is_gated_activ": 12, "layer_norm": 12, "normalized_shap": [12, 13], "use_diff_of_squar": 12, "norm": 12, "normal": 12, "fraction": 12, "gamma": 12, "formula": 12, "varianc": 12, "squar": 12, "var": 12, "epsilon": 12, "mat2": 12, "transa": 12, "transb": 12, "imatrixmultiplylay": 12, "ireducelay": 12, "non_gated_vers": 12, "swiglu": [12, 14], "outer": 12, "vec2": 12, "p2p": 12, "ncclrecv": 12, "rms_norm": 12, "06": [12, 13, 14], "weig": 12, "ncclsend": 12, "becaus": 12, "learn": 12, "emul": 12, "slicemod": 12, "strict_bound": 12, "isoftmaxlay": 12, "softplu": 12, "threshold": 12, "stabl": 12, "nn": 12, "revert": 12, "threashold": 12, "ith": 12, "squared_relu": 12, "untouch": 12, "unsqueez": 12, "enforc": 12, "iselectlay": 12, "mish": 13, "num_attention_head": [13, 14], "max_position_embed": [13, 14], "num_lay": [13, 14, 17], "apply_query_key_layer_sc": [13, 14], "attention_mask_typ": [13, 14], "rotary_embedding_sc": 13, "use_int8_kv_cach": [13, 14], "rotary_embedding_percentag": [13, 14], "quant_mod": [13, 14, 17], "cross_attent": [13, 17], "use_cach": [13, 14], "kv_cache_param": [13, 14], "attention_param": [13, 14], "encoder_output": [13, 14, 17], "attentionparam": 13, "encoder_max_input_length": [13, 17], "is_valid": 13, "gpt_attention_plugin": [13, 17], "is_valid_cross_attn": 13, "bertattent": 13, "keyvaluecacheparam": 13, "get_first_kv_cache_block_point": 13, "get_first_past_key_valu": 13, "output_dtyp": 13, "in_channel": 13, "out_channel": 13, "padding_mod": 13, "convtranspose2d": 13, "output_s": 13, "num_embed": 13, "embedding_dim": 13, "And": 13, "prompttuningembed": 13, "vocab_s": [13, 14, 17], "though": 13, "prompt": 13, "prompt_embedding_t": [13, 14, 17], "task_vocab_s": 13, "alia": 13, "share_weight": 13, "multiply_gath": 13, "gemm_plugin": 13, "use_fp8": 13, "multiply_reduc": 13, "gatedmlp": 13, "hidden_act": [13, 14], "num_channel": 13, "affin": 13, "elementwise_affin": 13, "avgpool2d": 13, "baichuanforcausallm": 14, "mlp_hidden_s": 14, "baichuanmodel": 14, "generationmixin": 14, "max_num_token": 14, "brief": [14, 17], "fed": 14, "bertforquestionansw": 14, "type_vocab_s": 14, "num_label": 14, "token_type_id": 14, "bertmodel": 14, "bloomforcausallm": 14, "multi_query_mod": 14, "use_parallel_embed": 14, "embedding_sharding_dim": 14, "share_embedding_t": 14, "bloommodel": 14, "chatglm2headmodel": 14, "kv_channel": 14, "multi_query_group_num": 14, "linear_bia": 14, "ffn_hiden_s": 14, "13696": 14, "28": 14, "act_func": 14, "max_seq_length": 14, "32768": 14, "65024": 14, "chatglm2model": 14, "chatglm6bheadmodel": 14, "inter_s": 14, "chatglm6bmodel": 14, "decodermodel": 14, "encoder_num_head": 14, "encoder_hidden_s": 14, "logits_dtyp": 14, "has_position_embed": [14, 17], "has_embedding_layernorm": 14, "has_embedding_scal": 14, "has_attention_qkvo_bia": 14, "has_mlp_bia": 14, "has_model_final_layernorm": 14, "layernorm_ep": 14, "layernorm_posit": 14, "layernorm_typ": 14, "has_lm_head_bia": 14, "residual_sc": 14, "decoder_input_id": 14, "max_encoder_input_len": 14, "encodermodel": 14, "falconforcausallm": 14, "use_alibi": 14, "parallel_attent": 14, "new_decoder_architectur": 14, "falconmodel": 14, "all_reduce_workspac": 14, "gptjforcausallm": 14, "rotary_dim": 14, "gptjmodel": 14, "enable_two_optimization_profil": 14, "gptlmheadmodel": 14, "use_prompt_tun": [14, 17], "gptmodel": 14, "prompt_task": 14, "prompt_vocab_s": [14, 17], "prompt_embedding_table_s": 14, "gptneoxforcausallm": 14, "gptneoxmodel": 14, "llamaforcausallm": 14, "rotary_bas": 14, "rotary_sc": 14, "rms_norm_ep": 14, "llamamodel": 14, "optlmheadmodel": 14, "pre_norm": 14, "do_layer_norm_befor": 14, "optmodel": 14, "fp8_quantiz": 14, "quant_scal": 14, "dict": [14, 17], "smooth_quant": 14, "weight_only_groupwise_quant": 14, "group_siz": 14, "pre_quant_scal": 14, "exclude_modul": 14, "current_key_nam": 14, "weight_only_quant": 14, "intflag": 16, "chatglm6bheadmodelgenerationsess": 17, "debug_tensors_to_sav": 17, "cuda_graph_mod": 17, "buffer_alloc": 17, "_runtim": 17, "generationsequ": 17, "seq_idx": 17, "batch_idx": 17, "get_batch_idx": 17, "idx": 17, "get_seq_idx": 17, "cuda_stream_guard": 17, "session": 17, "exit": 17, "sampling_config": 17, "stop_words_list": 17, "bad_words_list": 17, "no_repeat_ngram_s": 17, "output_sequence_length": 17, "return_dict": 17, "decode_batch": 17, "decode_regular": 17, "ite": 17, "sequence_limit_length": 17, "decode_stream": 17, "finalize_decod": 17, "first_lay": 17, "handle_per_step": 17, "has_token_type_embed": 17, "last_lay": 17, "num_heads_kv": 17, "paged_kv_cach": 17, "pp_communicate_final_output_id": 17, "final_output_id": 17, "pp_communicate_new_token": 17, "should_stop": 17, "cache_indir": 17, "tokens_per_block": 17, "use_custom_all_reduc": 17, "memory_pool": 17, "max_blocks_per_seq": 17, "add_sequ": 17, "context_len": 17, "get_pointer_arrai": 17, "model_nam": 17, "end_id": 17, "pad_id": 17, "top_k": 17, "top_p": 17, "length_penalti": 17, "repetition_penalti": 17, "min_length": 17, "presence_penalti": 17, "use_beam_hyp": 17, "beam_search_diversity_r": 17, "output_cum_log_prob": 17, "output_log_prob": 17, "random_se": 17, "kwarg": 17, "iexecutioncontext": 17, "create_execution_context": 17, "icudaengin": 17, "from_engin": 17, "from_serialized_engin": 17, "infer_shap": 17, "tensorinfo": 17, "everi": 17, "Or": 17, "set_input_shap": 17, "manual": 17, "succeed": 17, "async": 17, "to_word_list_format": 17, "word_dict": 17, "sentenc": 17, "am": 17, "happi": 17, "sad": 17}, "objects": {"": [[2, 0, 1, "_CPPv48nvinfer1", "nvinfer1"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [2, 0, 1, "_CPPv4N12tensorrt_llm13batch_managerE", "tensorrt_llm::batch_manager"], [2, 0, 1, "_CPPv4N12tensorrt_llm13batch_manager16kv_cache_managerE", "tensorrt_llm::batch_manager::kv_cache_manager"], [2, 0, 1, "_CPPv4N12tensorrt_llm6layersE", "tensorrt_llm::layers"], [2, 1, 1, "_CPPv4I0EN12tensorrt_llm6layers18DynamicDecodeLayerE", "tensorrt_llm::layers::DynamicDecodeLayer"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm6layers18DynamicDecodeLayerE", "tensorrt_llm::layers::DynamicDecodeLayer::T"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataTypeE", "tensorrt_llm::runtime::BufferDataType"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb", "tensorrt_llm::runtime::BufferDataType::BufferDataType"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb", "tensorrt_llm::runtime::BufferDataType::BufferDataType::_unsigned"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb", "tensorrt_llm::runtime::BufferDataType::BufferDataType::dataType"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb", "tensorrt_llm::runtime::BufferDataType::BufferDataType::pointer"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType11getDataTypeEv", "tensorrt_llm::runtime::BufferDataType::getDataType"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType7getSizeEv", "tensorrt_llm::runtime::BufferDataType::getSize"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType9isPointerEv", "tensorrt_llm::runtime::BufferDataType::isPointer"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType10isUnsignedEv", "tensorrt_llm::runtime::BufferDataType::isUnsigned"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType15kTrtPointerTypeE", "tensorrt_llm::runtime::BufferDataType::kTrtPointerType"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType9mDataTypeE", "tensorrt_llm::runtime::BufferDataType::mDataType"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType8mPointerE", "tensorrt_llm::runtime::BufferDataType::mPointer"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType9mUnsignedE", "tensorrt_llm::runtime::BufferDataType::mUnsigned"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataTypecvN8nvinfer18DataTypeEEv", "tensorrt_llm::runtime::BufferDataType::operator nvinfer1::DataType"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManagerE", "tensorrt_llm::runtime::BufferManager"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager13BufferManagerE13CudaStreamPtr", "tensorrt_llm::runtime::BufferManager::BufferManager"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager13BufferManagerE13CudaStreamPtr", "tensorrt_llm::runtime::BufferManager::BufferManager::stream"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::BufferManager::CudaStreamPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10IBufferPtrE", "tensorrt_llm::runtime::BufferManager::IBufferPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10ITensorPtrE", "tensorrt_llm::runtime::BufferManager::ITensorPtr"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::dims"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::memoryType"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::memoryType"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::size"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::type"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::type"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copy"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv", "tensorrt_llm::runtime::BufferManager::copy"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType", "tensorrt_llm::runtime::BufferManager::copy"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy::dst"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::dst"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv", "tensorrt_llm::runtime::BufferManager::copy::dst"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::dst"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy::dst"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::dstType"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy::src"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::src"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv", "tensorrt_llm::runtime::BufferManager::copy::src"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::src"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy::src"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::srcType"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [2, 3, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [2, 2, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::T"], [2, 2, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::T"], [2, 2, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::T"], [2, 4, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::dims"], [2, 4, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::dims"], [2, 4, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [2, 4, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [2, 4, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [2, 4, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::src"], [2, 4, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::src"], [2, 4, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::src"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::src"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::src"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu::dims"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu::type"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu::type"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyBuffer"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyBuffer::memoryType"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyBuffer::type"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyTensor"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyTensor::memoryType"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyTensor::type"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager9getStreamEv", "tensorrt_llm::runtime::BufferManager::getStream"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu::dims"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu::size"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu::type"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu::type"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14initMemoryPoolEi", "tensorrt_llm::runtime::BufferManager::initMemoryPool"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14initMemoryPoolEi", "tensorrt_llm::runtime::BufferManager::initMemoryPool::device"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10kBYTE_TYPEE", "tensorrt_llm::runtime::BufferManager::kBYTE_TYPE"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7mStreamE", "tensorrt_llm::runtime::BufferManager::mStream"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned::dims"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned::type"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned::type"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7setZeroER7IBuffer", "tensorrt_llm::runtime::BufferManager::setZero"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7setZeroER7IBuffer", "tensorrt_llm::runtime::BufferManager::setZero::buffer"], [2, 1, 1, "_CPPv4I0EN12tensorrt_llm7runtime11BufferRangeE", "tensorrt_llm::runtime::BufferRange"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeER7IBuffer", "tensorrt_llm::runtime::BufferRange::BufferRange"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeER7IBuffer", "tensorrt_llm::runtime::BufferRange::BufferRange::buffer"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime11BufferRangeE", "tensorrt_llm::runtime::BufferRange::T"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange5beginEv", "tensorrt_llm::runtime::BufferRange::begin"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRange5beginEv", "tensorrt_llm::runtime::BufferRange::begin"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange6cbeginEv", "tensorrt_llm::runtime::BufferRange::cbegin"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRange6cbeginEv", "tensorrt_llm::runtime::BufferRange::cbegin"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange4cendEv", "tensorrt_llm::runtime::BufferRange::cend"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRange4cendEv", "tensorrt_llm::runtime::BufferRange::cend"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange14const_iteratorE", "tensorrt_llm::runtime::BufferRange::const_iterator"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange13const_pointerE", "tensorrt_llm::runtime::BufferRange::const_pointer"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange15const_referenceE", "tensorrt_llm::runtime::BufferRange::const_reference"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange3endEv", "tensorrt_llm::runtime::BufferRange::end"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRange3endEv", "tensorrt_llm::runtime::BufferRange::end"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange8iteratorE", "tensorrt_llm::runtime::BufferRange::iterator"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange5mDataE", "tensorrt_llm::runtime::BufferRange::mData"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange5mSizeE", "tensorrt_llm::runtime::BufferRange::mSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRangeixE9size_type", "tensorrt_llm::runtime::BufferRange::operator[]"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRangeixE9size_type", "tensorrt_llm::runtime::BufferRange::operator[]"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRangeixE9size_type", "tensorrt_llm::runtime::BufferRange::operator[]::index"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRangeixE9size_type", "tensorrt_llm::runtime::BufferRange::operator[]::index"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange7pointerE", "tensorrt_llm::runtime::BufferRange::pointer"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange9referenceE", "tensorrt_llm::runtime::BufferRange::reference"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11BufferRange4sizeEv", "tensorrt_llm::runtime::BufferRange::size"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange9size_typeE", "tensorrt_llm::runtime::BufferRange::size_type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange10value_typeE", "tensorrt_llm::runtime::BufferRange::value_type"], [2, 1, 1, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime11CppDataTypeE", "tensorrt_llm::runtime::CppDataType"], [2, 2, 1, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime11CppDataTypeE", "tensorrt_llm::runtime::CppDataType::kDataType"], [2, 2, 1, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime11CppDataTypeE", "tensorrt_llm::runtime::CppDataType::kIsPointer"], [2, 2, 1, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime11CppDataTypeE", "tensorrt_llm::runtime::CppDataType::kIsUnsigned"], [2, 1, 1, "_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime11CppDataTypeI9kDataType9kUnsignedXL1EEEE", "tensorrt_llm::runtime::CppDataType<kDataType, kUnsigned, true>"], [2, 2, 1, "_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime11CppDataTypeI9kDataType9kUnsignedXL1EEEE", "tensorrt_llm::runtime::CppDataType<kDataType, kUnsigned, true>::kDataType"], [2, 2, 1, "_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime11CppDataTypeI9kDataType9kUnsignedXL1EEEE", "tensorrt_llm::runtime::CppDataType<kDataType, kUnsigned, true>::kUnsigned"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeI9kDataType9kUnsignedXL1EEE4typeE", "tensorrt_llm::runtime::CppDataType<kDataType, kUnsigned, true>::type"], [2, 1, 1, "_CPPv4I_bEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType5kBOOLE9kUnsignedEE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kBOOL, kUnsigned>"], [2, 2, 1, "_CPPv4I_bEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType5kBOOLE9kUnsignedEE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kBOOL, kUnsigned>::kUnsigned"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType5kBOOLE9kUnsignedE4typeE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kBOOL, kUnsigned>::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kFLOATEEE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kFLOAT>"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kFLOATEE4typeE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kFLOAT>::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType5kHALFEEE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kHALF>"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType5kHALFEE4typeE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kHALF>::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kINT32EXL1EEEE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kINT32, true>"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kINT32EXL1EEE4typeE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kINT32, true>::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kINT32EEE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kINT32>"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kINT32EE4typeE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kINT32>::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kINT64EXL1EEEE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kINT64, true>"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kINT64EXL1EEE4typeE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kINT64, true>::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kINT64EEE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kINT64>"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kINT64EE4typeE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kINT64>::type"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType5kINT8EEE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kINT8>"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType5kINT8EE4typeE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kINT8>::type"], [2, 1, 1, "_CPPv4I_bEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kUINT8E9kUnsignedEE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kUINT8, kUnsigned>"], [2, 2, 1, "_CPPv4I_bEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kUINT8E9kUnsignedEE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kUINT8, kUnsigned>::kUnsigned"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kUINT8E9kUnsignedE4typeE", "tensorrt_llm::runtime::CppDataType<nvinfer1::DataType::kUINT8, kUnsigned>::type"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEventE", "tensorrt_llm::runtime::CudaEvent"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb", "tensorrt_llm::runtime::CudaEvent::CudaEvent"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventEj", "tensorrt_llm::runtime::CudaEvent::CudaEvent"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb", "tensorrt_llm::runtime::CudaEvent::CudaEvent::event"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventEj", "tensorrt_llm::runtime::CudaEvent::CudaEvent::flags"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb", "tensorrt_llm::runtime::CudaEvent::CudaEvent::ownsEvent"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7DeleterE", "tensorrt_llm::runtime::CudaEvent::Deleter"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEb", "tensorrt_llm::runtime::CudaEvent::Deleter::Deleter"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEv", "tensorrt_llm::runtime::CudaEvent::Deleter::Deleter"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEb", "tensorrt_llm::runtime::CudaEvent::Deleter::Deleter::ownsEvent"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter10mOwnsEventE", "tensorrt_llm::runtime::CudaEvent::Deleter::mOwnsEvent"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent7DeleterclE7pointer", "tensorrt_llm::runtime::CudaEvent::Deleter::operator()"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent7DeleterclE7pointer", "tensorrt_llm::runtime::CudaEvent::Deleter::operator()::event"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent8EventPtrE", "tensorrt_llm::runtime::CudaEvent::EventPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent12element_typeE", "tensorrt_llm::runtime::CudaEvent::element_type"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent3getEv", "tensorrt_llm::runtime::CudaEvent::get"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent6mEventE", "tensorrt_llm::runtime::CudaEvent::mEvent"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaEvent::pointer"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent11synchronizeEv", "tensorrt_llm::runtime::CudaEvent::synchronize"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStreamE", "tensorrt_llm::runtime::CudaStream"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib", "tensorrt_llm::runtime::CudaStream::CudaStream"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji", "tensorrt_llm::runtime::CudaStream::CudaStream"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib", "tensorrt_llm::runtime::CudaStream::CudaStream::device"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji", "tensorrt_llm::runtime::CudaStream::CudaStream::flags"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib", "tensorrt_llm::runtime::CudaStream::CudaStream::ownsStream"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji", "tensorrt_llm::runtime::CudaStream::CudaStream::priority"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib", "tensorrt_llm::runtime::CudaStream::CudaStream::stream"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7DeleterE", "tensorrt_llm::runtime::CudaStream::Deleter"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEb", "tensorrt_llm::runtime::CudaStream::Deleter::Deleter"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEv", "tensorrt_llm::runtime::CudaStream::Deleter::Deleter"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEb", "tensorrt_llm::runtime::CudaStream::Deleter::Deleter::ownsStream"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter11mOwnsStreamE", "tensorrt_llm::runtime::CudaStream::Deleter::mOwnsStream"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream7DeleterclE12cudaStream_t", "tensorrt_llm::runtime::CudaStream::Deleter::operator()"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream7DeleterclE12cudaStream_t", "tensorrt_llm::runtime::CudaStream::Deleter::operator()::stream"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream9StreamPtrE", "tensorrt_llm::runtime::CudaStream::StreamPtr"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream3getEv", "tensorrt_llm::runtime::CudaStream::get"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream9getDeviceEv", "tensorrt_llm::runtime::CudaStream::getDevice"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7mDeviceE", "tensorrt_llm::runtime::CudaStream::mDevice"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7mStreamE", "tensorrt_llm::runtime::CudaStream::mStream"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordEN9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaStream::record"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordERK9CudaEvent", "tensorrt_llm::runtime::CudaStream::record"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordEN9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaStream::record::event"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordERK9CudaEvent", "tensorrt_llm::runtime::CudaStream::record::event"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream11synchronizeEv", "tensorrt_llm::runtime::CudaStream::synchronize"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitEN9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaStream::wait"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitERK9CudaEvent", "tensorrt_llm::runtime::CudaStream::wait"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitEN9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaStream::wait::event"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitERK9CudaEvent", "tensorrt_llm::runtime::CudaStream::wait::event"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInputE", "tensorrt_llm::runtime::DecodingInput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::batchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::endIds"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::logits"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::maxLength"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9TensorPtrE", "tensorrt_llm::runtime::DecodingInput::TensorPtr"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsListE", "tensorrt_llm::runtime::DecodingInput::badWordsList"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9batchSizeE", "tensorrt_llm::runtime::DecodingInput::batchSize"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput16cacheIndirectionE", "tensorrt_llm::runtime::DecodingInput::cacheIndirection"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13embeddingBiasE", "tensorrt_llm::runtime::DecodingInput::embeddingBias"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput6endIdsE", "tensorrt_llm::runtime::DecodingInput::endIds"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput7lengthsE", "tensorrt_llm::runtime::DecodingInput::lengths"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput6logitsE", "tensorrt_llm::runtime::DecodingInput::logits"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9maxLengthE", "tensorrt_llm::runtime::DecodingInput::maxLength"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput17noRepeatNgramSizeE", "tensorrt_llm::runtime::DecodingInput::noRepeatNgramSize"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput19sequenceLimitLengthE", "tensorrt_llm::runtime::DecodingInput::sequenceLimitLength"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput4stepE", "tensorrt_llm::runtime::DecodingInput::step"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsListE", "tensorrt_llm::runtime::DecodingInput::stopWordsList"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutputE", "tensorrt_llm::runtime::DecodingOutput"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypothesesE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses11cumLogProbsE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::cumLogProbs"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5emptyER13BufferManager", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::empty"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5emptyER13BufferManager", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::empty::manager"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initER13BufferManager11TokenIdType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::init"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initER13BufferManager11TokenIdType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::init::endId"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initER13BufferManager11TokenIdType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::init::manager"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses6isDoneE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::isDone"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses8logProbsE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::logProbs"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses15minNormedScoresE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::minNormedScores"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12normedScoresE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::normedScores"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses8numBeamsE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::numBeams"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12outputIdsTgtE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::outputIdsTgt"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7releaseEv", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::release"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape::batchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape::beamWidth"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape::maxSequenceLength"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses18sequenceLengthsTgtE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::sequenceLengthsTgt"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::slice"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::slice::batchIndex"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::slice::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14DecodingOutputE9TensorPtr", "tensorrt_llm::runtime::DecodingOutput::DecodingOutput"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14DecodingOutputE9TensorPtr", "tensorrt_llm::runtime::DecodingOutput::DecodingOutput::ids"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9TensorPtrE", "tensorrt_llm::runtime::DecodingOutput::TensorPtr"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14beamHypothesesE", "tensorrt_llm::runtime::DecodingOutput::beamHypotheses"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput16cacheIndirectionE", "tensorrt_llm::runtime::DecodingOutput::cacheIndirection"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput11cumLogProbsE", "tensorrt_llm::runtime::DecodingOutput::cumLogProbs"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput8finishedE", "tensorrt_llm::runtime::DecodingOutput::finished"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput11finishedSumE", "tensorrt_llm::runtime::DecodingOutput::finishedSum"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput3idsE", "tensorrt_llm::runtime::DecodingOutput::ids"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput17kNegativeInfinityE", "tensorrt_llm::runtime::DecodingOutput::kNegativeInfinity"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput7lengthsE", "tensorrt_llm::runtime::DecodingOutput::lengths"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput8logProbsE", "tensorrt_llm::runtime::DecodingOutput::logProbs"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9newTokensE", "tensorrt_llm::runtime::DecodingOutput::newTokens"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9parentIdsE", "tensorrt_llm::runtime::DecodingOutput::parentIds"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInputE", "tensorrt_llm::runtime::GenerationInput"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::endId"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::ids"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::lengths"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::packed"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::padId"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput9TensorPtrE", "tensorrt_llm::runtime::GenerationInput::TensorPtr"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput12badWordsListE", "tensorrt_llm::runtime::GenerationInput::badWordsList"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput16embeddingBiasOptE", "tensorrt_llm::runtime::GenerationInput::embeddingBiasOpt"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput5endIdE", "tensorrt_llm::runtime::GenerationInput::endId"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput3idsE", "tensorrt_llm::runtime::GenerationInput::ids"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput7lengthsE", "tensorrt_llm::runtime::GenerationInput::lengths"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput12maxNewTokensE", "tensorrt_llm::runtime::GenerationInput::maxNewTokens"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput6packedE", "tensorrt_llm::runtime::GenerationInput::packed"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput5padIdE", "tensorrt_llm::runtime::GenerationInput::padId"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput13stopWordsListE", "tensorrt_llm::runtime::GenerationInput::stopWordsList"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutputE", "tensorrt_llm::runtime::GenerationOutput"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput8CallbackE", "tensorrt_llm::runtime::GenerationOutput::Callback"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput16GenerationOutputE9TensorPtr", "tensorrt_llm::runtime::GenerationOutput::GenerationOutput"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput16GenerationOutputE9TensorPtr", "tensorrt_llm::runtime::GenerationOutput::GenerationOutput::ids"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput9TensorPtrE", "tensorrt_llm::runtime::GenerationOutput::TensorPtr"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput13contextLogitsE", "tensorrt_llm::runtime::GenerationOutput::contextLogits"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput3idsE", "tensorrt_llm::runtime::GenerationOutput::ids"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput8logProbsE", "tensorrt_llm::runtime::GenerationOutput::logProbs"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput16onTokenGeneratedE", "tensorrt_llm::runtime::GenerationOutput::onTokenGenerated"], [2, 1, 1, "_CPPv4I0EN12tensorrt_llm7runtime10GptDecoderE", "tensorrt_llm::runtime::GptDecoder"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder13CudaStreamPtrE", "tensorrt_llm::runtime::GptDecoder::CudaStreamPtr"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderE6size_t6size_tRK13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoder::GptDecoder"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderE6size_t6size_tRK13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoder::GptDecoder::stream"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderE6size_t6size_tRK13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoder::GptDecoder::vocabSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderE6size_t6size_tRK13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoder::GptDecoder::vocabSizePadded"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime10GptDecoderE", "tensorrt_llm::runtime::GptDecoder::T"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forward"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forward::input"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forward::output"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forwardAsync"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forwardAsync::input"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forwardAsync::output"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10mAllocatorE", "tensorrt_llm::runtime::GptDecoder::mAllocator"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder19mDynamicDecodeLayerE", "tensorrt_llm::runtime::GptDecoder::mDynamicDecodeLayer"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder8mManagerE", "tensorrt_llm::runtime::GptDecoder::mManager"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t", "tensorrt_llm::runtime::GptDecoder::setup"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t", "tensorrt_llm::runtime::GptDecoder::setup::batchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t", "tensorrt_llm::runtime::GptDecoder::setup::samplingConfig"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatchE", "tensorrt_llm::runtime::GptDecoderBatch"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13CudaStreamPtrE", "tensorrt_llm::runtime::GptDecoderBatch::CudaStreamPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16DecodingInputPtrE", "tensorrt_llm::runtime::GptDecoderBatch::DecodingInputPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch17DecodingOutputPtrE", "tensorrt_llm::runtime::GptDecoderBatch::DecodingOutputPtr"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoderBatch::GptDecoderBatch"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoderBatch::GptDecoderBatch::stream"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoderBatch::GptDecoderBatch::vocabSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoderBatch::GptDecoderBatch::vocabSizePadded"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13GptDecoderPtrE", "tensorrt_llm::runtime::GptDecoderBatch::GptDecoderPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9TensorPtrE", "tensorrt_llm::runtime::GptDecoderBatch::TensorPtr"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync::input"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync::input"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync::output"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync::output"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE", "tensorrt_llm::runtime::GptDecoderBatch::forwardSync"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE", "tensorrt_llm::runtime::GptDecoderBatch::forwardSync::e"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch14getCumLogProbsEv", "tensorrt_llm::runtime::GptDecoderBatch::getCumLogProbs"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch17getFinalOutputIdsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getFinalOutputIds"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch17getFinalOutputIdsEv", "tensorrt_llm::runtime::GptDecoderBatch::getFinalOutputIds"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch17getFinalOutputIdsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getFinalOutputIds::batchIdx"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getFinishedEv", "tensorrt_llm::runtime::GptDecoderBatch::getFinished"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch16getFinishedBeamsEv", "tensorrt_llm::runtime::GptDecoderBatch::getFinishedBeams"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch13getNbFinishedEv", "tensorrt_llm::runtime::GptDecoderBatch::getNbFinished"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch10getNbStepsEv", "tensorrt_llm::runtime::GptDecoderBatch::getNbSteps"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getNewTokensEv", "tensorrt_llm::runtime::GptDecoderBatch::getNewTokens"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getOutputIds"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsEv", "tensorrt_llm::runtime::GptDecoderBatch::getOutputIds"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getOutputIds::batchIdx"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch16getOutputLengthsEv", "tensorrt_llm::runtime::GptDecoderBatch::getOutputLengths"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getParentIdsEv", "tensorrt_llm::runtime::GptDecoderBatch::getParentIds"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14isFinishedSyncEv", "tensorrt_llm::runtime::GptDecoderBatch::isFinishedSync"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mActualBatchSizeE", "tensorrt_llm::runtime::GptDecoderBatch::mActualBatchSize"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11mBeamWidthsE", "tensorrt_llm::runtime::GptDecoderBatch::mBeamWidths"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mBufferManagerE", "tensorrt_llm::runtime::GptDecoderBatch::mBufferManager"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9mDecodersE", "tensorrt_llm::runtime::GptDecoderBatch::mDecoders"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mDecodingInputsE", "tensorrt_llm::runtime::GptDecoderBatch::mDecodingInputs"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mDecodingOutputsE", "tensorrt_llm::runtime::GptDecoderBatch::mDecodingOutputs"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9mFinishedE", "tensorrt_llm::runtime::GptDecoderBatch::mFinished"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12mFinishedSumE", "tensorrt_llm::runtime::GptDecoderBatch::mFinishedSum"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mForwardEventE", "tensorrt_llm::runtime::GptDecoderBatch::mForwardEvent"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mForwardTokenE", "tensorrt_llm::runtime::GptDecoderBatch::mForwardToken"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch19mJointDecodingInputE", "tensorrt_llm::runtime::GptDecoderBatch::mJointDecodingInput"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch20mJointDecodingOutputE", "tensorrt_llm::runtime::GptDecoderBatch::mJointDecodingOutput"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mMaxNewTokensE", "tensorrt_llm::runtime::GptDecoderBatch::mMaxNewTokens"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch18mMaxSequenceLengthE", "tensorrt_llm::runtime::GptDecoderBatch::mMaxSequenceLength"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8mNbStepsE", "tensorrt_llm::runtime::GptDecoderBatch::mNbSteps"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch7mStreamE", "tensorrt_llm::runtime::GptDecoderBatch::mStream"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8mStreamsE", "tensorrt_llm::runtime::GptDecoderBatch::mStreams"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10mVocabSizeE", "tensorrt_llm::runtime::GptDecoderBatch::mVocabSize"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mVocabSizePaddedE", "tensorrt_llm::runtime::GptDecoderBatch::mVocabSizePadded"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newBatch"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newBatch::inputs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newBatch::samplingConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequest"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequest::batchIdx"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequest::request"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequest::samplingConfig"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch18postProcessRequestE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::postProcessRequest"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch18postProcessRequestE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::postProcessRequest::batchIdx"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup::dtype"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup::maxBatchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup::maxBeamWidth"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptDecoderBatch::setup::maxSequenceLength"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfigE", "tensorrt_llm::runtime::GptJsonConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::modelConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::name"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::pipelineParallelism"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::precision"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::tensorParallelism"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfig", "tensorrt_llm::runtime::GptJsonConfig::engineFilename"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::engineFilename"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::engineFilename::model"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfig", "tensorrt_llm::runtime::GptJsonConfig::engineFilename::worldConfig"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::engineFilename::worldConfig"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14getModelConfigEv", "tensorrt_llm::runtime::GptJsonConfig::getModelConfig"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig7getNameEv", "tensorrt_llm::runtime::GptJsonConfig::getName"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig22getPipelineParallelismEv", "tensorrt_llm::runtime::GptJsonConfig::getPipelineParallelism"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getPrecisionEv", "tensorrt_llm::runtime::GptJsonConfig::getPrecision"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig20getTensorParallelismEv", "tensorrt_llm::runtime::GptJsonConfig::getTensorParallelism"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getWorldSizeEv", "tensorrt_llm::runtime::GptJsonConfig::getWorldSize"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig15mGptModelConfigE", "tensorrt_llm::runtime::GptJsonConfig::mGptModelConfig"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5mNameE", "tensorrt_llm::runtime::GptJsonConfig::mName"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig20mPipelineParallelismE", "tensorrt_llm::runtime::GptJsonConfig::mPipelineParallelism"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig10mPrecisionE", "tensorrt_llm::runtime::GptJsonConfig::mPrecision"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig18mTensorParallelismE", "tensorrt_llm::runtime::GptJsonConfig::mTensorParallelism"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt10filesystem4pathE", "tensorrt_llm::runtime::GptJsonConfig::parse"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::parse"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERNSt7istreamE", "tensorrt_llm::runtime::GptJsonConfig::parse"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::parse::json"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERNSt7istreamE", "tensorrt_llm::runtime::GptJsonConfig::parse::json"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt10filesystem4pathE", "tensorrt_llm::runtime::GptJsonConfig::parse::path"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfigE", "tensorrt_llm::runtime::GptModelConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::dtype"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::hiddenSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::nbHeads"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::nbLayers"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::vocabSize"], [2, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariantE", "tensorrt_llm::runtime::GptModelConfig::ModelVariant"], [2, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant4kGlmE", "tensorrt_llm::runtime::GptModelConfig::ModelVariant::kGlm"], [2, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant4kGptE", "tensorrt_llm::runtime::GptModelConfig::ModelVariant::kGpt"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEb", "tensorrt_llm::runtime::GptModelConfig::computeContextLogits"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEv", "tensorrt_llm::runtime::GptModelConfig::computeContextLogits"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEb", "tensorrt_llm::runtime::GptModelConfig::computeContextLogits::computeContextLogits"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getDataTypeEv", "tensorrt_llm::runtime::GptModelConfig::getDataType"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13getHiddenSizeEv", "tensorrt_llm::runtime::GptModelConfig::getHiddenSize"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxBatchSizeEv", "tensorrt_llm::runtime::GptModelConfig::getMaxBatchSize"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMaxInputLenEv", "tensorrt_llm::runtime::GptModelConfig::getMaxInputLen"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxNumTokensEv", "tensorrt_llm::runtime::GptModelConfig::getMaxNumTokens"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxOutputLenEv", "tensorrt_llm::runtime::GptModelConfig::getMaxOutputLen"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getModelVariantEv", "tensorrt_llm::runtime::GptModelConfig::getModelVariant"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig10getNbHeadsEv", "tensorrt_llm::runtime::GptModelConfig::getNbHeads"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getNbKvHeadsEv", "tensorrt_llm::runtime::GptModelConfig::getNbKvHeads"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getNbLayersE8SizeType", "tensorrt_llm::runtime::GptModelConfig::getNbLayers"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getNbLayersE8SizeType", "tensorrt_llm::runtime::GptModelConfig::getNbLayers::pipelineParallelism"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getQuantModeEv", "tensorrt_llm::runtime::GptModelConfig::getQuantMode"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getSizePerHeadEv", "tensorrt_llm::runtime::GptModelConfig::getSizePerHead"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig17getTokensPerBlockEv", "tensorrt_llm::runtime::GptModelConfig::getTokensPerBlock"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getVocabSizeEv", "tensorrt_llm::runtime::GptModelConfig::getVocabSize"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18getVocabSizePaddedE8SizeType", "tensorrt_llm::runtime::GptModelConfig::getVocabSizePadded"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18getVocabSizePaddedE8SizeType", "tensorrt_llm::runtime::GptModelConfig::getVocabSizePadded::worldSize"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21mComputeContextLogitsE", "tensorrt_llm::runtime::GptModelConfig::mComputeContextLogits"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig9mDataTypeE", "tensorrt_llm::runtime::GptModelConfig::mDataType"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig11mHiddenSizeE", "tensorrt_llm::runtime::GptModelConfig::mHiddenSize"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mInputPackedE", "tensorrt_llm::runtime::GptModelConfig::mInputPacked"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxBatchSizeE", "tensorrt_llm::runtime::GptModelConfig::mMaxBatchSize"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxInputLenE", "tensorrt_llm::runtime::GptModelConfig::mMaxInputLen"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxNumTokensE", "tensorrt_llm::runtime::GptModelConfig::mMaxNumTokens"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxOutputLenE", "tensorrt_llm::runtime::GptModelConfig::mMaxOutputLen"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mModelVariantE", "tensorrt_llm::runtime::GptModelConfig::mModelVariant"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig8mNbHeadsE", "tensorrt_llm::runtime::GptModelConfig::mNbHeads"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mNbKvHeadsE", "tensorrt_llm::runtime::GptModelConfig::mNbKvHeads"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig9mNbLayersE", "tensorrt_llm::runtime::GptModelConfig::mNbLayers"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mPagedKvCacheE", "tensorrt_llm::runtime::GptModelConfig::mPagedKvCache"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mQuantModeE", "tensorrt_llm::runtime::GptModelConfig::mQuantMode"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15mTokensPerBlockE", "tensorrt_llm::runtime::GptModelConfig::mTokensPerBlock"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig19mUseCustomAllReduceE", "tensorrt_llm::runtime::GptModelConfig::mUseCustomAllReduce"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig22mUseGptAttentionPluginE", "tensorrt_llm::runtime::GptModelConfig::mUseGptAttentionPlugin"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mVocabSizeE", "tensorrt_llm::runtime::GptModelConfig::mVocabSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBatchSizeE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxBatchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBatchSizeE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxBatchSize::maxBatchSize"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxInputLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxInputLen"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxInputLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxInputLen::maxInputLen"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxNumTokensENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptModelConfig::setMaxNumTokens"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxNumTokensENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptModelConfig::setMaxNumTokens::maxNumTokens"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxOutputLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxOutputLen"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxOutputLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxOutputLen::maxOutputLen"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setModelVariantE12ModelVariant", "tensorrt_llm::runtime::GptModelConfig::setModelVariant"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setModelVariantE12ModelVariant", "tensorrt_llm::runtime::GptModelConfig::setModelVariant::modelVariant"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setNbKvHeadsE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setNbKvHeads"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setNbKvHeadsE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setNbKvHeads::nbKvHeads"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setQuantModeEN6common9QuantModeE", "tensorrt_llm::runtime::GptModelConfig::setQuantMode"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setQuantModeEN6common9QuantModeE", "tensorrt_llm::runtime::GptModelConfig::setQuantMode::QuantMode"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setTokensPerBlockE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setTokensPerBlock"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setTokensPerBlockE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setTokensPerBlock::TokensPerBlock"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig24supportsInflightBatchingEv", "tensorrt_llm::runtime::GptModelConfig::supportsInflightBatching"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEb", "tensorrt_llm::runtime::GptModelConfig::useCustomAllReduce"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEv", "tensorrt_llm::runtime::GptModelConfig::useCustomAllReduce"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEb", "tensorrt_llm::runtime::GptModelConfig::useCustomAllReduce::customAllReduce"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEb", "tensorrt_llm::runtime::GptModelConfig::useGptAttentionPlugin"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEv", "tensorrt_llm::runtime::GptModelConfig::useGptAttentionPlugin"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEb", "tensorrt_llm::runtime::GptModelConfig::useGptAttentionPlugin::useGptAttentionPlugin"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14usePackedInputEb", "tensorrt_llm::runtime::GptModelConfig::usePackedInput"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14usePackedInputEv", "tensorrt_llm::runtime::GptModelConfig::usePackedInput"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14usePackedInputEb", "tensorrt_llm::runtime::GptModelConfig::usePackedInput::inputPacked"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEb", "tensorrt_llm::runtime::GptModelConfig::usePagedKvCache"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEv", "tensorrt_llm::runtime::GptModelConfig::usePagedKvCache"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEb", "tensorrt_llm::runtime::GptModelConfig::usePagedKvCache::pagedKvCache"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime10GptSessionE", "tensorrt_llm::runtime::GptSession"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorE", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor17CudaGraphExecutorEv", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::CudaGraphExecutor"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor5clearEv", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::clear"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6createERK11cudaGraph_t", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::create"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6createERK11cudaGraph_t", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::create::graph"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16cudaGraphExecPtrE", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::cudaGraphExecPtr"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor11hasInstanceEv", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::hasInstance"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6launchERK10CudaStream", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::launch"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6launchERK10CudaStream", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::launch::stream"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor9mInstanceE", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::mInstance"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16prepareNextGraphERK11TllmRuntime8SizeType", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::prepareNextGraph"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16prepareNextGraphERK11TllmRuntime8SizeType", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::prepareNextGraph::nextContextId"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16prepareNextGraphERK11TllmRuntime8SizeType", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::prepareNextGraph::runtime"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6updateERK11cudaGraph_t", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::update"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6updateERK11cudaGraph_t", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::update::graph"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor14uploadToStreamERK10CudaStream", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::uploadToStream"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor14uploadToStreamERK10CudaStream", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::uploadToStream::stream"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorD0Ev", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::~CudaGraphExecutor"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::engineBuffer"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::engineBuffer"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::engineFile"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::engineSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::logger"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::logger"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::logger"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::modelConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::modelConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::modelConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::worldConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::worldConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::worldConfig"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14KvCacheManagerE", "tensorrt_llm::runtime::GptSession::KvCacheManager"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession9LoggerPtrE", "tensorrt_llm::runtime::GptSession::LoggerPtr"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13createBuffersE8SizeType", "tensorrt_llm::runtime::GptSession::createBuffers"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13createBuffersE8SizeType", "tensorrt_llm::runtime::GptSession::createBuffers::numMicroBatches"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createContextsE8SizeType", "tensorrt_llm::runtime::GptSession::createContexts"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createContextsE8SizeType", "tensorrt_llm::runtime::GptSession::createContexts::numMicroBatches"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::createCustomAllReduceWorkspace"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::createCustomAllReduceWorkspace::batchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::createCustomAllReduceWorkspace::beamWidth"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::createCustomAllReduceWorkspace::maxSequenceLength"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::batchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::beamWidth"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::decoderPerRequest"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::logitsType"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::maxSequenceLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType", "tensorrt_llm::runtime::GptSession::createDecoders::numMicroBatches"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21createKvCacheManagersE8SizeType8SizeType8SizeType8SizeTypeNSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::createKvCacheManagers"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21createKvCacheManagersE8SizeType8SizeType8SizeType8SizeTypeNSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::createKvCacheManagers::batchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21createKvCacheManagersE8SizeType8SizeType8SizeType8SizeTypeNSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::createKvCacheManagers::beamWidth"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21createKvCacheManagersE8SizeType8SizeType8SizeType8SizeTypeNSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::createKvCacheManagers::maxSequenceLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21createKvCacheManagersE8SizeType8SizeType8SizeType8SizeTypeNSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::createKvCacheManagers::maxTokensInPagedKvCache"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21createKvCacheManagersE8SizeType8SizeType8SizeType8SizeTypeNSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::createKvCacheManagers::numMicroBatches"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncERN7ITensor9SharedPtrERN7ITensor9SharedPtrE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::decoderStepAsync"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncERN7ITensor9SharedPtrERN7ITensor9SharedPtrE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::decoderStepAsync::decoderStep"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncERN7ITensor9SharedPtrERN7ITensor9SharedPtrE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::decoderStepAsync::microBatchId"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncERN7ITensor9SharedPtrERN7ITensor9SharedPtrE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::decoderStepAsync::newTokens"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncERN7ITensor9SharedPtrERN7ITensor9SharedPtrE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::decoderStepAsync::outputIds"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17finalizeOutputIdsER7ITensor8SizeType", "tensorrt_llm::runtime::GptSession::finalizeOutputIds"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17finalizeOutputIdsER7ITensor8SizeType", "tensorrt_llm::runtime::GptSession::finalizeOutputIds::microBatchId"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17finalizeOutputIdsER7ITensor8SizeType", "tensorrt_llm::runtime::GptSession::finalizeOutputIds::outputIds"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generate"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generate::inputs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generate::outputs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generate::samplingConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18generateMultiBatchER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generateMultiBatch"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18generateMultiBatchER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generateMultiBatch::inputs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18generateMultiBatchER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generateMultiBatch::outputs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18generateMultiBatchER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generateMultiBatch::samplingConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19generateSingleBatchER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generateSingleBatch"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19generateSingleBatchER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generateSingleBatch::inputs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19generateSingleBatchER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generateSingleBatch::outputs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19generateSingleBatchER16GenerationOutputRK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::GptSession::generateSingleBatch::samplingConfig"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16getBufferManagerEv", "tensorrt_llm::runtime::GptSession::getBufferManager"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession9getDeviceEv", "tensorrt_llm::runtime::GptSession::getDevice"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession9getLoggerEv", "tensorrt_llm::runtime::GptSession::getLogger"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession14getModelConfigEv", "tensorrt_llm::runtime::GptSession::getModelConfig"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession14getWorldConfigEv", "tensorrt_llm::runtime::GptSession::getWorldConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13initNewTokensERK15GenerationInputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initNewTokens"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13initNewTokensERK15GenerationInputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initNewTokens::inputs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13initNewTokensERK15GenerationInputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initNewTokens::microBatchId"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13initNewTokensERK15GenerationInputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initNewTokens::samplingConfig"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession15isCudaGraphModeEv", "tensorrt_llm::runtime::GptSession::isCudaGraphMode"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::kvCacheAddSequences"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::kvCacheAddSequences::beamWidth"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::kvCacheAddSequences::microBatchId"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8mBuffersE", "tensorrt_llm::runtime::GptSession::mBuffers"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10mCommEventE", "tensorrt_llm::runtime::GptSession::mCommEvent"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession11mCommStreamE", "tensorrt_llm::runtime::GptSession::mCommStream"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19mCudaGraphInstancesE", "tensorrt_llm::runtime::GptSession::mCudaGraphInstances"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14mCudaGraphModeE", "tensorrt_llm::runtime::GptSession::mCudaGraphMode"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession25mDecoderMaxSequenceLengthE", "tensorrt_llm::runtime::GptSession::mDecoderMaxSequenceLength"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession9mDecodersE", "tensorrt_llm::runtime::GptSession::mDecoders"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession7mDeviceE", "tensorrt_llm::runtime::GptSession::mDevice"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16mKvCacheManagersE", "tensorrt_llm::runtime::GptSession::mKvCacheManagers"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession7mLoggerE", "tensorrt_llm::runtime::GptSession::mLogger"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession12mModelConfigE", "tensorrt_llm::runtime::GptSession::mModelConfig"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16mNumMicroBatchesE", "tensorrt_llm::runtime::GptSession::mNumMicroBatches"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13mPipelineCommE", "tensorrt_llm::runtime::GptSession::mPipelineComm"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15mReceivedEventsE", "tensorrt_llm::runtime::GptSession::mReceivedEvents"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8mRuntimeE", "tensorrt_llm::runtime::GptSession::mRuntime"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession12mWorldConfigE", "tensorrt_llm::runtime::GptSession::mWorldConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16setCudaGraphModeEb", "tensorrt_llm::runtime::GptSession::setCudaGraphMode"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16setCudaGraphModeEb", "tensorrt_llm::runtime::GptSession::setCudaGraphMode::value"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession5setupE8SizeType8SizeType8SizeTypebNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::setup"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession5setupE8SizeType8SizeType8SizeTypebNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::setup::decoderPerRequest"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession5setupE8SizeType8SizeType8SizeTypebNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::setup::maxBatchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession5setupE8SizeType8SizeType8SizeTypebNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::setup::maxBeamWidth"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession5setupE8SizeType8SizeType8SizeTypebNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::setup::maxSequenceLength"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession5setupE8SizeType8SizeType8SizeTypebNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::setup::maxTokensInPagedKvCache"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession5setupE8SizeType8SizeType8SizeTypebNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::setup::numMicroBatches"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::shouldStopSync"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::shouldStopSync::batchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::shouldStopSync::beamWidth"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::shouldStopSync::microBatchId"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime7IBufferE", "tensorrt_llm::runtime::IBuffer"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer8DataTypeE", "tensorrt_llm::runtime::IBuffer::DataType"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferERK7IBuffer", "tensorrt_llm::runtime::IBuffer::IBuffer"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferEv", "tensorrt_llm::runtime::IBuffer::IBuffer"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer14SharedConstPtrE", "tensorrt_llm::runtime::IBuffer::SharedConstPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer9SharedPtrE", "tensorrt_llm::runtime::IBuffer::SharedPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer14UniqueConstPtrE", "tensorrt_llm::runtime::IBuffer::UniqueConstPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer9UniquePtrE", "tensorrt_llm::runtime::IBuffer::UniquePtr"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE", "tensorrt_llm::runtime::IBuffer::data"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataEv", "tensorrt_llm::runtime::IBuffer::data"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE", "tensorrt_llm::runtime::IBuffer::data"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataEv", "tensorrt_llm::runtime::IBuffer::data"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE", "tensorrt_llm::runtime::IBuffer::data::index"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE", "tensorrt_llm::runtime::IBuffer::data::index"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer11getCapacityEv", "tensorrt_llm::runtime::IBuffer::getCapacity"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer11getDataTypeEv", "tensorrt_llm::runtime::IBuffer::getDataType"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer13getMemoryTypeEv", "tensorrt_llm::runtime::IBuffer::getMemoryType"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7getSizeEv", "tensorrt_llm::runtime::IBuffer::getSize"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer14getSizeInBytesEv", "tensorrt_llm::runtime::IBuffer::getSizeInBytes"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer10memoryTypeEPKv", "tensorrt_llm::runtime::IBuffer::memoryType"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer10memoryTypeEPKv", "tensorrt_llm::runtime::IBuffer::memoryType::data"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBufferaSERK7IBuffer", "tensorrt_llm::runtime::IBuffer::operator="], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer7releaseEv", "tensorrt_llm::runtime::IBuffer::release"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer6resizeENSt6size_tE", "tensorrt_llm::runtime::IBuffer::resize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer6resizeENSt6size_tE", "tensorrt_llm::runtime::IBuffer::resize::newSize"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice"], [2, 2, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::TConstPtr"], [2, 2, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::TConstPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::buffer"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::buffer"], [2, 4, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::offset"], [2, 4, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::offset"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::offset"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::offset"], [2, 4, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::size"], [2, 4, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::tensor"], [2, 4, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::tensor"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7toBytesENSt6size_tE", "tensorrt_llm::runtime::IBuffer::toBytes"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7toBytesENSt6size_tE", "tensorrt_llm::runtime::IBuffer::toBytes::size"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtr", "tensorrt_llm::runtime::IBuffer::view"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view"], [2, 2, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::TConstPtr"], [2, 4, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::size"], [2, 4, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::tensor"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtr", "tensorrt_llm::runtime::IBuffer::view::tensor"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::tensor"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE", "tensorrt_llm::runtime::IBuffer::wrap"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::T"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::T"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE", "tensorrt_llm::runtime::IBuffer::wrap::T"], [2, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::capacity"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::capacity"], [2, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::data"], [2, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::data"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::data"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::data"], [2, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::size"], [2, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::type"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::type"], [2, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE", "tensorrt_llm::runtime::IBuffer::wrap::v"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7IBufferD0Ev", "tensorrt_llm::runtime::IBuffer::~IBuffer"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoderE", "tensorrt_llm::runtime::IGptDecoder"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createEN8nvinfer18DataTypeE6size_t6size_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::create"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createEN8nvinfer18DataTypeE6size_t6size_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::create::dtype"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createEN8nvinfer18DataTypeE6size_t6size_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::create::stream"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createEN8nvinfer18DataTypeE6size_t6size_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::create::vocabSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createEN8nvinfer18DataTypeE6size_t6size_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::create::vocabSizePadded"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forward"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forward::input"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forward::output"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forwardAsync"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forwardAsync::input"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forwardAsync::output"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree::decodingInput"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree::decodingOutput"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree::finalOutputIds"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree::manager"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t", "tensorrt_llm::runtime::IGptDecoder::setup"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t", "tensorrt_llm::runtime::IGptDecoder::setup::batchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t", "tensorrt_llm::runtime::IGptDecoder::setup::samplingConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoderD0Ev", "tensorrt_llm::runtime::IGptDecoder::~IGptDecoder"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatchE", "tensorrt_llm::runtime::IGptDecoderBatch"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoderBatch::CudaStreamPtr"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch16IGptDecoderBatchEv", "tensorrt_llm::runtime::IGptDecoderBatch::IGptDecoderBatch"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch9TensorPtrE", "tensorrt_llm::runtime::IGptDecoderBatch::TensorPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch8TokenPtrE", "tensorrt_llm::runtime::IGptDecoderBatch::TokenPtr"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch7forwardERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forward"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch7forwardERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forward::input"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch7forwardERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forward::output"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forwardAsync"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forwardAsync::input"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forwardAsync::output"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE", "tensorrt_llm::runtime::IGptDecoderBatch::forwardSync"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE", "tensorrt_llm::runtime::IGptDecoderBatch::forwardSync::token"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch14getCumLogProbsEv", "tensorrt_llm::runtime::IGptDecoderBatch::getCumLogProbs"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch17getFinalOutputIdsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getFinalOutputIds"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch17getFinalOutputIdsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getFinalOutputIds::batchIdx"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getFinishedEv", "tensorrt_llm::runtime::IGptDecoderBatch::getFinished"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch16getFinishedBeamsEv", "tensorrt_llm::runtime::IGptDecoderBatch::getFinishedBeams"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch10getNbStepsEv", "tensorrt_llm::runtime::IGptDecoderBatch::getNbSteps"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getOutputIdsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getOutputIds"], [2, 4, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getOutputIdsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getOutputIds::batchIdx"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch16getOutputLengthsEv", "tensorrt_llm::runtime::IGptDecoderBatch::getOutputLengths"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getParentIdsEv", "tensorrt_llm::runtime::IGptDecoderBatch::getParentIds"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::IGptDecoderBatch::newRequest"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::IGptDecoderBatch::newRequest::batchIdx"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::IGptDecoderBatch::newRequest::request"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::IGptDecoderBatch::newRequest::samplingConfig"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoderE", "tensorrt_llm::runtime::IStatefulGptDecoder"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder13CudaStreamPtrE", "tensorrt_llm::runtime::IStatefulGptDecoder::CudaStreamPtr"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder19IStatefulGptDecoderEv", "tensorrt_llm::runtime::IStatefulGptDecoder::IStatefulGptDecoder"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder9TensorPtrE", "tensorrt_llm::runtime::IStatefulGptDecoder::TensorPtr"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder7forwardERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forward"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder7forwardERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forward::input"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder7forwardERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forward::output"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forwardAsync"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forwardAsync::input"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forwardAsync::output"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder17getFinalOutputIdsEv", "tensorrt_llm::runtime::IStatefulGptDecoder::getFinalOutputIds"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder13getNbFinishedEv", "tensorrt_llm::runtime::IStatefulGptDecoder::getNbFinished"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getNewTokensEv", "tensorrt_llm::runtime::IStatefulGptDecoder::getNewTokens"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getOutputIdsEv", "tensorrt_llm::runtime::IStatefulGptDecoder::getOutputIds"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder14isFinishedSyncEv", "tensorrt_llm::runtime::IStatefulGptDecoder::isFinishedSync"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::newBatch"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::newBatch::inputs"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK14SamplingConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::newBatch::samplingConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::dtype"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::maxBatchSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::maxBeamWidth"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::maxSequenceLength"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime7ITensorE", "tensorrt_llm::runtime::ITensor"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorERK7ITensor", "tensorrt_llm::runtime::ITensor::ITensor"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorEv", "tensorrt_llm::runtime::ITensor::ITensor"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5ShapeE", "tensorrt_llm::runtime::ITensor::Shape"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor14SharedConstPtrE", "tensorrt_llm::runtime::ITensor::SharedConstPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9SharedPtrE", "tensorrt_llm::runtime::ITensor::SharedPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor14UniqueConstPtrE", "tensorrt_llm::runtime::ITensor::UniqueConstPtr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9UniquePtrE", "tensorrt_llm::runtime::ITensor::UniquePtr"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime7ITensor8getShapeEv", "tensorrt_llm::runtime::ITensor::getShape"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9makeShapeERKNSt16initializer_listI8SizeTypeEE", "tensorrt_llm::runtime::ITensor::makeShape"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9makeShapeERKNSt16initializer_listI8SizeTypeEE", "tensorrt_llm::runtime::ITensor::makeShape::dims"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensoraSERK7ITensor", "tensorrt_llm::runtime::ITensor::operator="], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7reshapeERK5Shape", "tensorrt_llm::runtime::ITensor::reshape"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7reshapeERK5Shape", "tensorrt_llm::runtime::ITensor::reshape::dims"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice"], [2, 2, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::TConstPtr"], [2, 2, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::TConstPtr"], [2, 4, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::offset"], [2, 4, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::offset"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::offset"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::offset"], [2, 4, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::size"], [2, 4, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::tensor"], [2, 4, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::tensor"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::tensor"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::tensor"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeE8SizeType", "tensorrt_llm::runtime::ITensor::squeeze"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::squeeze"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeE8SizeType", "tensorrt_llm::runtime::ITensor::squeeze::dim"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::squeeze::dim"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::squeeze::shape"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor8toStringERK5Shape", "tensorrt_llm::runtime::ITensor::toString"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor8toStringERK5Shape", "tensorrt_llm::runtime::ITensor::toString::dims"], [2, 3, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape", "tensorrt_llm::runtime::ITensor::view"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewE9SharedPtr", "tensorrt_llm::runtime::ITensor::view"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape", "tensorrt_llm::runtime::ITensor::view"], [2, 2, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape", "tensorrt_llm::runtime::ITensor::view::TConstPtr"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape", "tensorrt_llm::runtime::ITensor::view::buffer"], [2, 4, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape", "tensorrt_llm::runtime::ITensor::view::dims"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape", "tensorrt_llm::runtime::ITensor::view::dims"], [2, 4, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape", "tensorrt_llm::runtime::ITensor::view::tensor"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewE9SharedPtr", "tensorrt_llm::runtime::ITensor::view::tensor"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor6volumeERK5Shape", "tensorrt_llm::runtime::ITensor::volume"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor6volumeERK5Shape", "tensorrt_llm::runtime::ITensor::volume::dims"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor17volumeNonNegativeERK5Shape", "tensorrt_llm::runtime::ITensor::volumeNonNegative"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor17volumeNonNegativeERK5Shape", "tensorrt_llm::runtime::ITensor::volumeNonNegative::shape"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape", "tensorrt_llm::runtime::ITensor::wrap"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape", "tensorrt_llm::runtime::ITensor::wrap"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape", "tensorrt_llm::runtime::ITensor::wrap"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape", "tensorrt_llm::runtime::ITensor::wrap::T"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::T"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::T"], [2, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::capacity"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::capacity"], [2, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape", "tensorrt_llm::runtime::ITensor::wrap::data"], [2, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::data"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::data"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::data"], [2, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape", "tensorrt_llm::runtime::ITensor::wrap::shape"], [2, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::shape"], [2, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::shape"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::shape"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::shape"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::type"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::type"], [2, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::v"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7ITensorD0Ev", "tensorrt_llm::runtime::ITensor::~ITensor"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemoryE", "tensorrt_llm::runtime::IpcMemory"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory10FLAGS_SIZEE", "tensorrt_llm::runtime::IpcMemory::FLAGS_SIZE"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryE11WorldConfigNSt6size_tE", "tensorrt_llm::runtime::IpcMemory::IpcMemory"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryE11WorldConfigNSt6size_tE", "tensorrt_llm::runtime::IpcMemory::IpcMemory::bufferSize"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryE11WorldConfigNSt6size_tE", "tensorrt_llm::runtime::IpcMemory::IpcMemory::worldConfig"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9TensorPtrE", "tensorrt_llm::runtime::IpcMemory::TensorPtr"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory17allocateIpcMemoryEv", "tensorrt_llm::runtime::IpcMemory::allocateIpcMemory"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory16destroyIpcMemoryEv", "tensorrt_llm::runtime::IpcMemory::destroyIpcMemory"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime9IpcMemory17getCommPtrsTensorEv", "tensorrt_llm::runtime::IpcMemory::getCommPtrsTensor"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory10mBufferPtrE", "tensorrt_llm::runtime::IpcMemory::mBufferPtr"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory11mBufferSizeE", "tensorrt_llm::runtime::IpcMemory::mBufferSize"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9mCommPtrsE", "tensorrt_llm::runtime::IpcMemory::mCommPtrs"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory12mWorldConfigE", "tensorrt_llm::runtime::IpcMemory::mWorldConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemoryD0Ev", "tensorrt_llm::runtime::IpcMemory::~IpcMemory"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCountersE", "tensorrt_llm::runtime::MemoryCounters"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8DiffTypeE", "tensorrt_llm::runtime::MemoryCounters::DiffType"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters14MemoryCountersEv", "tensorrt_llm::runtime::MemoryCounters::MemoryCounters"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8SizeTypeE", "tensorrt_llm::runtime::MemoryCounters::SizeType"], [2, 3, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate"], [2, 2, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate::T"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate::memoryType"], [2, 4, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate::size"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8SizeTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString::bytes"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8SizeTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString::bytes"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString::precision"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8SizeTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString::precision"], [2, 3, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate"], [2, 2, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate::T"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate::memoryType"], [2, 4, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate::size"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate::size"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getCpuEv", "tensorrt_llm::runtime::MemoryCounters::getCpu"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getCpuDiffEv", "tensorrt_llm::runtime::MemoryCounters::getCpuDiff"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getGpuEv", "tensorrt_llm::runtime::MemoryCounters::getGpu"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getGpuDiffEv", "tensorrt_llm::runtime::MemoryCounters::getGpuDiff"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters11getInstanceEv", "tensorrt_llm::runtime::MemoryCounters::getInstance"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters9getPinnedEv", "tensorrt_llm::runtime::MemoryCounters::getPinned"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters13getPinnedDiffEv", "tensorrt_llm::runtime::MemoryCounters::getPinnedDiff"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mCpuE", "tensorrt_llm::runtime::MemoryCounters::mCpu"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mCpuDiffE", "tensorrt_llm::runtime::MemoryCounters::mCpuDiff"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mGpuE", "tensorrt_llm::runtime::MemoryCounters::mGpu"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mGpuDiffE", "tensorrt_llm::runtime::MemoryCounters::mGpuDiff"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters9mInstanceE", "tensorrt_llm::runtime::MemoryCounters::mInstance"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters7mPinnedE", "tensorrt_llm::runtime::MemoryCounters::mPinned"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters11mPinnedDiffE", "tensorrt_llm::runtime::MemoryCounters::mPinnedDiff"], [2, 6, 1, "_CPPv4N12tensorrt_llm7runtime10MemoryTypeE", "tensorrt_llm::runtime::MemoryType"], [2, 7, 1, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kCPUE", "tensorrt_llm::runtime::MemoryType::kCPU"], [2, 7, 1, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kGPUE", "tensorrt_llm::runtime::MemoryType::kGPU"], [2, 7, 1, "_CPPv4N12tensorrt_llm7runtime10MemoryType7kPINNEDE", "tensorrt_llm::runtime::MemoryType::kPINNED"], [2, 1, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime16MemoryTypeStringE", "tensorrt_llm::runtime::MemoryTypeString"], [2, 2, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime16MemoryTypeStringE", "tensorrt_llm::runtime::MemoryTypeString::T"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEEE", "tensorrt_llm::runtime::MemoryTypeString<MemoryType::kCPU>"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEE5valueE", "tensorrt_llm::runtime::MemoryTypeString<MemoryType::kCPU>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEEE", "tensorrt_llm::runtime::MemoryTypeString<MemoryType::kGPU>"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEE5valueE", "tensorrt_llm::runtime::MemoryTypeString<MemoryType::kGPU>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEEE", "tensorrt_llm::runtime::MemoryTypeString<MemoryType::kPINNED>"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEE5valueE", "tensorrt_llm::runtime::MemoryTypeString<MemoryType::kPINNED>::value"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [2, 0, 1, "_CPPv4I0EN12tensorrt_llm7runtime18PointerElementTypeE", "tensorrt_llm::runtime::PointerElementType"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime18PointerElementTypeE", "tensorrt_llm::runtime::PointerElementType::T"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfigE", "tensorrt_llm::runtime::SamplingConfig"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9FloatTypeE", "tensorrt_llm::runtime::SamplingConfig::FloatType"], [2, 0, 1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig6OptVecE", "tensorrt_llm::runtime::SamplingConfig::OptVec"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig6OptVecE", "tensorrt_llm::runtime::SamplingConfig::OptVec::T"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigE8SizeType", "tensorrt_llm::runtime::SamplingConfig::SamplingConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigE8SizeType", "tensorrt_llm::runtime::SamplingConfig::SamplingConfig::beamWidth"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig23beamSearchDiversityRateE", "tensorrt_llm::runtime::SamplingConfig::beamSearchDiversityRate"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9beamWidthE", "tensorrt_llm::runtime::SamplingConfig::beamWidth"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig13lengthPenaltyE", "tensorrt_llm::runtime::SamplingConfig::lengthPenalty"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9minLengthE", "tensorrt_llm::runtime::SamplingConfig::minLength"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig15presencePenaltyE", "tensorrt_llm::runtime::SamplingConfig::presencePenalty"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig10randomSeedE", "tensorrt_llm::runtime::SamplingConfig::randomSeed"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig17repetitionPenaltyE", "tensorrt_llm::runtime::SamplingConfig::repetitionPenalty"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig11temperatureE", "tensorrt_llm::runtime::SamplingConfig::temperature"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topKE", "tensorrt_llm::runtime::SamplingConfig::topK"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topPE", "tensorrt_llm::runtime::SamplingConfig::topP"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9topPDecayE", "tensorrt_llm::runtime::SamplingConfig::topPDecay"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig7topPMinE", "tensorrt_llm::runtime::SamplingConfig::topPMin"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig12topPResetIdsE", "tensorrt_llm::runtime::SamplingConfig::topPResetIds"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime8SizeTypeE", "tensorrt_llm::runtime::SizeType"], [2, 0, 1, "_CPPv4I0EN12tensorrt_llm7runtime12StringPtrMapE", "tensorrt_llm::runtime::StringPtrMap"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime12StringPtrMapE", "tensorrt_llm::runtime::StringPtrMap::T"], [2, 1, 1, "_CPPv4I0_bEN12tensorrt_llm7runtime11TRTDataTypeE", "tensorrt_llm::runtime::TRTDataType"], [2, 2, 1, "_CPPv4I0_bEN12tensorrt_llm7runtime11TRTDataTypeE", "tensorrt_llm::runtime::TRTDataType::T"], [2, 1, 1, "_CPPv4I0EN12tensorrt_llm7runtime11TRTDataTypeIP1TEE", "tensorrt_llm::runtime::TRTDataType<T*>"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime11TRTDataTypeIP1TEE", "tensorrt_llm::runtime::TRTDataType<T*>::T"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE15kUnderlyingTypeE", "tensorrt_llm::runtime::TRTDataType<T*>::kUnderlyingType"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE5valueE", "tensorrt_llm::runtime::TRTDataType<T*>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIbEE", "tensorrt_llm::runtime::TRTDataType<bool>"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIbE5valueE", "tensorrt_llm::runtime::TRTDataType<bool>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIfEE", "tensorrt_llm::runtime::TRTDataType<float>"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIfE5valueE", "tensorrt_llm::runtime::TRTDataType<float>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeI4halfEE", "tensorrt_llm::runtime::TRTDataType<half>"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeI4halfE5valueE", "tensorrt_llm::runtime::TRTDataType<half>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEEE", "tensorrt_llm::runtime::TRTDataType<std::int32_t>"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEE5valueE", "tensorrt_llm::runtime::TRTDataType<std::int32_t>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEEE", "tensorrt_llm::runtime::TRTDataType<std::int64_t>"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEE5valueE", "tensorrt_llm::runtime::TRTDataType<std::int64_t>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEEE", "tensorrt_llm::runtime::TRTDataType<std::int8_t>"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEE5valueE", "tensorrt_llm::runtime::TRTDataType<std::int8_t>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEEE", "tensorrt_llm::runtime::TRTDataType<std::uint32_t>"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEE5valueE", "tensorrt_llm::runtime::TRTDataType<std::uint32_t>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEEE", "tensorrt_llm::runtime::TRTDataType<std::uint64_t>"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEE5valueE", "tensorrt_llm::runtime::TRTDataType<std::uint64_t>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEEE", "tensorrt_llm::runtime::TRTDataType<std::uint8_t>"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEE5valueE", "tensorrt_llm::runtime::TRTDataType<std::uint8_t>::value"], [2, 1, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIPvEE", "tensorrt_llm::runtime::TRTDataType<void*>"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIPvE5valueE", "tensorrt_llm::runtime::TRTDataType<void*>::value"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLoggerE", "tensorrt_llm::runtime::TllmLogger"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8getLevelEv", "tensorrt_llm::runtime::TllmLogger::getLevel"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE", "tensorrt_llm::runtime::TllmLogger::log"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE", "tensorrt_llm::runtime::TllmLogger::log::msg"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE", "tensorrt_llm::runtime::TllmLogger::log::severity"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8setLevelE8Severity", "tensorrt_llm::runtime::TllmLogger::setLevel"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8setLevelE8Severity", "tensorrt_llm::runtime::TllmLogger::setLevel::level"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime11TokenIdTypeE", "tensorrt_llm::runtime::TokenIdType"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfigE", "tensorrt_llm::runtime::WorldConfig"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::WorldConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::WorldConfig::gpusPerNode"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::WorldConfig::pipelineParallelism"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::WorldConfig::rank"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::WorldConfig::tensorParallelism"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig9getDeviceEv", "tensorrt_llm::runtime::WorldConfig::getDevice"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig14getGpusPerNodeEv", "tensorrt_llm::runtime::WorldConfig::getGpusPerNode"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig24getPipelineParallelGroupEv", "tensorrt_llm::runtime::WorldConfig::getPipelineParallelGroup"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig23getPipelineParallelRankEv", "tensorrt_llm::runtime::WorldConfig::getPipelineParallelRank"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig22getPipelineParallelismEv", "tensorrt_llm::runtime::WorldConfig::getPipelineParallelism"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getRankEv", "tensorrt_llm::runtime::WorldConfig::getRank"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getSizeEv", "tensorrt_llm::runtime::WorldConfig::getSize"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig21getTensorParallelRankEv", "tensorrt_llm::runtime::WorldConfig::getTensorParallelRank"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig20getTensorParallelismEv", "tensorrt_llm::runtime::WorldConfig::getTensorParallelism"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig27isFirstPipelineParallelRankEv", "tensorrt_llm::runtime::WorldConfig::isFirstPipelineParallelRank"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig26isLastPipelineParallelRankEv", "tensorrt_llm::runtime::WorldConfig::isLastPipelineParallelRank"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig18isPipelineParallelEv", "tensorrt_llm::runtime::WorldConfig::isPipelineParallel"], [2, 3, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig16isTensorParallelEv", "tensorrt_llm::runtime::WorldConfig::isTensorParallel"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig19kDefaultGpusPerNodeE", "tensorrt_llm::runtime::WorldConfig::kDefaultGpusPerNode"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig12mGpusPerNodeE", "tensorrt_llm::runtime::WorldConfig::mGpusPerNode"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig20mPipelineParallelismE", "tensorrt_llm::runtime::WorldConfig::mPipelineParallelism"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig5mRankE", "tensorrt_llm::runtime::WorldConfig::mRank"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig18mTensorParallelismE", "tensorrt_llm::runtime::WorldConfig::mTensorParallelism"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::WorldConfig::mpi"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiERN8nvinfer17ILoggerE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::WorldConfig::mpi"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::WorldConfig::mpi::gpusPerNode"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiERN8nvinfer17ILoggerE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::WorldConfig::mpi::gpusPerNode"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiERN8nvinfer17ILoggerE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::WorldConfig::mpi::logger"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::WorldConfig::mpi::pipelineParallelism"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiERN8nvinfer17ILoggerE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::WorldConfig::mpi::pipelineParallelism"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::WorldConfig::mpi::tensorParallelism"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiERN8nvinfer17ILoggerE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::WorldConfig::mpi::tensorParallelism"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11validConfigERN8nvinfer17ILoggerE8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::validConfig"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11validConfigERN8nvinfer17ILoggerE8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::validConfig::logger"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11validConfigERN8nvinfer17ILoggerE8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::validConfig::pipelineParallelism"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11validConfigERN8nvinfer17ILoggerE8SizeType8SizeType", "tensorrt_llm::runtime::WorldConfig::validConfig::tensorParallelism"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer", "tensorrt_llm::runtime::bufferCast"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer", "tensorrt_llm::runtime::bufferCast"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer", "tensorrt_llm::runtime::bufferCast::T"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer", "tensorrt_llm::runtime::bufferCast::T"], [2, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer", "tensorrt_llm::runtime::bufferCast::buffer"], [2, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer", "tensorrt_llm::runtime::bufferCast::buffer"], [2, 3, 1, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE", "tensorrt_llm::runtime::constPointerCast"], [2, 3, 1, "_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE", "tensorrt_llm::runtime::constPointerCast"], [2, 2, 1, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE", "tensorrt_llm::runtime::constPointerCast::D"], [2, 2, 1, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE", "tensorrt_llm::runtime::constPointerCast::T"], [2, 2, 1, "_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE", "tensorrt_llm::runtime::constPointerCast::T"], [2, 4, 1, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE", "tensorrt_llm::runtime::constPointerCast::ptr"], [2, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE", "tensorrt_llm::runtime::constPointerCast::ptr"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7decoderE", "tensorrt_llm::runtime::decoder"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5InputE", "tensorrt_llm::runtime::decoder::Input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input5InputE9TensorPtr", "tensorrt_llm::runtime::decoder::Input::Input"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input5InputE9TensorPtr", "tensorrt_llm::runtime::decoder::Input::Input::logits"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input9TensorPtrE", "tensorrt_llm::runtime::decoder::Input::TensorPtr"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input16cacheIndirectionE", "tensorrt_llm::runtime::decoder::Input::cacheIndirection"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input6logitsE", "tensorrt_llm::runtime::decoder::Input::logits"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6OutputE", "tensorrt_llm::runtime::decoder::Output"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output6OutputEv", "tensorrt_llm::runtime::decoder::Output::Output"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output9TensorPtrE", "tensorrt_llm::runtime::decoder::Output::TensorPtr"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output16cacheIndirectionE", "tensorrt_llm::runtime::decoder::Output::cacheIndirection"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output15sequenceLengthsE", "tensorrt_llm::runtime::decoder::Output::sequenceLengths"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batchE", "tensorrt_llm::runtime::decoder_batch"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5InputE", "tensorrt_llm::runtime::decoder_batch::Input"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input4BaseE", "tensorrt_llm::runtime::decoder_batch::Input::Base"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputE9TensorPtr", "tensorrt_llm::runtime::decoder_batch::Input::Input"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputE9TensorPtrRKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputE9TensorPtrRKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::active"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputE9TensorPtr", "tensorrt_llm::runtime::decoder_batch::Input::Input::logits"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputE9TensorPtrRKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::logits"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input6activeE", "tensorrt_llm::runtime::decoder_batch::Input::active"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch6OutputE", "tensorrt_llm::runtime::decoder_batch::Output"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7RequestE", "tensorrt_llm::runtime::decoder_batch::Request"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE9TensorPtrNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::decoder_batch::Request::Request"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE9TensorPtrNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::decoder_batch::Request::Request::endId"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE9TensorPtrNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::decoder_batch::Request::Request::ids"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE9TensorPtrNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::decoder_batch::Request::Request::maxNewTokens"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE9TensorPtrNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::decoder_batch::Request::Request::padId"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request9TensorPtrE", "tensorrt_llm::runtime::decoder_batch::Request::TensorPtr"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12badWordsListE", "tensorrt_llm::runtime::decoder_batch::Request::badWordsList"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13embeddingBiasE", "tensorrt_llm::runtime::decoder_batch::Request::embeddingBias"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request5endIdE", "tensorrt_llm::runtime::decoder_batch::Request::endId"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request3idsE", "tensorrt_llm::runtime::decoder_batch::Request::ids"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12maxNewTokensE", "tensorrt_llm::runtime::decoder_batch::Request::maxNewTokens"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13stopWordsListE", "tensorrt_llm::runtime::decoder_batch::Request::stopWordsList"], [2, 1, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5TokenE", "tensorrt_llm::runtime::decoder_batch::Token"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5TokenERR9CudaEventRKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Token::Token"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5TokenERR9CudaEventRKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Token::Token::active"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5TokenERR9CudaEventRKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Token::Token::event"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token6activeE", "tensorrt_llm::runtime::decoder_batch::Token::active"], [2, 5, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5eventE", "tensorrt_llm::runtime::decoder_batch::Token::event"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer", "tensorrt_llm::runtime::operator<<"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor", "tensorrt_llm::runtime::operator<<"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE", "tensorrt_llm::runtime::operator<<"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer", "tensorrt_llm::runtime::operator<<::buffer"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE", "tensorrt_llm::runtime::operator<<::dims"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer", "tensorrt_llm::runtime::operator<<::output"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor", "tensorrt_llm::runtime::operator<<::output"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE", "tensorrt_llm::runtime::operator<<::output"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor", "tensorrt_llm::runtime::operator<<::tensor"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime13setPeerAccessE11WorldConfigb", "tensorrt_llm::runtime::setPeerAccess"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13setPeerAccessE11WorldConfigb", "tensorrt_llm::runtime::setPeerAccess::enable"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime13setPeerAccessE11WorldConfigb", "tensorrt_llm::runtime::setPeerAccess::worldConfig"], [2, 0, 1, "_CPPv4N12tensorrt_llm7runtime5utilsE", "tensorrt_llm::runtime::utils"], [2, 3, 1, "_CPPv4N12tensorrt_llm7runtime5utils10loadEngineERKNSt6stringE", "tensorrt_llm::runtime::utils::loadEngine"], [2, 4, 1, "_CPPv4N12tensorrt_llm7runtime5utils10loadEngineERKNSt6stringE", "tensorrt_llm::runtime::utils::loadEngine::enginePath"], [17, 8, 0, "-", "tensorrt_llm"]], "tensorrt_llm": [[12, 8, 0, "-", "functional"], [14, 8, 0, "-", "models"], [15, 8, 0, "-", "plugin"], [16, 8, 0, "-", "quantization"], [17, 8, 0, "-", "runtime"]], "tensorrt_llm.functional": [[12, 9, 1, "", "AllReduceStrategy"], [12, 9, 1, "", "AttentionMaskType"], [12, 9, 1, "", "DimRange"], [12, 9, 1, "", "LayerNormPositionType"], [12, 9, 1, "", "LayerNormType"], [12, 9, 1, "", "PositionEmbeddingType"], [12, 9, 1, "", "RotaryScalingType"], [12, 9, 1, "", "Tensor"], [12, 13, 1, "", "abs"], [12, 13, 1, "", "activation"], [12, 13, 1, "", "add"], [12, 13, 1, "", "allgather"], [12, 13, 1, "", "allreduce"], [12, 13, 1, "", "arange"], [12, 13, 1, "", "argmax"], [12, 13, 1, "", "assertion"], [12, 13, 1, "", "avg_pool2d"], [12, 13, 1, "", "bert_attention"], [12, 13, 1, "", "broadcast_helper"], [12, 13, 1, "", "cast"], [12, 13, 1, "", "chunk"], [12, 13, 1, "", "clip"], [12, 13, 1, "", "concat"], [12, 13, 1, "", "constant"], [12, 13, 1, "", "constant_to_tensor_"], [12, 13, 1, "", "conv2d"], [12, 13, 1, "", "conv_transpose2d"], [12, 13, 1, "", "cos"], [12, 13, 1, "", "div"], [12, 13, 1, "", "einsum"], [12, 13, 1, "", "elementwise_binary"], [12, 13, 1, "", "embedding"], [12, 13, 1, "", "eq"], [12, 13, 1, "", "exp"], [12, 13, 1, "", "expand"], [12, 13, 1, "", "expand_dims"], [12, 13, 1, "", "expand_dims_like"], [12, 13, 1, "", "expand_mask"], [12, 13, 1, "", "flip"], [12, 13, 1, "", "gather"], [12, 13, 1, "", "gather_last_token_logits"], [12, 13, 1, "", "geglu"], [12, 13, 1, "", "gelu"], [12, 13, 1, "", "generate_alibi_biases"], [12, 13, 1, "", "generate_alibi_slopes"], [12, 13, 1, "", "gpt_attention"], [12, 13, 1, "", "group_norm"], [12, 13, 1, "", "gt"], [12, 13, 1, "", "identity"], [12, 13, 1, "", "index_select"], [12, 13, 1, "", "interpolate"], [12, 13, 1, "", "is_gated_activation"], [12, 13, 1, "", "layer_norm"], [12, 13, 1, "", "lt"], [12, 13, 1, "", "matmul"], [12, 13, 1, "", "max"], [12, 13, 1, "", "maximum"], [12, 13, 1, "", "mean"], [12, 13, 1, "", "minimum"], [12, 13, 1, "", "mul"], [12, 13, 1, "", "non_gated_version"], [12, 13, 1, "", "op_and"], [12, 13, 1, "", "op_or"], [12, 13, 1, "", "outer"], [12, 13, 1, "", "permute"], [12, 13, 1, "", "pow"], [12, 13, 1, "", "recv"], [12, 13, 1, "", "relu"], [12, 13, 1, "", "rms_norm"], [12, 13, 1, "", "round"], [12, 13, 1, "", "select"], [12, 13, 1, "", "send"], [12, 13, 1, "", "shape"], [12, 13, 1, "", "sigmoid"], [12, 13, 1, "", "silu"], [12, 13, 1, "", "sin"], [12, 13, 1, "", "slice"], [12, 13, 1, "", "softmax"], [12, 13, 1, "", "softplus"], [12, 13, 1, "", "split"], [12, 13, 1, "", "sqrt"], [12, 13, 1, "", "squared_relu"], [12, 13, 1, "", "sub"], [12, 13, 1, "", "swiglu"], [12, 13, 1, "", "tanh"], [12, 13, 1, "", "transpose"], [12, 13, 1, "", "unary"], [12, 13, 1, "", "unsqueeze"], [12, 13, 1, "", "view"], [12, 13, 1, "", "where"]], "tensorrt_llm.functional.AllReduceStrategy": [[12, 10, 1, "", "AUTO"], [12, 10, 1, "", "ONESHOT"], [12, 10, 1, "", "RING"], [12, 10, 1, "", "TWOSHOT"]], "tensorrt_llm.functional.AttentionMaskType": [[12, 10, 1, "", "bidirectional"], [12, 10, 1, "", "causal"], [12, 10, 1, "", "padding"]], "tensorrt_llm.functional.LayerNormPositionType": [[12, 10, 1, "", "post_layernorm"], [12, 10, 1, "", "pre_layernorm"]], "tensorrt_llm.functional.LayerNormType": [[12, 10, 1, "", "GroupNorm"], [12, 10, 1, "", "LayerNorm"], [12, 10, 1, "", "RmsNorm"]], "tensorrt_llm.functional.PositionEmbeddingType": [[12, 10, 1, "", "alibi"], [12, 10, 1, "", "alibi_with_scale"], [12, 11, 1, "", "choices"], [12, 11, 1, "", "is_alibi"], [12, 11, 1, "", "is_rope"], [12, 10, 1, "", "learned_absolute"], [12, 10, 1, "", "relative"], [12, 10, 1, "", "rope_gpt_neox"], [12, 10, 1, "", "rope_gptj"]], "tensorrt_llm.functional.RotaryScalingType": [[12, 10, 1, "", "dynamic"], [12, 10, 1, "", "linear"], [12, 10, 1, "", "none"]], "tensorrt_llm.functional.Tensor": [[12, 11, 1, "", "abs"], [12, 11, 1, "", "cast"], [12, 12, 1, "", "dtype"], [12, 11, 1, "", "get_parent"], [12, 11, 1, "", "get_users"], [12, 11, 1, "", "is_dynamic"], [12, 11, 1, "", "is_trt_wrapper"], [12, 12, 1, "", "location"], [12, 11, 1, "", "mark_output"], [12, 11, 1, "", "max"], [12, 11, 1, "", "mean"], [12, 12, 1, "", "name"], [12, 11, 1, "", "ndim"], [12, 11, 1, "", "permute"], [12, 11, 1, "", "rank"], [12, 11, 1, "", "replace_all_uses_with"], [12, 12, 1, "", "shape"], [12, 11, 1, "", "size"], [12, 11, 1, "", "split"], [12, 11, 1, "", "sqrt"], [12, 11, 1, "", "transpose"], [12, 11, 1, "", "view"]], "tensorrt_llm.layers": [[13, 8, 0, "-", "activation"], [13, 8, 0, "-", "attention"], [13, 8, 0, "-", "cast"], [13, 8, 0, "-", "conv"], [13, 8, 0, "-", "embedding"], [13, 8, 0, "-", "linear"], [13, 8, 0, "-", "mlp"], [13, 8, 0, "-", "normalization"], [13, 8, 0, "-", "pooling"]], "tensorrt_llm.layers.activation": [[13, 9, 1, "", "Mish"]], "tensorrt_llm.layers.activation.Mish": [[13, 11, 1, "", "forward"]], "tensorrt_llm.layers.attention": [[13, 9, 1, "", "Attention"], [13, 9, 1, "", "AttentionParams"], [13, 9, 1, "", "BertAttention"], [13, 9, 1, "", "KeyValueCacheParams"]], "tensorrt_llm.layers.attention.Attention": [[13, 11, 1, "", "forward"]], "tensorrt_llm.layers.attention.AttentionParams": [[13, 11, 1, "", "is_valid"], [13, 11, 1, "", "is_valid_cross_attn"]], "tensorrt_llm.layers.attention.BertAttention": [[13, 11, 1, "", "forward"]], "tensorrt_llm.layers.attention.KeyValueCacheParams": [[13, 11, 1, "", "get_first_kv_cache_block_pointers"], [13, 11, 1, "", "get_first_past_key_value"], [13, 11, 1, "", "is_valid"]], "tensorrt_llm.layers.cast": [[13, 9, 1, "", "Cast"]], "tensorrt_llm.layers.cast.Cast": [[13, 11, 1, "", "forward"]], "tensorrt_llm.layers.conv": [[13, 9, 1, "", "Conv2d"], [13, 9, 1, "", "ConvTranspose2d"]], "tensorrt_llm.layers.conv.Conv2d": [[13, 11, 1, "", "forward"]], "tensorrt_llm.layers.conv.ConvTranspose2d": [[13, 11, 1, "", "forward"]], "tensorrt_llm.layers.embedding": [[13, 9, 1, "", "Embedding"], [13, 9, 1, "", "PromptTuningEmbedding"]], "tensorrt_llm.layers.embedding.Embedding": [[13, 11, 1, "", "forward"]], "tensorrt_llm.layers.embedding.PromptTuningEmbedding": [[13, 11, 1, "", "forward"]], "tensorrt_llm.layers.linear": [[13, 10, 1, "", "ColumnLinear"], [13, 9, 1, "", "Linear"], [13, 9, 1, "", "RowLinear"]], "tensorrt_llm.layers.linear.Linear": [[13, 11, 1, "", "forward"], [13, 11, 1, "", "multiply_gather"]], "tensorrt_llm.layers.linear.RowLinear": [[13, 11, 1, "", "forward"], [13, 11, 1, "", "multiply_reduce"]], "tensorrt_llm.layers.mlp": [[13, 9, 1, "", "GatedMLP"], [13, 9, 1, "", "MLP"]], "tensorrt_llm.layers.mlp.GatedMLP": [[13, 11, 1, "", "forward"]], "tensorrt_llm.layers.mlp.MLP": [[13, 11, 1, "", "forward"]], "tensorrt_llm.layers.normalization": [[13, 9, 1, "", "GroupNorm"], [13, 9, 1, "", "LayerNorm"], [13, 9, 1, "", "RmsNorm"]], "tensorrt_llm.layers.normalization.GroupNorm": [[13, 11, 1, "", "forward"]], "tensorrt_llm.layers.normalization.LayerNorm": [[13, 11, 1, "", "forward"]], "tensorrt_llm.layers.normalization.RmsNorm": [[13, 11, 1, "", "forward"]], "tensorrt_llm.layers.pooling": [[13, 9, 1, "", "AvgPool2d"]], "tensorrt_llm.layers.pooling.AvgPool2d": [[13, 11, 1, "", "forward"]], "tensorrt_llm.models": [[14, 9, 1, "", "BaichuanForCausalLM"], [14, 9, 1, "", "BertForQuestionAnswering"], [14, 9, 1, "", "BertModel"], [14, 9, 1, "", "BloomForCausalLM"], [14, 9, 1, "", "BloomModel"], [14, 9, 1, "", "ChatGLM2HeadModel"], [14, 9, 1, "", "ChatGLM2Model"], [14, 9, 1, "", "ChatGLM6BHeadModel"], [14, 9, 1, "", "ChatGLM6BModel"], [14, 9, 1, "", "DecoderModel"], [14, 9, 1, "", "EncoderModel"], [14, 9, 1, "", "FalconForCausalLM"], [14, 9, 1, "", "FalconModel"], [14, 9, 1, "", "GPTJForCausalLM"], [14, 9, 1, "", "GPTJModel"], [14, 9, 1, "", "GPTLMHeadModel"], [14, 9, 1, "", "GPTModel"], [14, 9, 1, "", "GPTNeoXForCausalLM"], [14, 9, 1, "", "GPTNeoXModel"], [14, 9, 1, "", "LLaMAForCausalLM"], [14, 9, 1, "", "LLaMAModel"], [14, 9, 1, "", "OPTLMHeadModel"], [14, 9, 1, "", "OPTModel"], [14, 13, 1, "", "fp8_quantize"], [14, 13, 1, "", "smooth_quantize"], [14, 13, 1, "", "weight_only_groupwise_quantize"], [14, 13, 1, "", "weight_only_quantize"]], "tensorrt_llm.models.BaichuanForCausalLM": [[14, 11, 1, "", "forward"], [14, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.BertForQuestionAnswering": [[14, 11, 1, "", "forward"]], "tensorrt_llm.models.BertModel": [[14, 11, 1, "", "forward"]], "tensorrt_llm.models.BloomForCausalLM": [[14, 11, 1, "", "forward"], [14, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.BloomModel": [[14, 11, 1, "", "forward"]], "tensorrt_llm.models.ChatGLM2HeadModel": [[14, 11, 1, "", "forward"], [14, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.ChatGLM2Model": [[14, 11, 1, "", "forward"]], "tensorrt_llm.models.ChatGLM6BHeadModel": [[14, 11, 1, "", "forward"], [14, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.ChatGLM6BModel": [[14, 11, 1, "", "forward"]], "tensorrt_llm.models.DecoderModel": [[14, 11, 1, "", "forward"], [14, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.EncoderModel": [[14, 11, 1, "", "forward"], [14, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.FalconForCausalLM": [[14, 11, 1, "", "forward"], [14, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.FalconModel": [[14, 11, 1, "", "forward"]], "tensorrt_llm.models.GPTJForCausalLM": [[14, 11, 1, "", "forward"], [14, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.GPTJModel": [[14, 11, 1, "", "forward"]], "tensorrt_llm.models.GPTLMHeadModel": [[14, 11, 1, "", "forward"], [14, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.GPTModel": [[14, 11, 1, "", "forward"]], "tensorrt_llm.models.GPTNeoXForCausalLM": [[14, 11, 1, "", "forward"], [14, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.GPTNeoXModel": [[14, 11, 1, "", "forward"]], "tensorrt_llm.models.LLaMAForCausalLM": [[14, 11, 1, "", "forward"], [14, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.LLaMAModel": [[14, 11, 1, "", "forward"]], "tensorrt_llm.models.OPTLMHeadModel": [[14, 11, 1, "", "forward"], [14, 11, 1, "", "prepare_inputs"]], "tensorrt_llm.models.OPTModel": [[14, 11, 1, "", "forward"]], "tensorrt_llm.quantization": [[16, 9, 1, "", "QuantMode"]], "tensorrt_llm.runtime": [[17, 9, 1, "", "ChatGLM6BHeadModelGenerationSession"], [17, 9, 1, "", "GenerationSequence"], [17, 9, 1, "", "GenerationSession"], [17, 9, 1, "", "KVCacheManager"], [17, 9, 1, "", "ModelConfig"], [17, 9, 1, "", "SamplingConfig"], [17, 9, 1, "", "Session"], [17, 9, 1, "", "TensorInfo"], [17, 13, 1, "", "to_word_list_format"]], "tensorrt_llm.runtime.ChatGLM6BHeadModelGenerationSession": [[17, 10, 1, "", "batch_size"], [17, 10, 1, "", "buffer_allocated"], [17, 10, 1, "", "cuda_graph_mode"], [17, 10, 1, "", "debug_mode"], [17, 10, 1, "", "debug_tensors_to_save"], [17, 10, 1, "", "device"], [17, 10, 1, "", "mapping"], [17, 10, 1, "", "runtime"]], "tensorrt_llm.runtime.GenerationSequence": [[17, 11, 1, "", "get_batch_idx"], [17, 11, 1, "", "get_seq_idx"]], "tensorrt_llm.runtime.GenerationSession": [[17, 10, 1, "", "batch_size"], [17, 10, 1, "", "buffer_allocated"], [17, 12, 1, "", "cross_attention"], [17, 10, 1, "", "cuda_graph_mode"], [17, 11, 1, "", "cuda_stream_guard"], [17, 10, 1, "", "debug_mode"], [17, 10, 1, "", "debug_tensors_to_save"], [17, 11, 1, "", "decode"], [17, 11, 1, "", "decode_batch"], [17, 11, 1, "", "decode_regular"], [17, 11, 1, "", "decode_stream"], [17, 10, 1, "", "device"], [17, 12, 1, "", "dtype"], [17, 11, 1, "", "finalize_decoder"], [17, 12, 1, "", "first_layer"], [17, 12, 1, "", "gather_all_token_logits"], [17, 11, 1, "", "handle_per_step"], [17, 12, 1, "", "has_position_embedding"], [17, 12, 1, "", "has_token_type_embedding"], [17, 12, 1, "", "head_size"], [17, 12, 1, "", "hidden_size"], [17, 12, 1, "", "last_layer"], [17, 10, 1, "", "mapping"], [17, 12, 1, "", "num_heads"], [17, 12, 1, "", "num_heads_kv"], [17, 12, 1, "", "num_layers"], [17, 12, 1, "", "paged_kv_cache"], [17, 11, 1, "", "pp_communicate_final_output_ids"], [17, 11, 1, "", "pp_communicate_new_tokens"], [17, 12, 1, "", "quant_mode"], [17, 12, 1, "", "remove_input_padding"], [17, 10, 1, "", "runtime"], [17, 11, 1, "", "setup"], [17, 12, 1, "", "tokens_per_block"], [17, 12, 1, "", "use_custom_all_reduce"], [17, 12, 1, "", "use_gpt_attention_plugin"], [17, 12, 1, "", "vocab_size"]], "tensorrt_llm.runtime.KVCacheManager": [[17, 11, 1, "", "add_sequence"], [17, 11, 1, "", "get_pointer_arrays"], [17, 11, 1, "", "step"]], "tensorrt_llm.runtime.ModelConfig": [[17, 10, 1, "", "cross_attention"], [17, 10, 1, "", "dtype"], [17, 10, 1, "", "gather_all_token_logits"], [17, 10, 1, "", "gpt_attention_plugin"], [17, 10, 1, "", "has_position_embedding"], [17, 10, 1, "", "has_token_type_embedding"], [17, 10, 1, "", "hidden_size"], [17, 10, 1, "", "model_name"], [17, 10, 1, "", "num_heads"], [17, 10, 1, "", "num_kv_heads"], [17, 10, 1, "", "num_layers"], [17, 10, 1, "", "paged_kv_cache"], [17, 10, 1, "", "quant_mode"], [17, 10, 1, "", "remove_input_padding"], [17, 10, 1, "", "tokens_per_block"], [17, 10, 1, "", "use_custom_all_reduce"], [17, 10, 1, "", "use_prompt_tuning"], [17, 10, 1, "", "vocab_size"]], "tensorrt_llm.runtime.SamplingConfig": [[17, 10, 1, "", "beam_search_diversity_rate"], [17, 10, 1, "", "end_id"], [17, 10, 1, "", "length_penalty"], [17, 10, 1, "", "min_length"], [17, 10, 1, "", "num_beams"], [17, 10, 1, "", "output_cum_log_probs"], [17, 10, 1, "", "output_log_probs"], [17, 10, 1, "", "pad_id"], [17, 10, 1, "", "presence_penalty"], [17, 10, 1, "", "random_seed"], [17, 10, 1, "", "repetition_penalty"], [17, 10, 1, "", "temperature"], [17, 10, 1, "", "top_k"], [17, 10, 1, "", "top_p"], [17, 10, 1, "", "use_beam_hyps"]], "tensorrt_llm.runtime.Session": [[17, 12, 1, "", "context"], [17, 12, 1, "", "engine"], [17, 11, 1, "", "from_engine"], [17, 11, 1, "", "from_serialized_engine"], [17, 11, 1, "", "infer_shapes"], [17, 11, 1, "", "run"], [17, 12, 1, "", "runtime"]], "tensorrt_llm.runtime.TensorInfo": [[17, 10, 1, "", "dtype"], [17, 10, 1, "", "name"], [17, 10, 1, "", "shape"]]}, "objtypes": {"0": "cpp:type", "1": "cpp:class", "2": "cpp:templateParam", "3": "cpp:function", "4": "cpp:functionParam", "5": "cpp:member", "6": "cpp:enum", "7": "cpp:enumerator", "8": "py:module", "9": "py:class", "10": "py:attribute", "11": "py:method", "12": "py:property", "13": "py:function"}, "objnames": {"0": ["cpp", "type", "C++ type"], "1": ["cpp", "class", "C++ class"], "2": ["cpp", "templateParam", "C++ template parameter"], "3": ["cpp", "function", "C++ function"], "4": ["cpp", "functionParam", "C++ function parameter"], "5": ["cpp", "member", "C++ member"], "6": ["cpp", "enum", "C++ enum"], "7": ["cpp", "enumerator", "C++ enumerator"], "8": ["py", "module", "Python module"], "9": ["py", "class", "Python class"], "10": ["py", "attribute", "Python attribute"], "11": ["py", "method", "Python method"], "12": ["py", "property", "Python property"], "13": ["py", "function", "Python function"]}, "titleterms": {"how": [0, 1], "add": 0, "new": 0, "model": [0, 1, 3, 6, 14], "step": [0, 9], "debug": 1, "overview": [1, 9], "unit": 1, "test": 1, "e2": 1, "execut": [1, 4], "error": 1, "runtim": [2, 3, 6, 9, 17], "buffermanag": 2, "h": 2, "common": 2, "cudaev": 2, "cudastream": 2, "decodinginput": 2, "decodingoutput": 2, "generationinput": 2, "generationoutput": 2, "gptdecod": 2, "gptdecoderbatch": 2, "gptjsonconfig": 2, "gptmodelconfig": 2, "gptsession": 2, "ibuff": 2, "igptdecoderbatch": 2, "istatefulgptdecod": 2, "itensor": 2, "ipcutil": 2, "memorycount": 2, "samplingconfig": 2, "tllmlogger": 2, "worldconfig": 2, "tensorrt": [3, 4, 8, 9, 10], "llm": [3, 4, 8, 9, 10], "architectur": 3, "definit": 3, "compil": 3, "weight": [3, 11], "bind": 3, "pattern": [3, 7], "match": 3, "fusion": 3, "plugin": [3, 15], "multi": [3, 4, 5], "gpu": [3, 4, 10], "node": 3, "support": [3, 6, 9, 11], "In": [3, 4, 6], "flight": [3, 4, 6], "batch": [3, 4, 5, 6], "The": [4, 6, 11], "manag": [4, 7], "api": [4, 7, 8], "get": 4, "send": 4, "callback": 4, "request": 4, "interrupt": 4, "statist": 4, "gptmanag": 4, "design": 4, "triton": 4, "infer": 4, "server": 4, "head": 5, "queri": 5, "group": 5, "attent": [5, 13], "import": 5, "note": 5, "pad": 5, "pack": 5, "tensor": [5, 7], "context": 5, "gener": [5, 6], "phase": 5, "inflight": 5, "kv": 5, "cach": 5, "": [5, 8], "contigu": 5, "page": 5, "int8": [5, 11], "fp8": [5, 10, 11], "beam": 5, "search": 5, "input": [5, 6], "qkv": 5, "addit": 5, "featur": 5, "rotari": 5, "posit": 5, "embed": [5, 13], "rope": 5, "alibi": 5, "scale": 5, "factor": 5, "cross": 5, "rel": 5, "bia": 5, "rab": 5, "c": [6, 8, 9], "gpt": 6, "session": 6, "creation": 6, "configur": 6, "world": 6, "setup": 6, "output": 6, "sampl": 6, "paramet": 6, "intern": 6, "compon": 6, "know": 6, "issu": [6, 10], "futur": 6, "chang": 6, "graph": 7, "rewrit": 7, "modul": 7, "when": 7, "us": 7, "relat": 7, "method": 7, "flayerinfo": 7, "retriev": 7, "high": [7, 10], "level": 7, "inform": 7, "function": [7, 12], "record_signatur": 7, "decor": 7, "requir": 7, "classic": 7, "workflow": 7, "welcom": 8, "document": 8, "content": 8, "python": 8, "indic": 8, "tabl": 8, "build": 9, "from": 9, "sourc": 9, "fetch": 9, "One": 9, "creat": 9, "contain": 9, "On": 9, "system": 9, "gnu": 9, "make": 9, "without": 9, "link": 9, "header": 9, "file": 9, "perform": 10, "methodologi": 10, "throughput": 10, "h100": 10, "l40": 10, "a100": 10, "fp16": [10, 11], "low": 10, "latenc": 10, "known": 10, "fuse": 10, "matmul": 10, "gate": 10, "silu": 10, "llama": 10, "numer": 11, "precis": 11, "fp32": 11, "bf16": 11, "quantiz": 11, "dequant": 11, "q": 11, "dq": 11, "smoothquant": 11, "w8a8": 11, "int4": 11, "onli": 11, "w4a16": 11, "w8a16": 11, "gptq": 11, "awq": 11, "hopper": 11, "matrix": 11, "technic": 11, "detail": 11, "quantmod": 11, "flag": 11, "layer": 13, "activ": 13, "cast": 13, "conv": 13, "linear": 13, "mlp": 13, "normal": 13, "pool": 13, "qunat": 16}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx.ext.todo": 2, "sphinx": 58}, "alltitles": {"How to add a new model": [[0, "how-to-add-a-new-model"]], "Steps": [[0, "steps"]], "How to debug": [[1, "how-to-debug"]], "Overview": [[1, "overview"], [9, "overview"]], "Debug on unit tests": [[1, "debug-on-unit-tests"]], "Debug on E2E models": [[1, "debug-on-e2e-models"]], "Debug execution errors": [[1, "debug-execution-errors"]], "Runtime": [[2, "runtime"], [3, "runtime"], [17, "module-tensorrt_llm"]], "bufferManager.h": [[2, "buffermanager-h"]], "common.h": [[2, "common-h"]], "cudaEvent.h": [[2, "cudaevent-h"]], "cudaStream.h": [[2, "cudastream-h"]], "decodingInput.h": [[2, "decodinginput-h"]], "decodingOutput.h": [[2, "decodingoutput-h"]], "generationInput.h": [[2, "generationinput-h"]], "generationOutput.h": [[2, "generationoutput-h"]], "gptDecoder.h": [[2, "gptdecoder-h"]], "gptDecoderBatch.h": [[2, "gptdecoderbatch-h"]], "gptJsonConfig.h": [[2, "gptjsonconfig-h"]], "gptModelConfig.h": [[2, "gptmodelconfig-h"]], "gptSession.h": [[2, "gptsession-h"]], "iBuffer.h": [[2, "ibuffer-h"]], "iGptDecoderBatch.h": [[2, "igptdecoderbatch-h"]], "iStatefulGptDecoder.h": [[2, "istatefulgptdecoder-h"]], "iTensor.h": [[2, "itensor-h"]], "ipcUtils.h": [[2, "ipcutils-h"]], "memoryCounters.h": [[2, "memorycounters-h"]], "samplingConfig.h": [[2, "samplingconfig-h"]], "tllmLogger.h": [[2, "tllmlogger-h"]], "worldConfig.h": [[2, "worldconfig-h"]], "TensorRT-LLM Architecture": [[3, "tensorrt-llm-architecture"]], "Model Definition": [[3, "model-definition"]], "Compilation": [[3, "compilation"]], "Weight Bindings": [[3, "weight-bindings"]], "Pattern-Matching and Fusion": [[3, "pattern-matching-and-fusion"]], "Plugins": [[3, "plugins"]], "Multi-GPU and Multi-Node Support": [[3, "multi-gpu-and-multi-node-support"]], "In-flight Batching": [[3, "in-flight-batching"]], "The Batch Manager in TensorRT-LLM": [[4, "the-batch-manager-in-tensorrt-llm"]], "The Batch Manager API": [[4, "the-batch-manager-api"]], "Get and Send Callbacks": [[4, "get-and-send-callbacks"]], "Request Interruption": [[4, "request-interruption"]], "Statistics": [[4, "statistics"]], "GptManager Design": [[4, "gptmanager-design"]], "Multi-GPU execution": [[4, "multi-gpu-execution"]], "In-flight Batching with the Triton Inference Server": [[4, "in-flight-batching-with-the-triton-inference-server"]], "Multi-head, Multi-query and Group-query Attention": [[5, "multi-head-multi-query-and-group-query-attention"]], "Important Note": [[5, "important-note"]], "Padded and Packed Tensors": [[5, "padded-and-packed-tensors"]], "Context and Generation Phases": [[5, "context-and-generation-phases"]], "Context Phase": [[5, "context-phase"]], "Generation Phase": [[5, "generation-phase"]], "Inflight batching": [[5, "inflight-batching"]], "KV Cache(s)": [[5, "kv-cache-s"]], "Contiguous KV Cache": [[5, "contiguous-kv-cache"]], "Paged KV Cache": [[5, "paged-kv-cache"]], "INT8/FP8 KV Caches": [[5, "int8-fp8-kv-caches"]], "Beam-Search": [[5, "beam-search"]], "Input QKV tensor": [[5, "input-qkv-tensor"]], "Additional Features": [[5, "additional-features"]], "Rotary Positional Embedding (RoPE)": [[5, "rotary-positional-embedding-rope"]], "ALiBi": [[5, "alibi"]], "Scaling factor(s)": [[5, "scaling-factor-s"]], "Cross Attention": [[5, "cross-attention"]], "Relative Attention Bias (RAB)": [[5, "relative-attention-bias-rab"]], "C++ GPT Runtime": [[6, "c-gpt-runtime"]], "The Session": [[6, "the-session"]], "Creation": [[6, "creation"]], "Model Configuration": [[6, "model-configuration"]], "World Configuration": [[6, "world-configuration"]], "Setup": [[6, "setup"]], "Generation": [[6, "generation"]], "Inputs and Outputs": [[6, "inputs-and-outputs"]], "Sampling Parameters": [[6, "sampling-parameters"]], "Internal Components": [[6, "internal-components"]], "In-flight Batching Support": [[6, "in-flight-batching-support"]], "Know Issues and Future Changes": [[6, "know-issues-and-future-changes"]], "Graph Rewriting Module": [[7, "graph-rewriting-module"]], "When to Use Graph Rewriting?": [[7, "when-to-use-graph-rewriting"]], "Graph Rewriting APIs": [[7, "graph-rewriting-apis"]], "Tensor-Related Methods": [[7, "tensor-related-methods"]], "FLayerInfo for Retrieving High-Level Information for a Functional": [[7, "flayerinfo-for-retrieving-high-level-information-for-a-functional"]], "Pattern and Pattern Manager": [[7, "pattern-and-pattern-manager"]], "@record_signature to Decorate Functionals Requiring FLayerInfo": [[7, "record-signature-to-decorate-functionals-requiring-flayerinfo"]], "Classical Workflow": [[7, "classical-workflow"]], "Welcome to TensorRT-LLM\u2019s documentation!": [[8, "welcome-to-tensorrt-llm-s-documentation"]], "Contents:": [[8, null]], "Python API": [[8, "python-api"]], "C++ API": [[8, "c-api"]], "Indices and tables": [[8, "indices-and-tables"]], "Build From Sources": [[9, "build-from-sources"]], "Fetch the Sources": [[9, "fetch-the-sources"]], "Build TensorRT-LLM in One Step": [[9, "build-tensorrt-llm-in-one-step"]], "Build Step-by-step": [[9, "build-step-by-step"]], "Create the Container": [[9, "create-the-container"]], "On Systems with GNU make": [[9, "on-systems-with-gnu-make"]], "On Systems Without GNU make": [[9, "on-systems-without-gnu-make"]], "Build TensorRT-LLM": [[9, "build-tensorrt-llm"]], "Link with the TensorRT-LLM C++ Runtime": [[9, "link-with-the-tensorrt-llm-c-runtime"]], "Supported C++ Header Files": [[9, "supported-c-header-files"]], "Performance of TensorRT-LLM": [[10, "performance-of-tensorrt-llm"]], "Methodology": [[10, "methodology"]], "High Throughput": [[10, "high-throughput"]], "H100 GPUs (FP8)": [[10, "h100-gpus-fp8"], [10, "id1"]], "L40S GPUs (FP8)": [[10, "l40s-gpus-fp8"], [10, "id2"]], "A100 GPUs (FP16)": [[10, "a100-gpus-fp16"], [10, "id3"]], "Low Latency": [[10, "low-latency"]], "Known Issues": [[10, "known-issues"]], "Fused Matmul + Gated-SiLU (LLaMA)": [[10, "fused-matmul-gated-silu-llama"]], "Numerical Precision": [[11, "numerical-precision"]], "FP32, FP16 and BF16": [[11, "fp32-fp16-and-bf16"]], "Quantization and Dequantization (Q/DQ)": [[11, "quantization-and-dequantization-q-dq"]], "INT8 SmoothQuant (W8A8)": [[11, "int8-smoothquant-w8a8"]], "INT4 and INT8 Weight-Only (W4A16 and W8A16)": [[11, "int4-and-int8-weight-only-w4a16-and-w8a16"]], "GPTQ and AWQ (W4A16)": [[11, "gptq-and-awq-w4a16"]], "FP8 (Hopper)": [[11, "fp8-hopper"]], "Support matrix": [[11, "support-matrix"]], "Technical Detail: The QuantMode Flags": [[11, "technical-detail-the-quantmode-flags"]], "Functionals": [[12, "module-tensorrt_llm"]], "Layers": [[13, "module-tensorrt_llm"]], "Activation": [[13, "module-tensorrt_llm.layers.activation"]], "Attention": [[13, "module-tensorrt_llm.layers.attention"]], "Cast": [[13, "module-tensorrt_llm.layers.cast"]], "Conv": [[13, "module-tensorrt_llm.layers.conv"]], "Embedding": [[13, "module-tensorrt_llm.layers.embedding"]], "Linear": [[13, "module-tensorrt_llm.layers.linear"]], "MLP": [[13, "module-tensorrt_llm.layers.mlp"]], "Normalization": [[13, "normalization"]], "Pooling": [[13, "module-tensorrt_llm.layers.pooling"]], "Models": [[14, "module-tensorrt_llm"]], "Plugin": [[15, "module-tensorrt_llm"]], "Quantization": [[16, "module-tensorrt_llm"]]}, "indexentries": {"nvinfer1 (c++ type)": [[2, "_CPPv48nvinfer1"]], "tensorrt_llm (c++ type)": [[2, "_CPPv412tensorrt_llm"]], "tensorrt_llm::batch_manager (c++ type)": [[2, "_CPPv4N12tensorrt_llm13batch_managerE"]], "tensorrt_llm::batch_manager::kv_cache_manager (c++ type)": [[2, "_CPPv4N12tensorrt_llm13batch_manager16kv_cache_managerE"]], "tensorrt_llm::layers (c++ type)": [[2, "_CPPv4N12tensorrt_llm6layersE"]], "tensorrt_llm::layers::dynamicdecodelayer (c++ class)": [[2, "_CPPv4I0EN12tensorrt_llm6layers18DynamicDecodeLayerE"]], "tensorrt_llm::runtime (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtimeE"]], "tensorrt_llm::runtime::bufferdatatype (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime14BufferDataTypeE"]], "tensorrt_llm::runtime::bufferdatatype::bufferdatatype (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb"]], "tensorrt_llm::runtime::bufferdatatype::getdatatype (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType11getDataTypeEv"]], "tensorrt_llm::runtime::bufferdatatype::getsize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType7getSizeEv"]], "tensorrt_llm::runtime::bufferdatatype::ispointer (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType9isPointerEv"]], "tensorrt_llm::runtime::bufferdatatype::isunsigned (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType10isUnsignedEv"]], "tensorrt_llm::runtime::bufferdatatype::ktrtpointertype (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14BufferDataType15kTrtPointerTypeE"]], "tensorrt_llm::runtime::bufferdatatype::mdatatype (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14BufferDataType9mDataTypeE"]], "tensorrt_llm::runtime::bufferdatatype::mpointer (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14BufferDataType8mPointerE"]], "tensorrt_llm::runtime::bufferdatatype::munsigned (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14BufferDataType9mUnsignedE"]], "tensorrt_llm::runtime::bufferdatatype::operator nvinfer1::datatype (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14BufferDataTypecvN8nvinfer18DataTypeEEv"]], "tensorrt_llm::runtime::buffermanager (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManagerE"]], "tensorrt_llm::runtime::buffermanager::buffermanager (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager13BufferManagerE13CudaStreamPtr"]], "tensorrt_llm::runtime::buffermanager::cudastreamptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager13CudaStreamPtrE"]], "tensorrt_llm::runtime::buffermanager::ibufferptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager10IBufferPtrE"]], "tensorrt_llm::runtime::buffermanager::itensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager10ITensorPtrE"]], "tensorrt_llm::runtime::buffermanager::allocate (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::copy (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer"]], "tensorrt_llm::runtime::buffermanager::copyfrom (c++ function)": [[2, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType"], [2, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType"], [2, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType"]], "tensorrt_llm::runtime::buffermanager::cpu (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE"], [2, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::emptybuffer (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::emptytensor (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::getstream (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager9getStreamEv"]], "tensorrt_llm::runtime::buffermanager::gpu (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE"], [2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::initmemorypool (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager14initMemoryPoolEi"]], "tensorrt_llm::runtime::buffermanager::kbyte_type (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager10kBYTE_TYPEE"]], "tensorrt_llm::runtime::buffermanager::mstream (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager7mStreamE"]], "tensorrt_llm::runtime::buffermanager::pinned (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE"], [2, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::setzero (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7setZeroER7IBuffer"]], "tensorrt_llm::runtime::bufferrange (c++ class)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime11BufferRangeE"]], "tensorrt_llm::runtime::bufferrange::bufferrange (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeER7IBuffer"]], "tensorrt_llm::runtime::bufferrange::begin (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange5beginEv"], [2, "_CPPv4NK12tensorrt_llm7runtime11BufferRange5beginEv"]], "tensorrt_llm::runtime::bufferrange::cbegin (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange6cbeginEv"], [2, "_CPPv4NK12tensorrt_llm7runtime11BufferRange6cbeginEv"]], "tensorrt_llm::runtime::bufferrange::cend (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange4cendEv"], [2, "_CPPv4NK12tensorrt_llm7runtime11BufferRange4cendEv"]], "tensorrt_llm::runtime::bufferrange::const_iterator (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange14const_iteratorE"]], "tensorrt_llm::runtime::bufferrange::const_pointer (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange13const_pointerE"]], "tensorrt_llm::runtime::bufferrange::const_reference (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange15const_referenceE"]], "tensorrt_llm::runtime::bufferrange::end (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange3endEv"], [2, "_CPPv4NK12tensorrt_llm7runtime11BufferRange3endEv"]], "tensorrt_llm::runtime::bufferrange::iterator (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange8iteratorE"]], "tensorrt_llm::runtime::bufferrange::mdata (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange5mDataE"]], "tensorrt_llm::runtime::bufferrange::msize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange5mSizeE"]], "tensorrt_llm::runtime::bufferrange::operator[] (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRangeixE9size_type"], [2, "_CPPv4NK12tensorrt_llm7runtime11BufferRangeixE9size_type"]], "tensorrt_llm::runtime::bufferrange::pointer (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange7pointerE"]], "tensorrt_llm::runtime::bufferrange::reference (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange9referenceE"]], "tensorrt_llm::runtime::bufferrange::size (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11BufferRange4sizeEv"]], "tensorrt_llm::runtime::bufferrange::size_type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange9size_typeE"]], "tensorrt_llm::runtime::bufferrange::value_type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11BufferRange10value_typeE"]], "tensorrt_llm::runtime::cppdatatype (c++ struct)": [[2, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime11CppDataTypeE"]], "tensorrt_llm::runtime::cppdatatype<kdatatype, kunsigned, true> (c++ struct)": [[2, "_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime11CppDataTypeI9kDataType9kUnsignedXL1EEEE"]], "tensorrt_llm::runtime::cppdatatype<kdatatype, kunsigned, true>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeI9kDataType9kUnsignedXL1EEE4typeE"]], "tensorrt_llm::runtime::cppdatatype<nvinfer1::datatype::kbool, kunsigned> (c++ struct)": [[2, "_CPPv4I_bEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType5kBOOLE9kUnsignedEE"]], "tensorrt_llm::runtime::cppdatatype<nvinfer1::datatype::kbool, kunsigned>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType5kBOOLE9kUnsignedE4typeE"]], "tensorrt_llm::runtime::cppdatatype<nvinfer1::datatype::kfloat> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kFLOATEEE"]], "tensorrt_llm::runtime::cppdatatype<nvinfer1::datatype::kfloat>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kFLOATEE4typeE"]], "tensorrt_llm::runtime::cppdatatype<nvinfer1::datatype::khalf> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType5kHALFEEE"]], "tensorrt_llm::runtime::cppdatatype<nvinfer1::datatype::khalf>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType5kHALFEE4typeE"]], "tensorrt_llm::runtime::cppdatatype<nvinfer1::datatype::kint32, true> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kINT32EXL1EEEE"]], "tensorrt_llm::runtime::cppdatatype<nvinfer1::datatype::kint32, true>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kINT32EXL1EEE4typeE"]], "tensorrt_llm::runtime::cppdatatype<nvinfer1::datatype::kint32> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kINT32EEE"]], "tensorrt_llm::runtime::cppdatatype<nvinfer1::datatype::kint32>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kINT32EE4typeE"]], "tensorrt_llm::runtime::cppdatatype<nvinfer1::datatype::kint64, true> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kINT64EXL1EEEE"]], "tensorrt_llm::runtime::cppdatatype<nvinfer1::datatype::kint64, true>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kINT64EXL1EEE4typeE"]], "tensorrt_llm::runtime::cppdatatype<nvinfer1::datatype::kint64> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kINT64EEE"]], "tensorrt_llm::runtime::cppdatatype<nvinfer1::datatype::kint64>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kINT64EE4typeE"]], "tensorrt_llm::runtime::cppdatatype<nvinfer1::datatype::kint8> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType5kINT8EEE"]], "tensorrt_llm::runtime::cppdatatype<nvinfer1::datatype::kint8>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType5kINT8EE4typeE"]], "tensorrt_llm::runtime::cppdatatype<nvinfer1::datatype::kuint8, kunsigned> (c++ struct)": [[2, "_CPPv4I_bEN12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kUINT8E9kUnsignedEE"]], "tensorrt_llm::runtime::cppdatatype<nvinfer1::datatype::kuint8, kunsigned>::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11CppDataTypeIN8nvinfer18DataType6kUINT8E9kUnsignedE4typeE"]], "tensorrt_llm::runtime::cudaevent (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEventE"]], "tensorrt_llm::runtime::cudaevent::cudaevent (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb"], [2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventEj"]], "tensorrt_llm::runtime::cudaevent::deleter (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7DeleterE"]], "tensorrt_llm::runtime::cudaevent::deleter::deleter (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEb"], [2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEv"]], "tensorrt_llm::runtime::cudaevent::deleter::mownsevent (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter10mOwnsEventE"]], "tensorrt_llm::runtime::cudaevent::deleter::operator() (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent7DeleterclE7pointer"]], "tensorrt_llm::runtime::cudaevent::eventptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent8EventPtrE"]], "tensorrt_llm::runtime::cudaevent::element_type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent12element_typeE"]], "tensorrt_llm::runtime::cudaevent::get (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent3getEv"]], "tensorrt_llm::runtime::cudaevent::mevent (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent6mEventE"]], "tensorrt_llm::runtime::cudaevent::pointer (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7pointerE"]], "tensorrt_llm::runtime::cudaevent::synchronize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent11synchronizeEv"]], "tensorrt_llm::runtime::cudastream (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStreamE"]], "tensorrt_llm::runtime::cudastream::cudastream (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib"], [2, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji"]], "tensorrt_llm::runtime::cudastream::deleter (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream7DeleterE"]], "tensorrt_llm::runtime::cudastream::deleter::deleter (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEb"], [2, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEv"]], "tensorrt_llm::runtime::cudastream::deleter::mownsstream (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter11mOwnsStreamE"]], "tensorrt_llm::runtime::cudastream::deleter::operator() (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream7DeleterclE12cudaStream_t"]], "tensorrt_llm::runtime::cudastream::streamptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream9StreamPtrE"]], "tensorrt_llm::runtime::cudastream::get (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream3getEv"]], "tensorrt_llm::runtime::cudastream::getdevice (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream9getDeviceEv"]], "tensorrt_llm::runtime::cudastream::mdevice (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream7mDeviceE"]], "tensorrt_llm::runtime::cudastream::mstream (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10CudaStream7mStreamE"]], "tensorrt_llm::runtime::cudastream::record (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordEN9CudaEvent7pointerE"], [2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordERK9CudaEvent"]], "tensorrt_llm::runtime::cudastream::synchronize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream11synchronizeEv"]], "tensorrt_llm::runtime::cudastream::wait (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitEN9CudaEvent7pointerE"], [2, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitERK9CudaEvent"]], "tensorrt_llm::runtime::decodinginput (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInputE"]], "tensorrt_llm::runtime::decodinginput::decodinginput (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType9TensorPtr9TensorPtr"]], "tensorrt_llm::runtime::decodinginput::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9TensorPtrE"]], "tensorrt_llm::runtime::decodinginput::badwordslist (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsListE"]], "tensorrt_llm::runtime::decodinginput::batchsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9batchSizeE"]], "tensorrt_llm::runtime::decodinginput::cacheindirection (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput16cacheIndirectionE"]], "tensorrt_llm::runtime::decodinginput::embeddingbias (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13embeddingBiasE"]], "tensorrt_llm::runtime::decodinginput::endids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput6endIdsE"]], "tensorrt_llm::runtime::decodinginput::lengths (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput7lengthsE"]], "tensorrt_llm::runtime::decodinginput::logits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput6logitsE"]], "tensorrt_llm::runtime::decodinginput::maxlength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9maxLengthE"]], "tensorrt_llm::runtime::decodinginput::norepeatngramsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput17noRepeatNgramSizeE"]], "tensorrt_llm::runtime::decodinginput::sequencelimitlength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput19sequenceLimitLengthE"]], "tensorrt_llm::runtime::decodinginput::step (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput4stepE"]], "tensorrt_llm::runtime::decodinginput::stopwordslist (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsListE"]], "tensorrt_llm::runtime::decodingoutput (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutputE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypothesesE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::cumlogprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses11cumLogProbsE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::empty (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5emptyER13BufferManager"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::init (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initER13BufferManager11TokenIdType"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::isdone (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses6isDoneE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::logprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses8logProbsE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::minnormedscores (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses15minNormedScoresE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::normedscores (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12normedScoresE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::numbeams (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses8numBeamsE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::outputidstgt (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12outputIdsTgtE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::release (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7releaseEv"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::reshape (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::sequencelengthstgt (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses18sequenceLengthsTgtE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::slice (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE8SizeType8SizeType"]], "tensorrt_llm::runtime::decodingoutput::decodingoutput (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14DecodingOutputE9TensorPtr"]], "tensorrt_llm::runtime::decodingoutput::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9TensorPtrE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14beamHypothesesE"]], "tensorrt_llm::runtime::decodingoutput::cacheindirection (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput16cacheIndirectionE"]], "tensorrt_llm::runtime::decodingoutput::cumlogprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput11cumLogProbsE"]], "tensorrt_llm::runtime::decodingoutput::finished (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput8finishedE"]], "tensorrt_llm::runtime::decodingoutput::finishedsum (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput11finishedSumE"]], "tensorrt_llm::runtime::decodingoutput::ids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput3idsE"]], "tensorrt_llm::runtime::decodingoutput::knegativeinfinity (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput17kNegativeInfinityE"]], "tensorrt_llm::runtime::decodingoutput::lengths (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput7lengthsE"]], "tensorrt_llm::runtime::decodingoutput::logprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput8logProbsE"]], "tensorrt_llm::runtime::decodingoutput::newtokens (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9newTokensE"]], "tensorrt_llm::runtime::decodingoutput::parentids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9parentIdsE"]], "tensorrt_llm::runtime::generationinput (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInputE"]], "tensorrt_llm::runtime::generationinput::generationinput (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb"]], "tensorrt_llm::runtime::generationinput::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInput9TensorPtrE"]], "tensorrt_llm::runtime::generationinput::badwordslist (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInput12badWordsListE"]], "tensorrt_llm::runtime::generationinput::embeddingbiasopt (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInput16embeddingBiasOptE"]], "tensorrt_llm::runtime::generationinput::endid (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInput5endIdE"]], "tensorrt_llm::runtime::generationinput::ids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInput3idsE"]], "tensorrt_llm::runtime::generationinput::lengths (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInput7lengthsE"]], "tensorrt_llm::runtime::generationinput::maxnewtokens (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInput12maxNewTokensE"]], "tensorrt_llm::runtime::generationinput::packed (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInput6packedE"]], "tensorrt_llm::runtime::generationinput::padid (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInput5padIdE"]], "tensorrt_llm::runtime::generationinput::stopwordslist (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GenerationInput13stopWordsListE"]], "tensorrt_llm::runtime::generationoutput (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime16GenerationOutputE"]], "tensorrt_llm::runtime::generationoutput::callback (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput8CallbackE"]], "tensorrt_llm::runtime::generationoutput::generationoutput (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput16GenerationOutputE9TensorPtr"]], "tensorrt_llm::runtime::generationoutput::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput9TensorPtrE"]], "tensorrt_llm::runtime::generationoutput::contextlogits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput13contextLogitsE"]], "tensorrt_llm::runtime::generationoutput::ids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput3idsE"]], "tensorrt_llm::runtime::generationoutput::logprobs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput8logProbsE"]], "tensorrt_llm::runtime::generationoutput::ontokengenerated (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput16onTokenGeneratedE"]], "tensorrt_llm::runtime::gptdecoder (c++ class)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime10GptDecoderE"]], "tensorrt_llm::runtime::gptdecoder::cudastreamptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder13CudaStreamPtrE"]], "tensorrt_llm::runtime::gptdecoder::gptdecoder (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderE6size_t6size_tRK13CudaStreamPtr"]], "tensorrt_llm::runtime::gptdecoder::forward (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder7forwardER14DecodingOutputRK13DecodingInput"]], "tensorrt_llm::runtime::gptdecoder::forwardasync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput"]], "tensorrt_llm::runtime::gptdecoder::mallocator (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10mAllocatorE"]], "tensorrt_llm::runtime::gptdecoder::mdynamicdecodelayer (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder19mDynamicDecodeLayerE"]], "tensorrt_llm::runtime::gptdecoder::mmanager (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder8mManagerE"]], "tensorrt_llm::runtime::gptdecoder::setup (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t"]], "tensorrt_llm::runtime::gptdecoderbatch (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatchE"]], "tensorrt_llm::runtime::gptdecoderbatch::cudastreamptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13CudaStreamPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::decodinginputptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16DecodingInputPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::decodingoutputptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch17DecodingOutputPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::gptdecoderbatch (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr"]], "tensorrt_llm::runtime::gptdecoderbatch::gptdecoderptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13GptDecoderPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9TensorPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::forwardasync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE"], [2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN7decoder6OutputERKN7decoder5InputE"]], "tensorrt_llm::runtime::gptdecoderbatch::forwardsync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE"]], "tensorrt_llm::runtime::gptdecoderbatch::getcumlogprobs (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch14getCumLogProbsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getfinaloutputids (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch17getFinalOutputIdsE8SizeType"], [2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch17getFinalOutputIdsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getfinished (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getFinishedEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getfinishedbeams (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch16getFinishedBeamsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getnbfinished (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch13getNbFinishedEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getnbsteps (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch10getNbStepsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getnewtokens (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getNewTokensEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getoutputids (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsE8SizeType"], [2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getoutputlengths (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch16getOutputLengthsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getparentids (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getParentIdsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::isfinishedsync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14isFinishedSyncEv"]], "tensorrt_llm::runtime::gptdecoderbatch::mactualbatchsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mActualBatchSizeE"]], "tensorrt_llm::runtime::gptdecoderbatch::mbeamwidths (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11mBeamWidthsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mbuffermanager (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mBufferManagerE"]], "tensorrt_llm::runtime::gptdecoderbatch::mdecoders (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9mDecodersE"]], "tensorrt_llm::runtime::gptdecoderbatch::mdecodinginputs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mDecodingInputsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mdecodingoutputs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mDecodingOutputsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mfinished (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9mFinishedE"]], "tensorrt_llm::runtime::gptdecoderbatch::mfinishedsum (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12mFinishedSumE"]], "tensorrt_llm::runtime::gptdecoderbatch::mforwardevent (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mForwardEventE"]], "tensorrt_llm::runtime::gptdecoderbatch::mforwardtoken (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mForwardTokenE"]], "tensorrt_llm::runtime::gptdecoderbatch::mjointdecodinginput (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch19mJointDecodingInputE"]], "tensorrt_llm::runtime::gptdecoderbatch::mjointdecodingoutput (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch20mJointDecodingOutputE"]], "tensorrt_llm::runtime::gptdecoderbatch::mmaxnewtokens (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mMaxNewTokensE"]], "tensorrt_llm::runtime::gptdecoderbatch::mmaxsequencelength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch18mMaxSequenceLengthE"]], "tensorrt_llm::runtime::gptdecoderbatch::mnbsteps (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8mNbStepsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mstream (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch7mStreamE"]], "tensorrt_llm::runtime::gptdecoderbatch::mstreams (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8mStreamsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mvocabsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10mVocabSizeE"]], "tensorrt_llm::runtime::gptdecoderbatch::mvocabsizepadded (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mVocabSizePaddedE"]], "tensorrt_llm::runtime::gptdecoderbatch::newbatch (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK14SamplingConfig"]], "tensorrt_llm::runtime::gptdecoderbatch::newrequest (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig"]], "tensorrt_llm::runtime::gptdecoderbatch::postprocessrequest (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch18postProcessRequestE8SizeType"]], "tensorrt_llm::runtime::gptdecoderbatch::setup (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::gptjsonconfig (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfigE"]], "tensorrt_llm::runtime::gptjsonconfig::gptjsonconfig (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig"]], "tensorrt_llm::runtime::gptjsonconfig::enginefilename (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfig"], [2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE"]], "tensorrt_llm::runtime::gptjsonconfig::getmodelconfig (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14getModelConfigEv"]], "tensorrt_llm::runtime::gptjsonconfig::getname (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig7getNameEv"]], "tensorrt_llm::runtime::gptjsonconfig::getpipelineparallelism (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig22getPipelineParallelismEv"]], "tensorrt_llm::runtime::gptjsonconfig::getprecision (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getPrecisionEv"]], "tensorrt_llm::runtime::gptjsonconfig::gettensorparallelism (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig20getTensorParallelismEv"]], "tensorrt_llm::runtime::gptjsonconfig::getworldsize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getWorldSizeEv"]], "tensorrt_llm::runtime::gptjsonconfig::mgptmodelconfig (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig15mGptModelConfigE"]], "tensorrt_llm::runtime::gptjsonconfig::mname (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5mNameE"]], "tensorrt_llm::runtime::gptjsonconfig::mpipelineparallelism (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig20mPipelineParallelismE"]], "tensorrt_llm::runtime::gptjsonconfig::mprecision (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig10mPrecisionE"]], "tensorrt_llm::runtime::gptjsonconfig::mtensorparallelism (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig18mTensorParallelismE"]], "tensorrt_llm::runtime::gptjsonconfig::parse (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt10filesystem4pathE"], [2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt6stringE"], [2, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERNSt7istreamE"]], "tensorrt_llm::runtime::gptmodelconfig (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfigE"]], "tensorrt_llm::runtime::gptmodelconfig::gptmodelconfig (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::gptmodelconfig::modelvariant (c++ enum)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariantE"]], "tensorrt_llm::runtime::gptmodelconfig::modelvariant::kglm (c++ enumerator)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant4kGlmE"]], "tensorrt_llm::runtime::gptmodelconfig::modelvariant::kgpt (c++ enumerator)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant4kGptE"]], "tensorrt_llm::runtime::gptmodelconfig::computecontextlogits (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEb"], [2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEv"]], "tensorrt_llm::runtime::gptmodelconfig::getdatatype (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getDataTypeEv"]], "tensorrt_llm::runtime::gptmodelconfig::gethiddensize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13getHiddenSizeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxbatchsize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxBatchSizeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxinputlen (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMaxInputLenEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxnumtokens (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxNumTokensEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxoutputlen (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxOutputLenEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmodelvariant (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getModelVariantEv"]], "tensorrt_llm::runtime::gptmodelconfig::getnbheads (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig10getNbHeadsEv"]], "tensorrt_llm::runtime::gptmodelconfig::getnbkvheads (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getNbKvHeadsEv"]], "tensorrt_llm::runtime::gptmodelconfig::getnblayers (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getNbLayersE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::getquantmode (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getQuantModeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getsizeperhead (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getSizePerHeadEv"]], "tensorrt_llm::runtime::gptmodelconfig::gettokensperblock (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig17getTokensPerBlockEv"]], "tensorrt_llm::runtime::gptmodelconfig::getvocabsize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getVocabSizeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getvocabsizepadded (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18getVocabSizePaddedE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::mcomputecontextlogits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21mComputeContextLogitsE"]], "tensorrt_llm::runtime::gptmodelconfig::mdatatype (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig9mDataTypeE"]], "tensorrt_llm::runtime::gptmodelconfig::mhiddensize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig11mHiddenSizeE"]], "tensorrt_llm::runtime::gptmodelconfig::minputpacked (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mInputPackedE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxbatchsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxBatchSizeE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxinputlen (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxInputLenE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxnumtokens (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxNumTokensE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxoutputlen (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxOutputLenE"]], "tensorrt_llm::runtime::gptmodelconfig::mmodelvariant (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mModelVariantE"]], "tensorrt_llm::runtime::gptmodelconfig::mnbheads (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig8mNbHeadsE"]], "tensorrt_llm::runtime::gptmodelconfig::mnbkvheads (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mNbKvHeadsE"]], "tensorrt_llm::runtime::gptmodelconfig::mnblayers (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig9mNbLayersE"]], "tensorrt_llm::runtime::gptmodelconfig::mpagedkvcache (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mPagedKvCacheE"]], "tensorrt_llm::runtime::gptmodelconfig::mquantmode (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mQuantModeE"]], "tensorrt_llm::runtime::gptmodelconfig::mtokensperblock (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15mTokensPerBlockE"]], "tensorrt_llm::runtime::gptmodelconfig::musecustomallreduce (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig19mUseCustomAllReduceE"]], "tensorrt_llm::runtime::gptmodelconfig::musegptattentionplugin (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig22mUseGptAttentionPluginE"]], "tensorrt_llm::runtime::gptmodelconfig::mvocabsize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mVocabSizeE"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxbatchsize (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBatchSizeE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxinputlen (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxInputLenE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxnumtokens (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxNumTokensENSt8optionalI8SizeTypeEE"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxoutputlen (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxOutputLenE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmodelvariant (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setModelVariantE12ModelVariant"]], "tensorrt_llm::runtime::gptmodelconfig::setnbkvheads (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setNbKvHeadsE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setquantmode (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setQuantModeEN6common9QuantModeE"]], "tensorrt_llm::runtime::gptmodelconfig::settokensperblock (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setTokensPerBlockE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::supportsinflightbatching (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig24supportsInflightBatchingEv"]], "tensorrt_llm::runtime::gptmodelconfig::usecustomallreduce (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEb"], [2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEv"]], "tensorrt_llm::runtime::gptmodelconfig::usegptattentionplugin (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEb"], [2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEv"]], "tensorrt_llm::runtime::gptmodelconfig::usepackedinput (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14usePackedInputEb"], [2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14usePackedInputEv"]], "tensorrt_llm::runtime::gptmodelconfig::usepagedkvcache (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEb"], [2, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEv"]], "tensorrt_llm::runtime::gptsession (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSessionE"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorE"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::cudagraphexecutor (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor17CudaGraphExecutorEv"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::clear (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor5clearEv"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::create (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6createERK11cudaGraph_t"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::cudagraphexecptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16cudaGraphExecPtrE"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::hasinstance (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor11hasInstanceEv"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::launch (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6launchERK10CudaStream"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::minstance (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor9mInstanceE"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::preparenextgraph (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16prepareNextGraphERK11TllmRuntime8SizeType"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::update (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6updateERK11cudaGraph_t"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::uploadtostream (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor14uploadToStreamERK10CudaStream"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::~cudagraphexecutor (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorD0Ev"]], "tensorrt_llm::runtime::gptsession::gptsession (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr"], [2, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr"], [2, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr"]], "tensorrt_llm::runtime::gptsession::kvcachemanager (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession14KvCacheManagerE"]], "tensorrt_llm::runtime::gptsession::loggerptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession9LoggerPtrE"]], "tensorrt_llm::runtime::gptsession::createbuffers (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession13createBuffersE8SizeType"]], "tensorrt_llm::runtime::gptsession::createcontexts (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession14createContextsE8SizeType"]], "tensorrt_llm::runtime::gptsession::createcustomallreduceworkspace (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::createdecoders (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeType"]], "tensorrt_llm::runtime::gptsession::createkvcachemanagers (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession21createKvCacheManagersE8SizeType8SizeType8SizeType8SizeTypeNSt8optionalI8SizeTypeEE"]], "tensorrt_llm::runtime::gptsession::decoderstepasync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncERN7ITensor9SharedPtrERN7ITensor9SharedPtrE8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::finalizeoutputids (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession17finalizeOutputIdsER7ITensor8SizeType"]], "tensorrt_llm::runtime::gptsession::generate (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfig"]], "tensorrt_llm::runtime::gptsession::generatemultibatch (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession18generateMultiBatchER16GenerationOutputRK15GenerationInputRK14SamplingConfig"]], "tensorrt_llm::runtime::gptsession::generatesinglebatch (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession19generateSingleBatchER16GenerationOutputRK15GenerationInputRK14SamplingConfig"]], "tensorrt_llm::runtime::gptsession::getbuffermanager (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession16getBufferManagerEv"]], "tensorrt_llm::runtime::gptsession::getdevice (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession9getDeviceEv"]], "tensorrt_llm::runtime::gptsession::getlogger (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession9getLoggerEv"]], "tensorrt_llm::runtime::gptsession::getmodelconfig (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession14getModelConfigEv"]], "tensorrt_llm::runtime::gptsession::getworldconfig (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession14getWorldConfigEv"]], "tensorrt_llm::runtime::gptsession::initnewtokens (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession13initNewTokensERK15GenerationInputRK14SamplingConfig8SizeType"]], "tensorrt_llm::runtime::gptsession::iscudagraphmode (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime10GptSession15isCudaGraphModeEv"]], "tensorrt_llm::runtime::gptsession::kvcacheaddsequences (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::mbuffers (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession8mBuffersE"]], "tensorrt_llm::runtime::gptsession::mcommevent (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession10mCommEventE"]], "tensorrt_llm::runtime::gptsession::mcommstream (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession11mCommStreamE"]], "tensorrt_llm::runtime::gptsession::mcudagraphinstances (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession19mCudaGraphInstancesE"]], "tensorrt_llm::runtime::gptsession::mcudagraphmode (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession14mCudaGraphModeE"]], "tensorrt_llm::runtime::gptsession::mdecodermaxsequencelength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession25mDecoderMaxSequenceLengthE"]], "tensorrt_llm::runtime::gptsession::mdecoders (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession9mDecodersE"]], "tensorrt_llm::runtime::gptsession::mdevice (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession7mDeviceE"]], "tensorrt_llm::runtime::gptsession::mkvcachemanagers (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession16mKvCacheManagersE"]], "tensorrt_llm::runtime::gptsession::mlogger (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession7mLoggerE"]], "tensorrt_llm::runtime::gptsession::mmodelconfig (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession12mModelConfigE"]], "tensorrt_llm::runtime::gptsession::mnummicrobatches (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession16mNumMicroBatchesE"]], "tensorrt_llm::runtime::gptsession::mpipelinecomm (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession13mPipelineCommE"]], "tensorrt_llm::runtime::gptsession::mreceivedevents (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession15mReceivedEventsE"]], "tensorrt_llm::runtime::gptsession::mruntime (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession8mRuntimeE"]], "tensorrt_llm::runtime::gptsession::mworldconfig (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession12mWorldConfigE"]], "tensorrt_llm::runtime::gptsession::setcudagraphmode (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession16setCudaGraphModeEb"]], "tensorrt_llm::runtime::gptsession::setup (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession5setupE8SizeType8SizeType8SizeTypebNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE"]], "tensorrt_llm::runtime::gptsession::shouldstopsync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::ibuffer (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBufferE"]], "tensorrt_llm::runtime::ibuffer::datatype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer8DataTypeE"]], "tensorrt_llm::runtime::ibuffer::ibuffer (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferERK7IBuffer"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferEv"]], "tensorrt_llm::runtime::ibuffer::sharedconstptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer14SharedConstPtrE"]], "tensorrt_llm::runtime::ibuffer::sharedptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer9SharedPtrE"]], "tensorrt_llm::runtime::ibuffer::uniqueconstptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer14UniqueConstPtrE"]], "tensorrt_llm::runtime::ibuffer::uniqueptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer9UniquePtrE"]], "tensorrt_llm::runtime::ibuffer::data (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataEv"], [2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE"], [2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataEv"]], "tensorrt_llm::runtime::ibuffer::getcapacity (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer11getCapacityEv"]], "tensorrt_llm::runtime::ibuffer::getdatatype (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer11getDataTypeEv"]], "tensorrt_llm::runtime::ibuffer::getmemorytype (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer13getMemoryTypeEv"]], "tensorrt_llm::runtime::ibuffer::getsize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7getSizeEv"]], "tensorrt_llm::runtime::ibuffer::getsizeinbytes (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer14getSizeInBytesEv"]], "tensorrt_llm::runtime::ibuffer::memorytype (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer10memoryTypeEPKv"]], "tensorrt_llm::runtime::ibuffer::operator= (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBufferaSERK7IBuffer"]], "tensorrt_llm::runtime::ibuffer::release (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer7releaseEv"]], "tensorrt_llm::runtime::ibuffer::resize (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBuffer6resizeENSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::slice (c++ function)": [[2, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE"], [2, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::tobytes (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7toBytesENSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::view (c++ function)": [[2, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtr"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::wrap (c++ function)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE"], [2, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE"], [2, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::~ibuffer (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7IBufferD0Ev"]], "tensorrt_llm::runtime::igptdecoder (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoderE"]], "tensorrt_llm::runtime::igptdecoder::create (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createEN8nvinfer18DataTypeE6size_t6size_tRKN13BufferManager13CudaStreamPtrE"]], "tensorrt_llm::runtime::igptdecoder::forward (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder7forwardER14DecodingOutputRK13DecodingInput"]], "tensorrt_llm::runtime::igptdecoder::forwardasync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput"]], "tensorrt_llm::runtime::igptdecoder::gathertree (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager"]], "tensorrt_llm::runtime::igptdecoder::setup (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t"]], "tensorrt_llm::runtime::igptdecoder::~igptdecoder (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11IGptDecoderD0Ev"]], "tensorrt_llm::runtime::igptdecoderbatch (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatchE"]], "tensorrt_llm::runtime::igptdecoderbatch::cudastreamptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch13CudaStreamPtrE"]], "tensorrt_llm::runtime::igptdecoderbatch::igptdecoderbatch (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch16IGptDecoderBatchEv"]], "tensorrt_llm::runtime::igptdecoderbatch::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch9TensorPtrE"]], "tensorrt_llm::runtime::igptdecoderbatch::tokenptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch8TokenPtrE"]], "tensorrt_llm::runtime::igptdecoderbatch::forward (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch7forwardERN13decoder_batch6OutputERKN13decoder_batch5InputE"]], "tensorrt_llm::runtime::igptdecoderbatch::forwardasync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE"]], "tensorrt_llm::runtime::igptdecoderbatch::forwardsync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE"]], "tensorrt_llm::runtime::igptdecoderbatch::getcumlogprobs (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch14getCumLogProbsEv"]], "tensorrt_llm::runtime::igptdecoderbatch::getfinaloutputids (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch17getFinalOutputIdsE8SizeType"]], "tensorrt_llm::runtime::igptdecoderbatch::getfinished (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getFinishedEv"]], "tensorrt_llm::runtime::igptdecoderbatch::getfinishedbeams (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch16getFinishedBeamsEv"]], "tensorrt_llm::runtime::igptdecoderbatch::getnbsteps (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch10getNbStepsEv"]], "tensorrt_llm::runtime::igptdecoderbatch::getoutputids (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getOutputIdsE8SizeType"]], "tensorrt_llm::runtime::igptdecoderbatch::getoutputlengths (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch16getOutputLengthsEv"]], "tensorrt_llm::runtime::igptdecoderbatch::getparentids (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getParentIdsEv"]], "tensorrt_llm::runtime::igptdecoderbatch::newrequest (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig"]], "tensorrt_llm::runtime::istatefulgptdecoder (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoderE"]], "tensorrt_llm::runtime::istatefulgptdecoder::cudastreamptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder13CudaStreamPtrE"]], "tensorrt_llm::runtime::istatefulgptdecoder::istatefulgptdecoder (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder19IStatefulGptDecoderEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder9TensorPtrE"]], "tensorrt_llm::runtime::istatefulgptdecoder::forward (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder7forwardERN7decoder6OutputERKN7decoder5InputE"]], "tensorrt_llm::runtime::istatefulgptdecoder::forwardasync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder12forwardAsyncERN7decoder6OutputERKN7decoder5InputE"]], "tensorrt_llm::runtime::istatefulgptdecoder::getfinaloutputids (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder17getFinalOutputIdsEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::getnbfinished (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder13getNbFinishedEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::getnewtokens (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getNewTokensEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::getoutputids (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getOutputIdsEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::isfinishedsync (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder14isFinishedSyncEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::newbatch (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK14SamplingConfig"]], "tensorrt_llm::runtime::istatefulgptdecoder::setup (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::itensor (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensorE"]], "tensorrt_llm::runtime::itensor::itensor (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorERK7ITensor"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorEv"]], "tensorrt_llm::runtime::itensor::shape (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor5ShapeE"]], "tensorrt_llm::runtime::itensor::sharedconstptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor14SharedConstPtrE"]], "tensorrt_llm::runtime::itensor::sharedptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor9SharedPtrE"]], "tensorrt_llm::runtime::itensor::uniqueconstptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor14UniqueConstPtrE"]], "tensorrt_llm::runtime::itensor::uniqueptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor9UniquePtrE"]], "tensorrt_llm::runtime::itensor::getshape (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime7ITensor8getShapeEv"]], "tensorrt_llm::runtime::itensor::makeshape (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor9makeShapeERKNSt16initializer_listI8SizeTypeEE"]], "tensorrt_llm::runtime::itensor::operator= (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensoraSERK7ITensor"]], "tensorrt_llm::runtime::itensor::reshape (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor7reshapeERK5Shape"]], "tensorrt_llm::runtime::itensor::slice (c++ function)": [[2, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE"], [2, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE"]], "tensorrt_llm::runtime::itensor::squeeze (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeE8SizeType"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape8SizeType"]], "tensorrt_llm::runtime::itensor::tostring (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor8toStringERK5Shape"]], "tensorrt_llm::runtime::itensor::view (c++ function)": [[2, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewE9SharedPtr"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape"]], "tensorrt_llm::runtime::itensor::volume (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor6volumeERK5Shape"]], "tensorrt_llm::runtime::itensor::volumenonnegative (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensor17volumeNonNegativeERK5Shape"]], "tensorrt_llm::runtime::itensor::wrap (c++ function)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape"], [2, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE"], [2, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape"], [2, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE"]], "tensorrt_llm::runtime::itensor::~itensor (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7ITensorD0Ev"]], "tensorrt_llm::runtime::ipcmemory (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemoryE"]], "tensorrt_llm::runtime::ipcmemory::flags_size (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory10FLAGS_SIZEE"]], "tensorrt_llm::runtime::ipcmemory::ipcmemory (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryE11WorldConfigNSt6size_tE"]], "tensorrt_llm::runtime::ipcmemory::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9TensorPtrE"]], "tensorrt_llm::runtime::ipcmemory::allocateipcmemory (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory17allocateIpcMemoryEv"]], "tensorrt_llm::runtime::ipcmemory::destroyipcmemory (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory16destroyIpcMemoryEv"]], "tensorrt_llm::runtime::ipcmemory::getcommptrstensor (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime9IpcMemory17getCommPtrsTensorEv"]], "tensorrt_llm::runtime::ipcmemory::mbufferptr (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory10mBufferPtrE"]], "tensorrt_llm::runtime::ipcmemory::mbuffersize (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory11mBufferSizeE"]], "tensorrt_llm::runtime::ipcmemory::mcommptrs (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9mCommPtrsE"]], "tensorrt_llm::runtime::ipcmemory::mworldconfig (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemory12mWorldConfigE"]], "tensorrt_llm::runtime::ipcmemory::~ipcmemory (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime9IpcMemoryD0Ev"]], "tensorrt_llm::runtime::memorycounters (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCountersE"]], "tensorrt_llm::runtime::memorycounters::difftype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8DiffTypeE"]], "tensorrt_llm::runtime::memorycounters::memorycounters (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters14MemoryCountersEv"]], "tensorrt_llm::runtime::memorycounters::sizetype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8SizeTypeE"]], "tensorrt_llm::runtime::memorycounters::allocate (c++ function)": [[2, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv8SizeType"], [2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType8SizeType"]], "tensorrt_llm::runtime::memorycounters::bytestostring (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei"], [2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8SizeTypei"]], "tensorrt_llm::runtime::memorycounters::deallocate (c++ function)": [[2, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv8SizeType"], [2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType8SizeType"]], "tensorrt_llm::runtime::memorycounters::getcpu (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getCpuEv"]], "tensorrt_llm::runtime::memorycounters::getcpudiff (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getCpuDiffEv"]], "tensorrt_llm::runtime::memorycounters::getgpu (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getGpuEv"]], "tensorrt_llm::runtime::memorycounters::getgpudiff (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getGpuDiffEv"]], "tensorrt_llm::runtime::memorycounters::getinstance (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters11getInstanceEv"]], "tensorrt_llm::runtime::memorycounters::getpinned (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters9getPinnedEv"]], "tensorrt_llm::runtime::memorycounters::getpinneddiff (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters13getPinnedDiffEv"]], "tensorrt_llm::runtime::memorycounters::mcpu (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mCpuE"]], "tensorrt_llm::runtime::memorycounters::mcpudiff (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mCpuDiffE"]], "tensorrt_llm::runtime::memorycounters::mgpu (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mGpuE"]], "tensorrt_llm::runtime::memorycounters::mgpudiff (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mGpuDiffE"]], "tensorrt_llm::runtime::memorycounters::minstance (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters9mInstanceE"]], "tensorrt_llm::runtime::memorycounters::mpinned (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters7mPinnedE"]], "tensorrt_llm::runtime::memorycounters::mpinneddiff (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters11mPinnedDiffE"]], "tensorrt_llm::runtime::memorytype (c++ enum)": [[2, "_CPPv4N12tensorrt_llm7runtime10MemoryTypeE"]], "tensorrt_llm::runtime::memorytype::kcpu (c++ enumerator)": [[2, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kCPUE"]], "tensorrt_llm::runtime::memorytype::kgpu (c++ enumerator)": [[2, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kGPUE"]], "tensorrt_llm::runtime::memorytype::kpinned (c++ enumerator)": [[2, "_CPPv4N12tensorrt_llm7runtime10MemoryType7kPINNEDE"]], "tensorrt_llm::runtime::memorytypestring (c++ struct)": [[2, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime16MemoryTypeStringE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kcpu> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEEE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kcpu>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEE5valueE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kgpu> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEEE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kgpu>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEE5valueE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kpinned> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEEE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kpinned>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEE5valueE"]], "tensorrt_llm::runtime::phonynameduetoerror::type (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE"]], "tensorrt_llm::runtime::phonynameduetoerror::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE"]], "tensorrt_llm::runtime::pointerelementtype (c++ type)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime18PointerElementTypeE"]], "tensorrt_llm::runtime::samplingconfig (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfigE"]], "tensorrt_llm::runtime::samplingconfig::floattype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9FloatTypeE"]], "tensorrt_llm::runtime::samplingconfig::optvec (c++ type)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig6OptVecE"]], "tensorrt_llm::runtime::samplingconfig::samplingconfig (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigE8SizeType"]], "tensorrt_llm::runtime::samplingconfig::beamsearchdiversityrate (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig23beamSearchDiversityRateE"]], "tensorrt_llm::runtime::samplingconfig::beamwidth (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9beamWidthE"]], "tensorrt_llm::runtime::samplingconfig::lengthpenalty (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig13lengthPenaltyE"]], "tensorrt_llm::runtime::samplingconfig::minlength (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9minLengthE"]], "tensorrt_llm::runtime::samplingconfig::presencepenalty (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig15presencePenaltyE"]], "tensorrt_llm::runtime::samplingconfig::randomseed (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig10randomSeedE"]], "tensorrt_llm::runtime::samplingconfig::repetitionpenalty (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig17repetitionPenaltyE"]], "tensorrt_llm::runtime::samplingconfig::temperature (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig11temperatureE"]], "tensorrt_llm::runtime::samplingconfig::topk (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topKE"]], "tensorrt_llm::runtime::samplingconfig::topp (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topPE"]], "tensorrt_llm::runtime::samplingconfig::toppdecay (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9topPDecayE"]], "tensorrt_llm::runtime::samplingconfig::toppmin (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig7topPMinE"]], "tensorrt_llm::runtime::samplingconfig::toppresetids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig12topPResetIdsE"]], "tensorrt_llm::runtime::sizetype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime8SizeTypeE"]], "tensorrt_llm::runtime::stringptrmap (c++ type)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime12StringPtrMapE"]], "tensorrt_llm::runtime::trtdatatype (c++ struct)": [[2, "_CPPv4I0_bEN12tensorrt_llm7runtime11TRTDataTypeE"]], "tensorrt_llm::runtime::trtdatatype<t*> (c++ struct)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime11TRTDataTypeIP1TEE"]], "tensorrt_llm::runtime::trtdatatype<t*>::kunderlyingtype (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE15kUnderlyingTypeE"]], "tensorrt_llm::runtime::trtdatatype<t*>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE5valueE"]], "tensorrt_llm::runtime::trtdatatype<bool> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIbEE"]], "tensorrt_llm::runtime::trtdatatype<bool>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIbE5valueE"]], "tensorrt_llm::runtime::trtdatatype<float> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIfEE"]], "tensorrt_llm::runtime::trtdatatype<float>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIfE5valueE"]], "tensorrt_llm::runtime::trtdatatype<half> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeI4halfEE"]], "tensorrt_llm::runtime::trtdatatype<half>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeI4halfE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::int32_t> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::int32_t>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::int64_t> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::int64_t>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::int8_t> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::int8_t>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::uint32_t> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::uint32_t>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::uint64_t> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::uint64_t>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::uint8_t> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::uint8_t>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<void*> (c++ struct)": [[2, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIPvEE"]], "tensorrt_llm::runtime::trtdatatype<void*>::value (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIPvE5valueE"]], "tensorrt_llm::runtime::tllmlogger (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime10TllmLoggerE"]], "tensorrt_llm::runtime::tllmlogger::getlevel (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8getLevelEv"]], "tensorrt_llm::runtime::tllmlogger::log (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE"]], "tensorrt_llm::runtime::tllmlogger::setlevel (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8setLevelE8Severity"]], "tensorrt_llm::runtime::tokenidtype (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime11TokenIdTypeE"]], "tensorrt_llm::runtime::worldconfig (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfigE"]], "tensorrt_llm::runtime::worldconfig::worldconfig (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::worldconfig::getdevice (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig9getDeviceEv"]], "tensorrt_llm::runtime::worldconfig::getgpuspernode (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig14getGpusPerNodeEv"]], "tensorrt_llm::runtime::worldconfig::getpipelineparallelgroup (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig24getPipelineParallelGroupEv"]], "tensorrt_llm::runtime::worldconfig::getpipelineparallelrank (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig23getPipelineParallelRankEv"]], "tensorrt_llm::runtime::worldconfig::getpipelineparallelism (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig22getPipelineParallelismEv"]], "tensorrt_llm::runtime::worldconfig::getrank (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getRankEv"]], "tensorrt_llm::runtime::worldconfig::getsize (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getSizeEv"]], "tensorrt_llm::runtime::worldconfig::gettensorparallelrank (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig21getTensorParallelRankEv"]], "tensorrt_llm::runtime::worldconfig::gettensorparallelism (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig20getTensorParallelismEv"]], "tensorrt_llm::runtime::worldconfig::isfirstpipelineparallelrank (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig27isFirstPipelineParallelRankEv"]], "tensorrt_llm::runtime::worldconfig::islastpipelineparallelrank (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig26isLastPipelineParallelRankEv"]], "tensorrt_llm::runtime::worldconfig::ispipelineparallel (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig18isPipelineParallelEv"]], "tensorrt_llm::runtime::worldconfig::istensorparallel (c++ function)": [[2, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig16isTensorParallelEv"]], "tensorrt_llm::runtime::worldconfig::kdefaultgpuspernode (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig19kDefaultGpusPerNodeE"]], "tensorrt_llm::runtime::worldconfig::mgpuspernode (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig12mGpusPerNodeE"]], "tensorrt_llm::runtime::worldconfig::mpipelineparallelism (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig20mPipelineParallelismE"]], "tensorrt_llm::runtime::worldconfig::mrank (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig5mRankE"]], "tensorrt_llm::runtime::worldconfig::mtensorparallelism (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig18mTensorParallelismE"]], "tensorrt_llm::runtime::worldconfig::mpi (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE"], [2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiERN8nvinfer17ILoggerE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE"]], "tensorrt_llm::runtime::worldconfig::validconfig (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11validConfigERN8nvinfer17ILoggerE8SizeType8SizeType"]], "tensorrt_llm::runtime::buffercast (c++ function)": [[2, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer"], [2, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer"]], "tensorrt_llm::runtime::constpointercast (c++ function)": [[2, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE"], [2, "_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE"]], "tensorrt_llm::runtime::decoder (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoderE"]], "tensorrt_llm::runtime::decoder::input (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder5InputE"]], "tensorrt_llm::runtime::decoder::input::input (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder5Input5InputE9TensorPtr"]], "tensorrt_llm::runtime::decoder::input::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder5Input9TensorPtrE"]], "tensorrt_llm::runtime::decoder::input::cacheindirection (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder5Input16cacheIndirectionE"]], "tensorrt_llm::runtime::decoder::input::logits (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder5Input6logitsE"]], "tensorrt_llm::runtime::decoder::output (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder6OutputE"]], "tensorrt_llm::runtime::decoder::output::output (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder6Output6OutputEv"]], "tensorrt_llm::runtime::decoder::output::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder6Output9TensorPtrE"]], "tensorrt_llm::runtime::decoder::output::cacheindirection (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder6Output16cacheIndirectionE"]], "tensorrt_llm::runtime::decoder::output::sequencelengths (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime7decoder6Output15sequenceLengthsE"]], "tensorrt_llm::runtime::decoder_batch (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batchE"]], "tensorrt_llm::runtime::decoder_batch::input (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5InputE"]], "tensorrt_llm::runtime::decoder_batch::input::base (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input4BaseE"]], "tensorrt_llm::runtime::decoder_batch::input::input (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputE9TensorPtr"], [2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputE9TensorPtrRKNSt6vectorIbEE"]], "tensorrt_llm::runtime::decoder_batch::input::active (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input6activeE"]], "tensorrt_llm::runtime::decoder_batch::output (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch6OutputE"]], "tensorrt_llm::runtime::decoder_batch::request (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7RequestE"]], "tensorrt_llm::runtime::decoder_batch::request::request (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE9TensorPtrNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE"]], "tensorrt_llm::runtime::decoder_batch::request::tensorptr (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request9TensorPtrE"]], "tensorrt_llm::runtime::decoder_batch::request::badwordslist (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12badWordsListE"]], "tensorrt_llm::runtime::decoder_batch::request::embeddingbias (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13embeddingBiasE"]], "tensorrt_llm::runtime::decoder_batch::request::endid (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request5endIdE"]], "tensorrt_llm::runtime::decoder_batch::request::ids (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request3idsE"]], "tensorrt_llm::runtime::decoder_batch::request::maxnewtokens (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12maxNewTokensE"]], "tensorrt_llm::runtime::decoder_batch::request::stopwordslist (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13stopWordsListE"]], "tensorrt_llm::runtime::decoder_batch::token (c++ class)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5TokenE"]], "tensorrt_llm::runtime::decoder_batch::token::token (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5TokenERR9CudaEventRKNSt6vectorIbEE"]], "tensorrt_llm::runtime::decoder_batch::token::active (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token6activeE"]], "tensorrt_llm::runtime::decoder_batch::token::event (c++ member)": [[2, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5eventE"]], "tensorrt_llm::runtime::operator<< (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer"], [2, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor"], [2, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE"]], "tensorrt_llm::runtime::setpeeraccess (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime13setPeerAccessE11WorldConfigb"]], "tensorrt_llm::runtime::utils (c++ type)": [[2, "_CPPv4N12tensorrt_llm7runtime5utilsE"]], "tensorrt_llm::runtime::utils::loadengine (c++ function)": [[2, "_CPPv4N12tensorrt_llm7runtime5utils10loadEngineERKNSt6stringE"]], "auto (tensorrt_llm.functional.allreducestrategy attribute)": [[12, "tensorrt_llm.functional.AllReduceStrategy.AUTO"]], "allreducestrategy (class in tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.AllReduceStrategy"]], "attentionmasktype (class in tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.AttentionMaskType"]], "dimrange (class in tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.DimRange"]], "groupnorm (tensorrt_llm.functional.layernormtype attribute)": [[12, "tensorrt_llm.functional.LayerNormType.GroupNorm"]], "layernorm (tensorrt_llm.functional.layernormtype attribute)": [[12, "tensorrt_llm.functional.LayerNormType.LayerNorm"]], "layernormpositiontype (class in tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.LayerNormPositionType"]], "layernormtype (class in tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.LayerNormType"]], "oneshot (tensorrt_llm.functional.allreducestrategy attribute)": [[12, "tensorrt_llm.functional.AllReduceStrategy.ONESHOT"]], "positionembeddingtype (class in tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.PositionEmbeddingType"]], "ring (tensorrt_llm.functional.allreducestrategy attribute)": [[12, "tensorrt_llm.functional.AllReduceStrategy.RING"]], "rmsnorm (tensorrt_llm.functional.layernormtype attribute)": [[12, "tensorrt_llm.functional.LayerNormType.RmsNorm"]], "rotaryscalingtype (class in tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.RotaryScalingType"]], "twoshot (tensorrt_llm.functional.allreducestrategy attribute)": [[12, "tensorrt_llm.functional.AllReduceStrategy.TWOSHOT"]], "tensor (class in tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.Tensor"]], "abs() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.abs"]], "abs() (tensorrt_llm.functional.tensor method)": [[12, "tensorrt_llm.functional.Tensor.abs"]], "activation() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.activation"]], "add() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.add"]], "alibi (tensorrt_llm.functional.positionembeddingtype attribute)": [[12, "tensorrt_llm.functional.PositionEmbeddingType.alibi"]], "alibi_with_scale (tensorrt_llm.functional.positionembeddingtype attribute)": [[12, "tensorrt_llm.functional.PositionEmbeddingType.alibi_with_scale"]], "allgather() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.allgather"]], "allreduce() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.allreduce"]], "arange() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.arange"]], "argmax() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.argmax"]], "assertion() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.assertion"]], "avg_pool2d() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.avg_pool2d"]], "bert_attention() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.bert_attention"]], "bidirectional (tensorrt_llm.functional.attentionmasktype attribute)": [[12, "tensorrt_llm.functional.AttentionMaskType.bidirectional"]], "broadcast_helper() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.broadcast_helper"]], "cast() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.cast"]], "cast() (tensorrt_llm.functional.tensor method)": [[12, "tensorrt_llm.functional.Tensor.cast"]], "causal (tensorrt_llm.functional.attentionmasktype attribute)": [[12, "tensorrt_llm.functional.AttentionMaskType.causal"]], "choices() (tensorrt_llm.functional.positionembeddingtype static method)": [[12, "tensorrt_llm.functional.PositionEmbeddingType.choices"]], "chunk() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.chunk"]], "clip() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.clip"]], "concat() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.concat"]], "constant() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.constant"]], "constant_to_tensor_() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.constant_to_tensor_"]], "conv2d() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.conv2d"]], "conv_transpose2d() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.conv_transpose2d"]], "cos() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.cos"]], "div() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.div"]], "dtype (tensorrt_llm.functional.tensor property)": [[12, "tensorrt_llm.functional.Tensor.dtype"]], "dynamic (tensorrt_llm.functional.rotaryscalingtype attribute)": [[12, "tensorrt_llm.functional.RotaryScalingType.dynamic"]], "einsum() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.einsum"]], "elementwise_binary() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.elementwise_binary"]], "embedding() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.embedding"]], "eq() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.eq"]], "exp() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.exp"]], "expand() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.expand"]], "expand_dims() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.expand_dims"]], "expand_dims_like() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.expand_dims_like"]], "expand_mask() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.expand_mask"]], "flip() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.flip"]], "gather() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.gather"]], "gather_last_token_logits() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.gather_last_token_logits"]], "geglu() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.geglu"]], "gelu() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.gelu"]], "generate_alibi_biases() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.generate_alibi_biases"]], "generate_alibi_slopes() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.generate_alibi_slopes"]], "get_parent() (tensorrt_llm.functional.tensor method)": [[12, "tensorrt_llm.functional.Tensor.get_parent"]], "get_users() (tensorrt_llm.functional.tensor method)": [[12, "tensorrt_llm.functional.Tensor.get_users"]], "gpt_attention() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.gpt_attention"]], "group_norm() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.group_norm"]], "gt() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.gt"]], "identity() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.identity"]], "index_select() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.index_select"]], "interpolate() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.interpolate"]], "is_alibi() (tensorrt_llm.functional.positionembeddingtype method)": [[12, "tensorrt_llm.functional.PositionEmbeddingType.is_alibi"]], "is_dynamic() (tensorrt_llm.functional.tensor method)": [[12, "tensorrt_llm.functional.Tensor.is_dynamic"]], "is_gated_activation() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.is_gated_activation"]], "is_rope() (tensorrt_llm.functional.positionembeddingtype method)": [[12, "tensorrt_llm.functional.PositionEmbeddingType.is_rope"]], "is_trt_wrapper() (tensorrt_llm.functional.tensor method)": [[12, "tensorrt_llm.functional.Tensor.is_trt_wrapper"]], "layer_norm() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.layer_norm"]], "learned_absolute (tensorrt_llm.functional.positionembeddingtype attribute)": [[12, "tensorrt_llm.functional.PositionEmbeddingType.learned_absolute"]], "linear (tensorrt_llm.functional.rotaryscalingtype attribute)": [[12, "tensorrt_llm.functional.RotaryScalingType.linear"]], "location (tensorrt_llm.functional.tensor property)": [[12, "tensorrt_llm.functional.Tensor.location"]], "lt() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.lt"]], "mark_output() (tensorrt_llm.functional.tensor method)": [[12, "tensorrt_llm.functional.Tensor.mark_output"]], "matmul() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.matmul"]], "max() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.max"]], "max() (tensorrt_llm.functional.tensor method)": [[12, "tensorrt_llm.functional.Tensor.max"]], "maximum() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.maximum"]], "mean() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.mean"]], "mean() (tensorrt_llm.functional.tensor method)": [[12, "tensorrt_llm.functional.Tensor.mean"]], "minimum() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.minimum"]], "module": [[12, "module-tensorrt_llm"], [12, "module-tensorrt_llm.functional"], [13, "module-tensorrt_llm"], [13, "module-tensorrt_llm.layers.activation"], [13, "module-tensorrt_llm.layers.attention"], [13, "module-tensorrt_llm.layers.cast"], [13, "module-tensorrt_llm.layers.conv"], [13, "module-tensorrt_llm.layers.embedding"], [13, "module-tensorrt_llm.layers.linear"], [13, "module-tensorrt_llm.layers.mlp"], [13, "module-tensorrt_llm.layers.normalization"], [13, "module-tensorrt_llm.layers.pooling"], [14, "module-tensorrt_llm"], [14, "module-tensorrt_llm.models"], [15, "module-tensorrt_llm"], [15, "module-tensorrt_llm.plugin"], [16, "module-tensorrt_llm"], [16, "module-tensorrt_llm.quantization"], [17, "module-tensorrt_llm"], [17, "module-tensorrt_llm.runtime"]], "mul() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.mul"]], "name (tensorrt_llm.functional.tensor property)": [[12, "tensorrt_llm.functional.Tensor.name"]], "ndim() (tensorrt_llm.functional.tensor method)": [[12, "tensorrt_llm.functional.Tensor.ndim"]], "non_gated_version() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.non_gated_version"]], "none (tensorrt_llm.functional.rotaryscalingtype attribute)": [[12, "tensorrt_llm.functional.RotaryScalingType.none"]], "op_and() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.op_and"]], "op_or() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.op_or"]], "outer() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.outer"]], "padding (tensorrt_llm.functional.attentionmasktype attribute)": [[12, "tensorrt_llm.functional.AttentionMaskType.padding"]], "permute() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.permute"]], "permute() (tensorrt_llm.functional.tensor method)": [[12, "tensorrt_llm.functional.Tensor.permute"]], "post_layernorm (tensorrt_llm.functional.layernormpositiontype attribute)": [[12, "tensorrt_llm.functional.LayerNormPositionType.post_layernorm"]], "pow() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.pow"]], "pre_layernorm (tensorrt_llm.functional.layernormpositiontype attribute)": [[12, "tensorrt_llm.functional.LayerNormPositionType.pre_layernorm"]], "rank() (tensorrt_llm.functional.tensor method)": [[12, "tensorrt_llm.functional.Tensor.rank"]], "recv() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.recv"]], "relative (tensorrt_llm.functional.positionembeddingtype attribute)": [[12, "tensorrt_llm.functional.PositionEmbeddingType.relative"]], "relu() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.relu"]], "replace_all_uses_with() (tensorrt_llm.functional.tensor method)": [[12, "tensorrt_llm.functional.Tensor.replace_all_uses_with"]], "rms_norm() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.rms_norm"]], "rope_gpt_neox (tensorrt_llm.functional.positionembeddingtype attribute)": [[12, "tensorrt_llm.functional.PositionEmbeddingType.rope_gpt_neox"]], "rope_gptj (tensorrt_llm.functional.positionembeddingtype attribute)": [[12, "tensorrt_llm.functional.PositionEmbeddingType.rope_gptj"]], "round() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.round"]], "select() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.select"]], "send() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.send"]], "shape (tensorrt_llm.functional.tensor property)": [[12, "tensorrt_llm.functional.Tensor.shape"]], "shape() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.shape"]], "sigmoid() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.sigmoid"]], "silu() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.silu"]], "sin() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.sin"]], "size() (tensorrt_llm.functional.tensor method)": [[12, "tensorrt_llm.functional.Tensor.size"]], "slice() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.slice"]], "softmax() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.softmax"]], "softplus() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.softplus"]], "split() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.split"]], "split() (tensorrt_llm.functional.tensor method)": [[12, "tensorrt_llm.functional.Tensor.split"]], "sqrt() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.sqrt"]], "sqrt() (tensorrt_llm.functional.tensor method)": [[12, "tensorrt_llm.functional.Tensor.sqrt"]], "squared_relu() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.squared_relu"]], "sub() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.sub"]], "swiglu() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.swiglu"]], "tanh() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.tanh"]], "tensorrt_llm": [[12, "module-tensorrt_llm"], [13, "module-tensorrt_llm"], [14, "module-tensorrt_llm"], [15, "module-tensorrt_llm"], [16, "module-tensorrt_llm"], [17, "module-tensorrt_llm"]], "tensorrt_llm.functional": [[12, "module-tensorrt_llm.functional"]], "transpose() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.transpose"]], "transpose() (tensorrt_llm.functional.tensor method)": [[12, "tensorrt_llm.functional.Tensor.transpose"]], "unary() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.unary"]], "unsqueeze() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.unsqueeze"]], "view() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.view"]], "view() (tensorrt_llm.functional.tensor method)": [[12, "tensorrt_llm.functional.Tensor.view"]], "where() (in module tensorrt_llm.functional)": [[12, "tensorrt_llm.functional.where"]], "attention (class in tensorrt_llm.layers.attention)": [[13, "tensorrt_llm.layers.attention.Attention"]], "attentionparams (class in tensorrt_llm.layers.attention)": [[13, "tensorrt_llm.layers.attention.AttentionParams"]], "avgpool2d (class in tensorrt_llm.layers.pooling)": [[13, "tensorrt_llm.layers.pooling.AvgPool2d"]], "bertattention (class in tensorrt_llm.layers.attention)": [[13, "tensorrt_llm.layers.attention.BertAttention"]], "cast (class in tensorrt_llm.layers.cast)": [[13, "tensorrt_llm.layers.cast.Cast"]], "columnlinear (in module tensorrt_llm.layers.linear)": [[13, "tensorrt_llm.layers.linear.ColumnLinear"]], "conv2d (class in tensorrt_llm.layers.conv)": [[13, "tensorrt_llm.layers.conv.Conv2d"]], "convtranspose2d (class in tensorrt_llm.layers.conv)": [[13, "tensorrt_llm.layers.conv.ConvTranspose2d"]], "embedding (class in tensorrt_llm.layers.embedding)": [[13, "tensorrt_llm.layers.embedding.Embedding"]], "gatedmlp (class in tensorrt_llm.layers.mlp)": [[13, "tensorrt_llm.layers.mlp.GatedMLP"]], "groupnorm (class in tensorrt_llm.layers.normalization)": [[13, "tensorrt_llm.layers.normalization.GroupNorm"]], "keyvaluecacheparams (class in tensorrt_llm.layers.attention)": [[13, "tensorrt_llm.layers.attention.KeyValueCacheParams"]], "layernorm (class in tensorrt_llm.layers.normalization)": [[13, "tensorrt_llm.layers.normalization.LayerNorm"]], "linear (class in tensorrt_llm.layers.linear)": [[13, "tensorrt_llm.layers.linear.Linear"]], "mlp (class in tensorrt_llm.layers.mlp)": [[13, "tensorrt_llm.layers.mlp.MLP"]], "mish (class in tensorrt_llm.layers.activation)": [[13, "tensorrt_llm.layers.activation.Mish"]], "prompttuningembedding (class in tensorrt_llm.layers.embedding)": [[13, "tensorrt_llm.layers.embedding.PromptTuningEmbedding"]], "rmsnorm (class in tensorrt_llm.layers.normalization)": [[13, "tensorrt_llm.layers.normalization.RmsNorm"]], "rowlinear (class in tensorrt_llm.layers.linear)": [[13, "tensorrt_llm.layers.linear.RowLinear"]], "forward() (tensorrt_llm.layers.activation.mish method)": [[13, "tensorrt_llm.layers.activation.Mish.forward"]], "forward() (tensorrt_llm.layers.attention.attention method)": [[13, "tensorrt_llm.layers.attention.Attention.forward"]], "forward() (tensorrt_llm.layers.attention.bertattention method)": [[13, "tensorrt_llm.layers.attention.BertAttention.forward"]], "forward() (tensorrt_llm.layers.cast.cast method)": [[13, "tensorrt_llm.layers.cast.Cast.forward"]], "forward() (tensorrt_llm.layers.conv.conv2d method)": [[13, "tensorrt_llm.layers.conv.Conv2d.forward"]], "forward() (tensorrt_llm.layers.conv.convtranspose2d method)": [[13, "tensorrt_llm.layers.conv.ConvTranspose2d.forward"]], "forward() (tensorrt_llm.layers.embedding.embedding method)": [[13, "tensorrt_llm.layers.embedding.Embedding.forward"]], "forward() (tensorrt_llm.layers.embedding.prompttuningembedding method)": [[13, "tensorrt_llm.layers.embedding.PromptTuningEmbedding.forward"]], "forward() (tensorrt_llm.layers.linear.linear method)": [[13, "tensorrt_llm.layers.linear.Linear.forward"]], "forward() (tensorrt_llm.layers.linear.rowlinear method)": [[13, "tensorrt_llm.layers.linear.RowLinear.forward"]], "forward() (tensorrt_llm.layers.mlp.gatedmlp method)": [[13, "tensorrt_llm.layers.mlp.GatedMLP.forward"]], "forward() (tensorrt_llm.layers.mlp.mlp method)": [[13, "tensorrt_llm.layers.mlp.MLP.forward"]], "forward() (tensorrt_llm.layers.normalization.groupnorm method)": [[13, "tensorrt_llm.layers.normalization.GroupNorm.forward"]], "forward() (tensorrt_llm.layers.normalization.layernorm method)": [[13, "tensorrt_llm.layers.normalization.LayerNorm.forward"]], "forward() (tensorrt_llm.layers.normalization.rmsnorm method)": [[13, "tensorrt_llm.layers.normalization.RmsNorm.forward"]], "forward() (tensorrt_llm.layers.pooling.avgpool2d method)": [[13, "tensorrt_llm.layers.pooling.AvgPool2d.forward"]], "get_first_kv_cache_block_pointers() (tensorrt_llm.layers.attention.keyvaluecacheparams method)": [[13, "tensorrt_llm.layers.attention.KeyValueCacheParams.get_first_kv_cache_block_pointers"]], "get_first_past_key_value() (tensorrt_llm.layers.attention.keyvaluecacheparams method)": [[13, "tensorrt_llm.layers.attention.KeyValueCacheParams.get_first_past_key_value"]], "is_valid() (tensorrt_llm.layers.attention.attentionparams method)": [[13, "tensorrt_llm.layers.attention.AttentionParams.is_valid"]], "is_valid() (tensorrt_llm.layers.attention.keyvaluecacheparams method)": [[13, "tensorrt_llm.layers.attention.KeyValueCacheParams.is_valid"]], "is_valid_cross_attn() (tensorrt_llm.layers.attention.attentionparams method)": [[13, "tensorrt_llm.layers.attention.AttentionParams.is_valid_cross_attn"]], "multiply_gather() (tensorrt_llm.layers.linear.linear method)": [[13, "tensorrt_llm.layers.linear.Linear.multiply_gather"]], "multiply_reduce() (tensorrt_llm.layers.linear.rowlinear method)": [[13, "tensorrt_llm.layers.linear.RowLinear.multiply_reduce"]], "tensorrt_llm.layers.activation": [[13, "module-tensorrt_llm.layers.activation"]], "tensorrt_llm.layers.attention": [[13, "module-tensorrt_llm.layers.attention"]], "tensorrt_llm.layers.cast": [[13, "module-tensorrt_llm.layers.cast"]], "tensorrt_llm.layers.conv": [[13, "module-tensorrt_llm.layers.conv"]], "tensorrt_llm.layers.embedding": [[13, "module-tensorrt_llm.layers.embedding"]], "tensorrt_llm.layers.linear": [[13, "module-tensorrt_llm.layers.linear"]], "tensorrt_llm.layers.mlp": [[13, "module-tensorrt_llm.layers.mlp"]], "tensorrt_llm.layers.normalization": [[13, "module-tensorrt_llm.layers.normalization"]], "tensorrt_llm.layers.pooling": [[13, "module-tensorrt_llm.layers.pooling"]], "baichuanforcausallm (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.BaichuanForCausalLM"]], "bertforquestionanswering (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.BertForQuestionAnswering"]], "bertmodel (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.BertModel"]], "bloomforcausallm (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.BloomForCausalLM"]], "bloommodel (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.BloomModel"]], "chatglm2headmodel (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.ChatGLM2HeadModel"]], "chatglm2model (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.ChatGLM2Model"]], "chatglm6bheadmodel (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.ChatGLM6BHeadModel"]], "chatglm6bmodel (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.ChatGLM6BModel"]], "decodermodel (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.DecoderModel"]], "encodermodel (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.EncoderModel"]], "falconforcausallm (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.FalconForCausalLM"]], "falconmodel (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.FalconModel"]], "gptjforcausallm (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.GPTJForCausalLM"]], "gptjmodel (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.GPTJModel"]], "gptlmheadmodel (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.GPTLMHeadModel"]], "gptmodel (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.GPTModel"]], "gptneoxforcausallm (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.GPTNeoXForCausalLM"]], "gptneoxmodel (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.GPTNeoXModel"]], "llamaforcausallm (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.LLaMAForCausalLM"]], "llamamodel (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.LLaMAModel"]], "optlmheadmodel (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.OPTLMHeadModel"]], "optmodel (class in tensorrt_llm.models)": [[14, "tensorrt_llm.models.OPTModel"]], "forward() (tensorrt_llm.models.baichuanforcausallm method)": [[14, "tensorrt_llm.models.BaichuanForCausalLM.forward"]], "forward() (tensorrt_llm.models.bertforquestionanswering method)": [[14, "tensorrt_llm.models.BertForQuestionAnswering.forward"]], "forward() (tensorrt_llm.models.bertmodel method)": [[14, "tensorrt_llm.models.BertModel.forward"]], "forward() (tensorrt_llm.models.bloomforcausallm method)": [[14, "tensorrt_llm.models.BloomForCausalLM.forward"]], "forward() (tensorrt_llm.models.bloommodel method)": [[14, "tensorrt_llm.models.BloomModel.forward"]], "forward() (tensorrt_llm.models.chatglm2headmodel method)": [[14, "tensorrt_llm.models.ChatGLM2HeadModel.forward"]], "forward() (tensorrt_llm.models.chatglm2model method)": [[14, "tensorrt_llm.models.ChatGLM2Model.forward"]], "forward() (tensorrt_llm.models.chatglm6bheadmodel method)": [[14, "tensorrt_llm.models.ChatGLM6BHeadModel.forward"]], "forward() (tensorrt_llm.models.chatglm6bmodel method)": [[14, "tensorrt_llm.models.ChatGLM6BModel.forward"]], "forward() (tensorrt_llm.models.decodermodel method)": [[14, "tensorrt_llm.models.DecoderModel.forward"]], "forward() (tensorrt_llm.models.encodermodel method)": [[14, "tensorrt_llm.models.EncoderModel.forward"]], "forward() (tensorrt_llm.models.falconforcausallm method)": [[14, "tensorrt_llm.models.FalconForCausalLM.forward"]], "forward() (tensorrt_llm.models.falconmodel method)": [[14, "tensorrt_llm.models.FalconModel.forward"]], "forward() (tensorrt_llm.models.gptjforcausallm method)": [[14, "tensorrt_llm.models.GPTJForCausalLM.forward"]], "forward() (tensorrt_llm.models.gptjmodel method)": [[14, "tensorrt_llm.models.GPTJModel.forward"]], "forward() (tensorrt_llm.models.gptlmheadmodel method)": [[14, "tensorrt_llm.models.GPTLMHeadModel.forward"]], "forward() (tensorrt_llm.models.gptmodel method)": [[14, "tensorrt_llm.models.GPTModel.forward"]], "forward() (tensorrt_llm.models.gptneoxforcausallm method)": [[14, "tensorrt_llm.models.GPTNeoXForCausalLM.forward"]], "forward() (tensorrt_llm.models.gptneoxmodel method)": [[14, "tensorrt_llm.models.GPTNeoXModel.forward"]], "forward() (tensorrt_llm.models.llamaforcausallm method)": [[14, "tensorrt_llm.models.LLaMAForCausalLM.forward"]], "forward() (tensorrt_llm.models.llamamodel method)": [[14, "tensorrt_llm.models.LLaMAModel.forward"]], "forward() (tensorrt_llm.models.optlmheadmodel method)": [[14, "tensorrt_llm.models.OPTLMHeadModel.forward"]], "forward() (tensorrt_llm.models.optmodel method)": [[14, "tensorrt_llm.models.OPTModel.forward"]], "fp8_quantize() (in module tensorrt_llm.models)": [[14, "tensorrt_llm.models.fp8_quantize"]], "prepare_inputs() (tensorrt_llm.models.baichuanforcausallm method)": [[14, "tensorrt_llm.models.BaichuanForCausalLM.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.bloomforcausallm method)": [[14, "tensorrt_llm.models.BloomForCausalLM.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.chatglm2headmodel method)": [[14, "tensorrt_llm.models.ChatGLM2HeadModel.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.chatglm6bheadmodel method)": [[14, "tensorrt_llm.models.ChatGLM6BHeadModel.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.decodermodel method)": [[14, "tensorrt_llm.models.DecoderModel.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.encodermodel method)": [[14, "tensorrt_llm.models.EncoderModel.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.falconforcausallm method)": [[14, "tensorrt_llm.models.FalconForCausalLM.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.gptjforcausallm method)": [[14, "tensorrt_llm.models.GPTJForCausalLM.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.gptlmheadmodel method)": [[14, "tensorrt_llm.models.GPTLMHeadModel.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.gptneoxforcausallm method)": [[14, "tensorrt_llm.models.GPTNeoXForCausalLM.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.llamaforcausallm method)": [[14, "tensorrt_llm.models.LLaMAForCausalLM.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.optlmheadmodel method)": [[14, "tensorrt_llm.models.OPTLMHeadModel.prepare_inputs"]], "smooth_quantize() (in module tensorrt_llm.models)": [[14, "tensorrt_llm.models.smooth_quantize"]], "tensorrt_llm.models": [[14, "module-tensorrt_llm.models"]], "weight_only_groupwise_quantize() (in module tensorrt_llm.models)": [[14, "tensorrt_llm.models.weight_only_groupwise_quantize"]], "weight_only_quantize() (in module tensorrt_llm.models)": [[14, "tensorrt_llm.models.weight_only_quantize"]], "tensorrt_llm.plugin": [[15, "module-tensorrt_llm.plugin"]], "quantmode (class in tensorrt_llm.quantization)": [[16, "tensorrt_llm.quantization.QuantMode"]], "tensorrt_llm.quantization": [[16, "module-tensorrt_llm.quantization"]], "chatglm6bheadmodelgenerationsession (class in tensorrt_llm.runtime)": [[17, "tensorrt_llm.runtime.ChatGLM6BHeadModelGenerationSession"]], "generationsequence (class in tensorrt_llm.runtime)": [[17, "tensorrt_llm.runtime.GenerationSequence"]], "generationsession (class in tensorrt_llm.runtime)": [[17, "tensorrt_llm.runtime.GenerationSession"]], "kvcachemanager (class in tensorrt_llm.runtime)": [[17, "tensorrt_llm.runtime.KVCacheManager"]], "modelconfig (class in tensorrt_llm.runtime)": [[17, "tensorrt_llm.runtime.ModelConfig"]], "samplingconfig (class in tensorrt_llm.runtime)": [[17, "tensorrt_llm.runtime.SamplingConfig"]], "session (class in tensorrt_llm.runtime)": [[17, "tensorrt_llm.runtime.Session"]], "tensorinfo (class in tensorrt_llm.runtime)": [[17, "tensorrt_llm.runtime.TensorInfo"]], "add_sequence() (tensorrt_llm.runtime.kvcachemanager method)": [[17, "tensorrt_llm.runtime.KVCacheManager.add_sequence"]], "batch_size (tensorrt_llm.runtime.chatglm6bheadmodelgenerationsession attribute)": [[17, "tensorrt_llm.runtime.ChatGLM6BHeadModelGenerationSession.batch_size"]], "batch_size (tensorrt_llm.runtime.generationsession attribute)": [[17, "tensorrt_llm.runtime.GenerationSession.batch_size"]], "beam_search_diversity_rate (tensorrt_llm.runtime.samplingconfig attribute)": [[17, "tensorrt_llm.runtime.SamplingConfig.beam_search_diversity_rate"]], "buffer_allocated (tensorrt_llm.runtime.chatglm6bheadmodelgenerationsession attribute)": [[17, "tensorrt_llm.runtime.ChatGLM6BHeadModelGenerationSession.buffer_allocated"]], "buffer_allocated (tensorrt_llm.runtime.generationsession attribute)": [[17, "tensorrt_llm.runtime.GenerationSession.buffer_allocated"]], "context (tensorrt_llm.runtime.session property)": [[17, "tensorrt_llm.runtime.Session.context"]], "cross_attention (tensorrt_llm.runtime.generationsession property)": [[17, "tensorrt_llm.runtime.GenerationSession.cross_attention"]], "cross_attention (tensorrt_llm.runtime.modelconfig attribute)": [[17, "tensorrt_llm.runtime.ModelConfig.cross_attention"]], "cuda_graph_mode (tensorrt_llm.runtime.chatglm6bheadmodelgenerationsession attribute)": [[17, "tensorrt_llm.runtime.ChatGLM6BHeadModelGenerationSession.cuda_graph_mode"]], "cuda_graph_mode (tensorrt_llm.runtime.generationsession attribute)": [[17, "tensorrt_llm.runtime.GenerationSession.cuda_graph_mode"]], "cuda_stream_guard() (tensorrt_llm.runtime.generationsession method)": [[17, "tensorrt_llm.runtime.GenerationSession.cuda_stream_guard"]], "debug_mode (tensorrt_llm.runtime.chatglm6bheadmodelgenerationsession attribute)": [[17, "tensorrt_llm.runtime.ChatGLM6BHeadModelGenerationSession.debug_mode"]], "debug_mode (tensorrt_llm.runtime.generationsession attribute)": [[17, "tensorrt_llm.runtime.GenerationSession.debug_mode"]], "debug_tensors_to_save (tensorrt_llm.runtime.chatglm6bheadmodelgenerationsession attribute)": [[17, "tensorrt_llm.runtime.ChatGLM6BHeadModelGenerationSession.debug_tensors_to_save"]], "debug_tensors_to_save (tensorrt_llm.runtime.generationsession attribute)": [[17, "tensorrt_llm.runtime.GenerationSession.debug_tensors_to_save"]], "decode() (tensorrt_llm.runtime.generationsession method)": [[17, "tensorrt_llm.runtime.GenerationSession.decode"]], "decode_batch() (tensorrt_llm.runtime.generationsession method)": [[17, "tensorrt_llm.runtime.GenerationSession.decode_batch"]], "decode_regular() (tensorrt_llm.runtime.generationsession method)": [[17, "tensorrt_llm.runtime.GenerationSession.decode_regular"]], "decode_stream() (tensorrt_llm.runtime.generationsession method)": [[17, "tensorrt_llm.runtime.GenerationSession.decode_stream"]], "device (tensorrt_llm.runtime.chatglm6bheadmodelgenerationsession attribute)": [[17, "tensorrt_llm.runtime.ChatGLM6BHeadModelGenerationSession.device"]], "device (tensorrt_llm.runtime.generationsession attribute)": [[17, "tensorrt_llm.runtime.GenerationSession.device"]], "dtype (tensorrt_llm.runtime.generationsession property)": [[17, "tensorrt_llm.runtime.GenerationSession.dtype"]], "dtype (tensorrt_llm.runtime.modelconfig attribute)": [[17, "tensorrt_llm.runtime.ModelConfig.dtype"]], "dtype (tensorrt_llm.runtime.tensorinfo attribute)": [[17, "tensorrt_llm.runtime.TensorInfo.dtype"]], "end_id (tensorrt_llm.runtime.samplingconfig attribute)": [[17, "tensorrt_llm.runtime.SamplingConfig.end_id"]], "engine (tensorrt_llm.runtime.session property)": [[17, "tensorrt_llm.runtime.Session.engine"]], "finalize_decoder() (tensorrt_llm.runtime.generationsession method)": [[17, "tensorrt_llm.runtime.GenerationSession.finalize_decoder"]], "first_layer (tensorrt_llm.runtime.generationsession property)": [[17, "tensorrt_llm.runtime.GenerationSession.first_layer"]], "from_engine() (tensorrt_llm.runtime.session static method)": [[17, "tensorrt_llm.runtime.Session.from_engine"]], "from_serialized_engine() (tensorrt_llm.runtime.session static method)": [[17, "tensorrt_llm.runtime.Session.from_serialized_engine"]], "gather_all_token_logits (tensorrt_llm.runtime.generationsession property)": [[17, "tensorrt_llm.runtime.GenerationSession.gather_all_token_logits"]], "gather_all_token_logits (tensorrt_llm.runtime.modelconfig attribute)": [[17, "tensorrt_llm.runtime.ModelConfig.gather_all_token_logits"]], "get_batch_idx() (tensorrt_llm.runtime.generationsequence method)": [[17, "tensorrt_llm.runtime.GenerationSequence.get_batch_idx"]], "get_pointer_arrays() (tensorrt_llm.runtime.kvcachemanager method)": [[17, "tensorrt_llm.runtime.KVCacheManager.get_pointer_arrays"]], "get_seq_idx() (tensorrt_llm.runtime.generationsequence method)": [[17, "tensorrt_llm.runtime.GenerationSequence.get_seq_idx"]], "gpt_attention_plugin (tensorrt_llm.runtime.modelconfig attribute)": [[17, "tensorrt_llm.runtime.ModelConfig.gpt_attention_plugin"]], "handle_per_step() (tensorrt_llm.runtime.generationsession method)": [[17, "tensorrt_llm.runtime.GenerationSession.handle_per_step"]], "has_position_embedding (tensorrt_llm.runtime.generationsession property)": [[17, "tensorrt_llm.runtime.GenerationSession.has_position_embedding"]], "has_position_embedding (tensorrt_llm.runtime.modelconfig attribute)": [[17, "tensorrt_llm.runtime.ModelConfig.has_position_embedding"]], "has_token_type_embedding (tensorrt_llm.runtime.generationsession property)": [[17, "tensorrt_llm.runtime.GenerationSession.has_token_type_embedding"]], "has_token_type_embedding (tensorrt_llm.runtime.modelconfig attribute)": [[17, "tensorrt_llm.runtime.ModelConfig.has_token_type_embedding"]], "head_size (tensorrt_llm.runtime.generationsession property)": [[17, "tensorrt_llm.runtime.GenerationSession.head_size"]], "hidden_size (tensorrt_llm.runtime.generationsession property)": [[17, "tensorrt_llm.runtime.GenerationSession.hidden_size"]], "hidden_size (tensorrt_llm.runtime.modelconfig attribute)": [[17, "tensorrt_llm.runtime.ModelConfig.hidden_size"]], "infer_shapes() (tensorrt_llm.runtime.session method)": [[17, "tensorrt_llm.runtime.Session.infer_shapes"]], "last_layer (tensorrt_llm.runtime.generationsession property)": [[17, "tensorrt_llm.runtime.GenerationSession.last_layer"]], "length_penalty (tensorrt_llm.runtime.samplingconfig attribute)": [[17, "tensorrt_llm.runtime.SamplingConfig.length_penalty"]], "mapping (tensorrt_llm.runtime.chatglm6bheadmodelgenerationsession attribute)": [[17, "tensorrt_llm.runtime.ChatGLM6BHeadModelGenerationSession.mapping"]], "mapping (tensorrt_llm.runtime.generationsession attribute)": [[17, "tensorrt_llm.runtime.GenerationSession.mapping"]], "min_length (tensorrt_llm.runtime.samplingconfig attribute)": [[17, "tensorrt_llm.runtime.SamplingConfig.min_length"]], "model_name (tensorrt_llm.runtime.modelconfig attribute)": [[17, "tensorrt_llm.runtime.ModelConfig.model_name"]], "name (tensorrt_llm.runtime.tensorinfo attribute)": [[17, "tensorrt_llm.runtime.TensorInfo.name"]], "num_beams (tensorrt_llm.runtime.samplingconfig attribute)": [[17, "tensorrt_llm.runtime.SamplingConfig.num_beams"]], "num_heads (tensorrt_llm.runtime.generationsession property)": [[17, "tensorrt_llm.runtime.GenerationSession.num_heads"]], "num_heads (tensorrt_llm.runtime.modelconfig attribute)": [[17, "tensorrt_llm.runtime.ModelConfig.num_heads"]], "num_heads_kv (tensorrt_llm.runtime.generationsession property)": [[17, "tensorrt_llm.runtime.GenerationSession.num_heads_kv"]], "num_kv_heads (tensorrt_llm.runtime.modelconfig attribute)": [[17, "tensorrt_llm.runtime.ModelConfig.num_kv_heads"]], "num_layers (tensorrt_llm.runtime.generationsession property)": [[17, "tensorrt_llm.runtime.GenerationSession.num_layers"]], "num_layers (tensorrt_llm.runtime.modelconfig attribute)": [[17, "tensorrt_llm.runtime.ModelConfig.num_layers"]], "output_cum_log_probs (tensorrt_llm.runtime.samplingconfig attribute)": [[17, "tensorrt_llm.runtime.SamplingConfig.output_cum_log_probs"]], "output_log_probs (tensorrt_llm.runtime.samplingconfig attribute)": [[17, "tensorrt_llm.runtime.SamplingConfig.output_log_probs"]], "pad_id (tensorrt_llm.runtime.samplingconfig attribute)": [[17, "tensorrt_llm.runtime.SamplingConfig.pad_id"]], "paged_kv_cache (tensorrt_llm.runtime.generationsession property)": [[17, "tensorrt_llm.runtime.GenerationSession.paged_kv_cache"]], "paged_kv_cache (tensorrt_llm.runtime.modelconfig attribute)": [[17, "tensorrt_llm.runtime.ModelConfig.paged_kv_cache"]], "pp_communicate_final_output_ids() (tensorrt_llm.runtime.generationsession method)": [[17, "tensorrt_llm.runtime.GenerationSession.pp_communicate_final_output_ids"]], "pp_communicate_new_tokens() (tensorrt_llm.runtime.generationsession method)": [[17, "tensorrt_llm.runtime.GenerationSession.pp_communicate_new_tokens"]], "presence_penalty (tensorrt_llm.runtime.samplingconfig attribute)": [[17, "tensorrt_llm.runtime.SamplingConfig.presence_penalty"]], "quant_mode (tensorrt_llm.runtime.generationsession property)": [[17, "tensorrt_llm.runtime.GenerationSession.quant_mode"]], "quant_mode (tensorrt_llm.runtime.modelconfig attribute)": [[17, "tensorrt_llm.runtime.ModelConfig.quant_mode"]], "random_seed (tensorrt_llm.runtime.samplingconfig attribute)": [[17, "tensorrt_llm.runtime.SamplingConfig.random_seed"]], "remove_input_padding (tensorrt_llm.runtime.generationsession property)": [[17, "tensorrt_llm.runtime.GenerationSession.remove_input_padding"]], "remove_input_padding (tensorrt_llm.runtime.modelconfig attribute)": [[17, "tensorrt_llm.runtime.ModelConfig.remove_input_padding"]], "repetition_penalty (tensorrt_llm.runtime.samplingconfig attribute)": [[17, "tensorrt_llm.runtime.SamplingConfig.repetition_penalty"]], "run() (tensorrt_llm.runtime.session method)": [[17, "tensorrt_llm.runtime.Session.run"]], "runtime (tensorrt_llm.runtime.chatglm6bheadmodelgenerationsession attribute)": [[17, "tensorrt_llm.runtime.ChatGLM6BHeadModelGenerationSession.runtime"]], "runtime (tensorrt_llm.runtime.generationsession attribute)": [[17, "tensorrt_llm.runtime.GenerationSession.runtime"]], "runtime (tensorrt_llm.runtime.session property)": [[17, "tensorrt_llm.runtime.Session.runtime"]], "setup() (tensorrt_llm.runtime.generationsession method)": [[17, "tensorrt_llm.runtime.GenerationSession.setup"]], "shape (tensorrt_llm.runtime.tensorinfo attribute)": [[17, "tensorrt_llm.runtime.TensorInfo.shape"]], "step() (tensorrt_llm.runtime.kvcachemanager method)": [[17, "tensorrt_llm.runtime.KVCacheManager.step"]], "temperature (tensorrt_llm.runtime.samplingconfig attribute)": [[17, "tensorrt_llm.runtime.SamplingConfig.temperature"]], "tensorrt_llm.runtime": [[17, "module-tensorrt_llm.runtime"]], "to_word_list_format() (in module tensorrt_llm.runtime)": [[17, "tensorrt_llm.runtime.to_word_list_format"]], "tokens_per_block (tensorrt_llm.runtime.generationsession property)": [[17, "tensorrt_llm.runtime.GenerationSession.tokens_per_block"]], "tokens_per_block (tensorrt_llm.runtime.modelconfig attribute)": [[17, "tensorrt_llm.runtime.ModelConfig.tokens_per_block"]], "top_k (tensorrt_llm.runtime.samplingconfig attribute)": [[17, "tensorrt_llm.runtime.SamplingConfig.top_k"]], "top_p (tensorrt_llm.runtime.samplingconfig attribute)": [[17, "tensorrt_llm.runtime.SamplingConfig.top_p"]], "use_beam_hyps (tensorrt_llm.runtime.samplingconfig attribute)": [[17, "tensorrt_llm.runtime.SamplingConfig.use_beam_hyps"]], "use_custom_all_reduce (tensorrt_llm.runtime.generationsession property)": [[17, "tensorrt_llm.runtime.GenerationSession.use_custom_all_reduce"]], "use_custom_all_reduce (tensorrt_llm.runtime.modelconfig attribute)": [[17, "tensorrt_llm.runtime.ModelConfig.use_custom_all_reduce"]], "use_gpt_attention_plugin (tensorrt_llm.runtime.generationsession property)": [[17, "tensorrt_llm.runtime.GenerationSession.use_gpt_attention_plugin"]], "use_prompt_tuning (tensorrt_llm.runtime.modelconfig attribute)": [[17, "tensorrt_llm.runtime.ModelConfig.use_prompt_tuning"]], "vocab_size (tensorrt_llm.runtime.generationsession property)": [[17, "tensorrt_llm.runtime.GenerationSession.vocab_size"]], "vocab_size (tensorrt_llm.runtime.modelconfig attribute)": [[17, "tensorrt_llm.runtime.ModelConfig.vocab_size"]]}}) |