TensorRT-LLMs/searchindex.js

Search.setIndex({"docnames": ["_cpp_gen/executor", "_cpp_gen/runtime", "advanced/batch-manager", "advanced/expert-parallelism", "advanced/gpt-attention", "advanced/gpt-runtime", "advanced/graph-rewriting", "advanced/inference-request", "advanced/lora", "architecture/add-model", "architecture/checkpoint", "architecture/core-concepts", "architecture/overview", "architecture/workflow", "blogs/Falcon180B-H200", "blogs/H100vsA100", "blogs/H200launch", "blogs/XQA-kernel", "blogs/quantization-in-TRT-LLM", "executor", "index", "installation/build-from-source-linux", "installation/build-from-source-windows", "installation/linux", "installation/windows", "overview", "performance/perf-analysis", "performance/perf-best-practices", "performance/perf-overview", "python-api/tensorrt_llm.functional", "python-api/tensorrt_llm.layers", "python-api/tensorrt_llm.models", "python-api/tensorrt_llm.plugin", "python-api/tensorrt_llm.quantization", "python-api/tensorrt_llm.runtime", "quick-start-guide", "reference/memory", "reference/precision", "reference/support-matrix", "reference/troubleshooting", "release-notes", "speculative_decoding"], "filenames": ["_cpp_gen/executor.rst", "_cpp_gen/runtime.rst", "advanced/batch-manager.md", "advanced/expert-parallelism.md", "advanced/gpt-attention.md", "advanced/gpt-runtime.md", "advanced/graph-rewriting.md", "advanced/inference-request.md", "advanced/lora.md", "architecture/add-model.md", "architecture/checkpoint.md", "architecture/core-concepts.md", "architecture/overview.md", "architecture/workflow.md", "blogs/Falcon180B-H200.md", "blogs/H100vsA100.md", "blogs/H200launch.md", "blogs/XQA-kernel.md", "blogs/quantization-in-TRT-LLM.md", "executor.md", "index.rst", "installation/build-from-source-linux.md", "installation/build-from-source-windows.md", "installation/linux.md", "installation/windows.md", "overview.md", "performance/perf-analysis.md", "performance/perf-best-practices.md", "performance/perf-overview.md", "python-api/tensorrt_llm.functional.rst", "python-api/tensorrt_llm.layers.rst", "python-api/tensorrt_llm.models.rst", "python-api/tensorrt_llm.plugin.rst", "python-api/tensorrt_llm.quantization.rst", "python-api/tensorrt_llm.runtime.rst", "quick-start-guide.md", "reference/memory.md", "reference/precision.md", "reference/support-matrix.md", "reference/troubleshooting.md", "release-notes.md", "speculative_decoding.md"], "titles": ["Executor", "Runtime", "The Batch Manager in TensorRT-LLM", "Expert Parallelism in TensorRT-LLM", "Multi-Head, Multi-Query, and Group-Query Attention", "C++ GPT Runtime", "Graph Rewriting Module", "Inference Request", "Run gpt-2b + LoRA using GptManager / cpp runtime", "Adding a Model", "TensorRT-LLM Checkpoint", "Model Definition", "TensorRT-LLM Architecture", "TensorRT-LLM Build Workflow", "Falcon-180B on a single H200 GPU with INT4 AWQ, and 6.7x faster Llama-70B over A100", "H100 has 4.6x A100 Performance in TensorRT-LLM, achieving 10,000 tok/s at 100ms to first token", "H200 achieves nearly 12,000 tokens/sec on Llama2-13B with TensorRT-LLM", "New XQA-kernel provides 2.4x more Llama-70B throughput within the same latency budget", "Speed up inference with SOTA quantization techniques in TRT-LLM", "Executor API", "Welcome to TensorRT-LLM\u2019s Documentation!", "Building from Source Code on Linux", "Building from Source Code on Windows", "Installing on Linux", "Installing on Windows", "Overview", "Performance Analysis", "Best Practices for Tuning the Performance of TensorRT-LLM", "Overview", "Functionals", "Layers", "Models", 
"Plugin", "Quantization", "Runtime", "Quick Start Guide", "Memory Usage of TensorRT-LLM", "Numerical Precision", "Support Matrix", "Troubleshooting", "Release Notes", "Speculative Sampling"], "terms": {"namespac": [0, 1, 2], "tensorrt_llm": [0, 1, 2, 4, 5, 6, 8, 9, 11, 13, 19, 21, 22, 23, 24, 28, 29, 30, 31, 32, 33, 34, 35, 39, 40], "variabl": [0, 1, 2, 5, 16, 22, 24, 29, 39], "sizetyp": [0, 1], "const": [0, 1, 2, 19], "kdefaultiterstatsmaxiter": 0, "1000": 0, "kdefaultrequeststatsmaxiter": 0, "0": [0, 1, 2, 4, 5, 6, 8, 10, 11, 13, 15, 16, 18, 20, 22, 23, 26, 27, 28, 29, 30, 31, 34, 35, 39, 41], "class": [0, 1, 2, 4, 5, 6, 7, 11, 13, 18, 21, 29, 30, 31, 32, 33, 34, 39, 40], "samplingconfig": [0, 5, 19, 34, 40], "includ": [0, 1, 2, 4, 5, 7, 8, 10, 11, 12, 14, 15, 17, 19, 21, 22, 24, 25, 27, 28, 35, 37, 40, 41], "sampl": [0, 1, 4, 7, 11, 12, 19, 26, 34, 40], "configur": [0, 1, 3, 4, 7, 12, 16, 19, 22, 27, 28, 34, 35, 36, 39, 40, 41], "public": [0, 1, 18, 22], "function": [0, 1, 2, 4, 9, 11, 12, 13, 19, 26, 27, 28, 32, 34, 36, 37, 38, 39, 40], "explicit": [0, 1, 5, 26, 29], "beamwidth": [0, 1, 2, 5, 40], "1": [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 26, 27, 28, 29, 30, 31, 34, 35, 36, 38, 39, 41], "std": [0, 1, 2, 19], "option": [0, 1, 5, 6, 7, 9, 13, 15, 20, 22, 23, 24, 28, 29, 34, 36, 39, 40, 41], "topk": [0, 1, 3, 5, 7, 29, 41], "nullopt": [0, 1], "floattyp": [0, 1], "topp": [0, 1, 5, 7, 40], "toppmin": [0, 1, 5], "toppresetid": [0, 1, 5], "toppdecai": [0, 1, 5], "randomseedtyp": 0, "randomse": [0, 1, 5, 7], "temperatur": [0, 1, 5, 7, 34, 40], "minlength": [0, 1, 5, 7], "beamsearchdiversityr": [0, 1, 5], "repetitionpenalti": [0, 1, 5, 7], "presencepenalti": [0, 1, 5, 7], "frequencypenalti": [0, 1, 5, 7], "lengthpenalti": [0, 1, 5, 7], "earlystop": [0, 1, 5, 7], "constructor": [0, 1, 2], "see": [0, 1, 2, 4, 11, 14, 16, 17, 18, 26, 27, 28, 29, 30, 31, 36, 37, 39, 40], "descript": [0, 1, 7, 8, 20, 29], "paramet": [0, 1, 3, 4, 8, 10, 11, 13, 19, 27, 29, 30, 31, 34, 35, 36, 40, 41], "below": [0, 1, 2, 4, 6, 7, 8, 16, 17, 18, 28, 35], "bool": [0, 1, 2, 6, 7, 10, 29, 30, 31, 32, 34], "oper": [0, 1, 2, 4, 5, 6, 10, 11, 19, 27, 28, 29, 35, 36, 38], "other": [0, 1, 3, 4, 5, 11, 13, 14, 19, 21, 25, 27, 28, 29, 36, 41], "getbeamwidth": 0, "gettopk": 0, "gettopp": 0, "gettoppmin": 0, "gettoppresetid": 0, "gettoppdecai": 0, "getrandomse": 0, "gettemperatur": 0, "getminlength": 0, "getbeamsearchdiversityr": 0, "getrepetitionpenalti": 0, "getpresencepenalti": 0, "getfrequencypenalti": 0, "getlengthpenalti": 0, "getearlystop": 0, "privat": [0, 1, 5], "member": [0, 1, 2, 5, 6, 11, 29], "mbeamwidth": [0, 1], "The": [0, 1, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 34, 35, 36, 38, 39, 40, 41], "beam": [0, 1, 2, 5, 7, 12, 17, 20, 29, 34, 36, 39, 40, 41], "width": [0, 1, 2, 4, 5, 7, 34, 36, 40], "default": [0, 1, 2, 3, 4, 5, 7, 10, 13, 21, 22, 24, 27, 28, 29, 31, 34, 35, 36, 37, 39, 40], "i": [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 16, 17, 18, 19, 21, 22, 24, 25, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41], "which": [0, 1, 2, 3, 4, 5, 6, 8, 10, 11, 13, 14, 18, 19, 21, 22, 24, 26, 27, 28, 29, 31, 32, 34, 35, 36, 37, 41], "disabl": [0, 1, 2, 4, 5, 27, 28, 29, 32, 34, 36, 40], "search": [0, 1, 5, 12, 17, 20, 24, 29, 40, 41], "mtopk": 0, "control": [0, 2, 4, 5, 6, 19, 27, 29, 34, 37], "number": [0, 1, 2, 4, 5, 7, 11, 17, 19, 28, 29, 30, 35, 36, 37, 41], "logit": [0, 1, 5, 7, 29, 34, 39, 40], "from": [0, 1, 2, 4, 5, 6, 8, 
10, 11, 12, 13, 16, 18, 19, 20, 25, 27, 28, 29, 30, 31, 33, 34, 35, 36, 40, 41], "all": [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 16, 19, 21, 22, 23, 24, 25, 27, 28, 29, 30, 32, 34, 35, 36, 37, 38, 40, 41], "mtopp": 0, "top": [0, 4, 5, 11, 12, 29, 40, 41], "p": [0, 5, 7, 12, 29, 40, 41], "probabl": [0, 1, 5, 40, 41], "f": [0, 4, 5, 28, 29, 39], "mtoppmin": 0, "decai": [0, 5], "algorithm": [0, 4, 5, 10, 11, 13, 18, 27, 29, 40], "lower": [0, 1, 5, 6, 8, 17, 18, 27, 29, 36], "bound": [0, 5, 11, 16, 29, 34, 36], "e": [0, 1, 4, 8, 22, 26, 28, 29, 34, 37, 39], "6": [0, 1, 5, 8, 16, 18, 28, 29, 39, 41], "mtoppresetid": 0, "indic": [0, 1, 2, 4, 5, 10, 19, 27, 28, 29, 30, 36, 41], "where": [0, 1, 2, 4, 5, 7, 10, 11, 14, 18, 27, 28, 29, 34, 35, 36, 37, 41], "reset": [0, 1, 5, 34], "mtoppdecai": 0, "valu": [0, 1, 2, 4, 5, 7, 8, 10, 11, 14, 15, 27, 29, 31, 33, 34, 36, 37, 39], "mrandomse": 0, "random": [0, 5], "seed": [0, 5, 33], "us": [0, 1, 2, 3, 4, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 36, 37, 39, 40], "gener": [0, 1, 2, 5, 7, 10, 11, 13, 14, 15, 17, 19, 20, 22, 25, 26, 27, 28, 29, 34, 35, 36, 38, 39, 40, 41], "mtemperatur": 0, "modul": [0, 1, 2, 4, 5, 10, 11, 20, 21, 30, 31, 34, 39], "when": [0, 1, 2, 3, 4, 5, 7, 11, 13, 17, 18, 19, 20, 21, 22, 27, 28, 29, 30, 31, 34, 35, 36, 37, 39, 40, 41], "new": [0, 1, 2, 4, 5, 6, 7, 8, 13, 15, 16, 19, 22, 24, 25, 29, 34, 36, 40, 41], "token": [0, 1, 2, 3, 4, 5, 7, 11, 14, 17, 18, 19, 26, 29, 30, 34, 35, 36, 37, 40, 41], "It": [0, 1, 2, 4, 5, 6, 8, 9, 11, 12, 14, 17, 18, 19, 21, 25, 27, 28, 29, 37, 39, 41], "can": [0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 27, 28, 29, 31, 34, 35, 36, 37, 39, 41], "have": [0, 1, 2, 3, 4, 5, 7, 8, 10, 13, 14, 16, 18, 19, 25, 26, 27, 28, 29, 36, 39, 41], "0f": [0, 5], "mminlength": 0, "effect": [0, 2, 5, 22, 27, 41], "mbeamsearchdiversityr": 0, "divers": [0, 5, 26], "mrepetitionpenalti": 0, "penal": [0, 5], "base": [0, 1, 5, 12, 13, 14, 15, 18, 22, 25, 26, 27, 29, 30, 31, 32, 33, 34, 36, 38, 40, 41], "how": [0, 2, 5, 9, 11, 13, 19, 20, 26, 28, 29, 35, 36, 37, 39, 41], "often": [0, 5, 14, 18, 19, 29, 41], "thei": [0, 1, 4, 5, 7, 8, 11, 13, 19, 21, 22, 24, 27, 28, 29, 31, 37, 39], "appear": [0, 2, 4, 5, 29, 39], "sequenc": [0, 1, 2, 4, 5, 6, 11, 14, 15, 16, 17, 25, 28, 29, 30, 34, 36, 40, 41], "ani": [0, 2, 5, 6, 13, 19, 22, 24, 25, 31, 34, 35, 39, 41], "encourag": [0, 5, 13], "repetit": [0, 5, 29, 41], "discourag": [0, 5], "mpresencepenalti": 0, "alreadi": [0, 4, 5, 6, 12, 22, 27, 40], "present": [0, 1, 5, 37, 40], "irrespect": [0, 5], "mfrequencypenalti": 0, "depend": [0, 2, 4, 5, 6, 10, 16, 22, 23, 24, 27, 28, 29, 35, 36, 39, 40, 41], "mlengthpenalti": 0, "longer": [0, 5], "mearlystop": 0, "whether": [0, 1, 2, 4, 5, 29, 30, 34], "process": [0, 1, 2, 4, 5, 10, 11, 13, 19, 25, 26, 27, 28, 29, 41], "finish": [0, 1, 5, 13, 25, 34], "onc": [0, 2, 4, 5, 6, 11, 12, 19, 21, 26, 28, 29, 36], "sentenc": [0, 5, 34], "ar": [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 13, 14, 15, 16, 17, 18, 19, 21, 22, 25, 26, 27, 28, 29, 30, 31, 32, 34, 36, 37, 38, 39, 40, 41], "end": [0, 1, 2, 4, 5, 7, 11, 25, 26, 27, 28, 29, 40], "end_token": [0, 5], "friend": [0, 1], "serial": [0, 29, 34], "outputconfig": [0, 19], "output": [0, 1, 2, 4, 5, 6, 7, 11, 14, 15, 16, 17, 18, 19, 26, 27, 28, 29, 30, 34, 39, 41], "result": [0, 1, 2, 3, 4, 5, 11, 14, 15, 16, 18, 20, 21, 26, 27, 29, 30, 40, 41], "returnlogprob": 0, "fals": [0, 1, 2, 4, 5, 6, 7, 10, 19, 28, 29, 30, 31, 32, 34, 
39, 40], "returncontextlogit": 0, "returngenerationlogit": 0, "excludeinputfromoutput": 0, "should": [0, 1, 2, 6, 8, 13, 19, 21, 22, 24, 27, 28, 29, 30, 34, 36, 39, 40, 41], "contain": [0, 1, 2, 4, 5, 6, 8, 10, 11, 12, 13, 19, 23, 25, 29, 31, 34, 35, 37, 38, 40], "log": [0, 1, 2, 4, 5, 7, 29, 35, 36, 40], "context": [0, 1, 2, 3, 7, 18, 20, 26, 29, 34, 36, 39, 40], "input": [0, 1, 2, 5, 6, 7, 11, 14, 15, 16, 17, 18, 19, 20, 28, 29, 30, 31, 34, 36, 39, 40, 41], "speculativedecodingconfig": [0, 1], "specul": [0, 1, 19, 40], "decod": [0, 1, 4, 5, 13, 19, 26, 34, 36, 38, 40, 41], "allow": [0, 1, 2, 4, 5, 10, 14, 17, 19, 25, 26, 27, 28, 29, 41], "draft": [0, 1, 7], "specifi": [0, 1, 2, 5, 6, 7, 8, 13, 19, 21, 22, 26, 27, 28, 29, 31, 35, 36, 41], "accept": [0, 1, 2, 21, 29, 40, 41], "threshold": [0, 29], "vectoken": 0, "acceptancethreshold": 0, "gettoken": 0, "getlogit": 0, "getacceptancethreshold": 0, "mtoken": 0, "mlogit": 0, "expect": [0, 2, 4, 5, 7, 11, 13, 16, 27, 28, 29, 38], "shape": [0, 1, 2, 4, 6, 7, 8, 10, 11, 29, 31, 34, 36, 37, 39, 40], "num_draft_token": [0, 7], "vocab_s": [0, 7, 10, 28, 30, 31, 34], "macceptancethreshold": 0, "must": [0, 1, 2, 4, 5, 7, 8, 11, 12, 19, 22, 27, 29, 30, 34, 37, 39, 41], "prompttuningconfig": 0, "prompt": [0, 7, 19, 22, 24, 30, 34, 40, 41], "tune": [0, 7, 15, 18, 19, 20, 30, 34, 35, 36, 40, 41], "embeddingt": [0, 1], "getembeddingt": 0, "membeddingt": 0, "embed": [0, 7, 29, 40], "tabl": [0, 7, 15, 18, 27, 28, 29, 30, 34, 38], "task": [0, 1, 7, 8, 10, 28, 30, 34, 37, 40, 41], "hidden_s": [0, 6, 10, 28, 29, 30, 31, 34, 39], "data": [0, 1, 4, 7, 11, 14, 15, 16, 17, 18, 27, 28, 29, 31, 38, 39], "match": [0, 1, 6, 20, 28, 29, 34, 35, 41], "model": [0, 1, 2, 3, 4, 7, 8, 10, 13, 14, 15, 16, 17, 18, 19, 20, 23, 24, 25, 29, 33, 36, 37, 38], "weight": [0, 1, 2, 3, 7, 8, 13, 14, 15, 18, 20, 25, 29, 30, 31, 34, 40], "loraconfig": [0, 8], "lora": [0, 1, 2, 7, 19, 20, 29, 31, 34, 40], "idtyp": [0, 19], "taskid": [0, 1], "config": [0, 1, 4, 7, 13, 14, 28, 29, 31, 34, 35, 36], "gettaskid": 0, "getweight": 0, "getconfig": 0, "mtaskid": 0, "id": [0, 1, 2, 7, 19, 26, 29, 30, 34], "mweight": 0, "trt": [0, 2, 6, 8, 11, 15, 22, 29, 31, 33, 34, 36, 39, 40], "llm": [0, 1, 4, 5, 6, 8, 9, 11, 14, 17, 19, 23, 24, 26, 29, 31, 33, 35, 37, 38, 39], "document": [0, 4, 5, 7, 8, 9, 10, 11, 12, 13, 15, 16, 18, 21, 26, 27, 28, 29, 36, 37, 39, 40], "mconfig": [0, 1], "detail": [0, 2, 4, 11, 19, 20, 27, 28, 29, 31, 36, 39, 41], "request": [0, 1, 4, 5, 8, 11, 15, 17, 20, 25, 26, 27, 29, 36, 41], "A": [0, 1, 2, 4, 5, 8, 10, 11, 13, 18, 19, 29, 34, 41], "hold": [0, 1, 2, 3, 6, 8, 19, 30, 36, 41], "inform": [0, 1, 2, 4, 5, 7, 8, 10, 11, 14, 17, 19, 22, 25, 26, 35, 38, 41], "about": [0, 1, 13, 14, 15, 17, 18, 19, 20, 28, 35, 36, 40], "inputtokenid": 0, "maxnewtoken": [0, 1, 41], "stream": [0, 1, 2, 5, 7, 11, 19, 27, 28, 34, 36, 39], "endid": [0, 1], "padid": [0, 1], "list": [0, 1, 2, 6, 7, 10, 11, 12, 19, 21, 25, 28, 29, 30, 31, 34, 38, 40], "badword": 0, "stopword": 0, "embeddingbia": [0, 1], "ptuningconfig": 0, "string": [0, 1, 2, 10, 19, 29, 34], "logitspostprocessornam": 0, "maximum": [0, 1, 2, 4, 5, 16, 19, 28, 29, 36, 39], "respons": [0, 29], "pad": [0, 1, 2, 5, 6, 7, 8, 20, 25, 29, 30, 36], "bad": [0, 7, 19, 40], "word": [0, 1, 4, 7, 19, 29, 34], "each": [0, 1, 2, 3, 4, 5, 6, 8, 10, 11, 19, 26, 27, 28, 29, 30, 34, 36, 37, 41], "compos": [0, 1, 5], "multipl": [0, 1, 2, 3, 4, 6, 7, 11, 19, 25, 27, 28, 29, 30, 35, 39, 41], "stop": [0, 1, 2, 6, 7, 19, 26, 34, 40, 41], "bia": [0, 1, 2, 7, 10, 
11, 19, 28, 29, 30, 39], "kfp32": 0, "postprocessor": 0, "name": [0, 1, 2, 5, 6, 7, 8, 10, 19, 26, 29, 34], "correspond": [0, 1, 2, 3, 4, 6, 8, 13, 23, 26, 29, 30, 34, 37, 40, 41], "one": [0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 13, 14, 19, 22, 24, 27, 28, 29, 34, 35, 36, 39, 40, 41], "provid": [0, 1, 2, 4, 5, 6, 9, 10, 12, 13, 14, 15, 18, 19, 21, 24, 25, 26, 27, 28, 29, 34, 36, 38, 39, 41], "executorconfig": [0, 19], "noexcept": [0, 1], "getinputtokenid": 0, "getmaxnewtoken": 0, "getstream": [0, 1], "getsamplingconfig": [0, 1], "getoutputconfig": 0, "getendid": 0, "getpadid": 0, "getbadword": 0, "getstopword": 0, "getembeddingbia": 0, "getspeculativedecodingconfig": 0, "getprompttuningconfig": 0, "getloraconfig": 0, "getlogitspostprocessornam": 0, "void": [0, 1, 2, 11], "setstream": 0, "setsamplingconfig": 0, "setoutputconfig": 0, "setendid": 0, "setpadid": 0, "setbadword": 0, "setstopword": 0, "setembeddingbia": 0, "setspeculativedecodingconfig": 0, "specdecodingconfig": [0, 1], "setprompttuningconfig": 0, "setloraconfig": 0, "setlogitspostprocessornam": 0, "unique_ptr": [0, 1], "impl": 0, "mimpl": 0, "struct": [0, 1], "isfin": [0, 19], "thi": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 32, 34, 35, 36, 37, 39, 40, 41], "final": [0, 1, 2, 8, 29], "beamtoken": [0, 2, 19], "outputtokenid": [0, 19], "veclogprob": 0, "cumlogprob": [0, 1, 2], "cumul": [0, 1, 29], "size": [0, 1, 2, 3, 4, 5, 7, 8, 15, 16, 18, 28, 29, 30, 31, 34, 35, 39, 40, 41], "beamsiz": 0, "vector": [0, 1, 5, 19, 29], "logprob": [0, 1, 2], "outputlen": 0, "contextlogit": [0, 1], "promptlen": 0, "vocabsizepad": [0, 1, 2], "generationlogit": [0, 1], "either": [0, 1, 2, 12, 19, 29, 36, 39], "an": [0, 1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 16, 18, 19, 21, 24, 25, 26, 27, 28, 29, 33, 34, 36, 37, 39, 40, 41], "error": [0, 2, 7, 8, 13, 19, 20, 36, 40], "requestid": [0, 7], "errormsg": 0, "getrequestid": 0, "get": [0, 1, 4, 6, 17, 21, 22, 23, 25, 26, 29, 34, 35], "wa": [0, 1, 4, 5, 10, 28, 30, 35, 37, 39], "haserror": [0, 19], "ha": [0, 1, 2, 4, 7, 8, 10, 11, 13, 14, 18, 19, 21, 27, 29, 36, 37, 40], "geterrormsg": 0, "msg": [0, 1], "Will": 0, "throw": [0, 1], "except": [0, 4, 5, 13, 29], "getresult": [0, 19], "hasresult": 0, "true": [0, 1, 2, 6, 7, 10, 19, 22, 27, 28, 29, 30, 31, 32, 34, 35, 39, 41], "schedulerconfig": 0, "schedul": [0, 2, 8, 19, 26, 36], "schedulerpolici": [0, 2, 27], "polici": [0, 1, 2, 36], "kguaranteed_no_evict": 0, "getpolici": 0, "mpolici": 0, "kvcacheconfig": [0, 1, 2, 36, 41], "kv": [0, 2, 8, 11, 13, 14, 18, 20, 25, 26, 29, 34, 35, 40, 41], "cach": [0, 1, 2, 5, 7, 8, 11, 13, 18, 20, 25, 26, 29, 34, 35, 37, 40, 41], "enableblockreus": [0, 2, 41], "maxtoken": [0, 2, 36], "maxattentionwindow": [0, 1, 2], "sinktokenlength": [0, 1], "freegpumemoryfract": [0, 2, 36, 40], "size_t": [0, 1], "hostcaches": [0, 2], "onboardblock": 0, "getenableblockreus": 0, "getmaxtoken": 0, "getmaxattentionwindow": 0, "getsinktokenlength": 0, "getfreegpumemoryfract": 0, "gethostcaches": 0, "getonboardblock": 0, "menableblockreus": 0, "block": [0, 1, 2, 4, 5, 11, 26, 29, 34, 36], "reus": [0, 2, 36, 40, 41], "differ": [0, 1, 3, 4, 5, 10, 11, 13, 18, 22, 25, 27, 28, 29, 31, 36, 37, 40], "mmaxtoken": 0, "store": [0, 1, 2, 4, 8, 11, 15, 27, 29, 36, 37], "If": [0, 1, 2, 4, 5, 6, 7, 8, 10, 11, 13, 18, 19, 21, 22, 23, 25, 27, 28, 29, 31, 34, 36, 38, 39, 40, 41], "both": [0, 2, 4, 5, 6, 8, 11, 15, 18, 24, 27, 28, 29, 30, 36, 37, 40, 41], "mfreegpumemoryfract": 0, "memori": [0, 1, 2, 3, 4, 5, 
8, 11, 13, 14, 15, 17, 18, 20, 22, 26, 28, 34, 35, 39], "minimum": [0, 2, 4, 29, 34, 36], "alloc": [0, 1, 2, 4, 27, 29, 34, 35, 36, 39], "mmaxattentionwindow": [0, 1], "attent": [0, 2, 5, 8, 9, 11, 14, 20, 28, 29, 34, 35, 36, 39, 40, 41], "window": [0, 2, 20, 29, 34, 38, 40], "onli": [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 18, 19, 20, 21, 24, 27, 28, 29, 30, 34, 36, 40, 41], "last": [0, 1, 2, 4, 8, 19, 27, 29, 41], "msinktokenlength": [0, 1], "sink": [0, 4, 34], "alwai": [0, 4, 5, 10, 11, 13], "keep": [0, 1, 4, 13, 26, 27, 29, 40], "fraction": [0, 2, 29, 34], "gpu": [0, 1, 3, 4, 5, 8, 10, 12, 13, 15, 16, 17, 18, 19, 20, 21, 23, 29, 31, 34, 35, 38, 39, 40, 41], "90": [0, 21, 26, 27], "mhostcaches": 0, "secondari": 0, "pool": [0, 1, 2, 4, 20, 34], "byte": [0, 1, 2, 34], "increas": [0, 1, 2, 4, 5, 11, 15, 16, 26, 27, 29, 40, 41], "potenti": [0, 1, 2, 7, 26, 41], "monboardblock": 0, "offload": 0, "onboard": 0, "back": [0, 2, 41], "primari": [0, 18], "befor": [0, 1, 2, 4, 6, 10, 11, 19, 21, 25, 28, 29, 34, 35, 36, 39], "being": [0, 2, 4, 5, 11, 13, 28, 40], "parallelconfig": 0, "parallel": [0, 2, 4, 5, 10, 11, 14, 16, 17, 20, 28, 29, 30, 31, 36, 40], "execut": [0, 1, 5, 8, 11, 12, 13, 19, 20, 23, 24, 25, 27, 29, 34, 35, 36, 41], "current": [0, 1, 2, 4, 5, 8, 18, 22, 24, 27, 28, 29, 34, 36, 41], "support": [0, 1, 2, 3, 4, 8, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 27, 29, 39, 40, 41], "commtyp": 0, "communicationtyp": 0, "kmpi": 0, "communicationmod": 0, "commmod": 0, "kleader": 0, "deviceid": [0, 1], "participantid": 0, "commun": [0, 2, 5, 11, 22, 25, 29, 40], "mode": [0, 1, 2, 4, 6, 11, 22, 29, 30, 34, 36, 37], "involv": [0, 11, 18, 30, 41], "particip": [0, 29], "mpi": [0, 1, 2, 5, 12, 13, 24, 26, 29, 39, 40], "rank": [0, 1, 2, 3, 5, 8, 13, 20, 29, 31, 34, 36], "first": [0, 1, 2, 4, 5, 6, 7, 8, 12, 16, 18, 19, 26, 27, 29, 40, 41], "consid": [0, 1, 2, 18, 27, 28, 29, 41], "leader": 0, "getcommunicationtyp": 0, "getcommunicationmod": 0, "getdeviceid": 0, "getparticipantid": 0, "setcommunicationtyp": 0, "setcommunicationmod": 0, "setdeviceid": 0, "setparticipantid": 0, "mcommtyp": 0, "protocol": 0, "mcommmod": 0, "mdeviceid": [0, 1], "devic": [0, 1, 2, 22, 26, 28, 29, 33, 34, 39], "mparticipantid": 0, "exampl": [0, 1, 2, 4, 5, 6, 9, 11, 12, 13, 14, 16, 18, 20, 21, 22, 23, 27, 28, 29, 34, 35, 36, 37, 38, 39, 40, 41], "peftcacheconfig": 0, "peftcachemanag": 0, "numhostmodulelay": [0, 2], "numdevicemodulelay": [0, 2], "optimaladapters": [0, 1, 2], "8": [0, 1, 2, 4, 8, 10, 13, 14, 16, 17, 18, 20, 22, 26, 28, 29, 30, 36, 37, 39], "maxadapters": [0, 2], "64": [0, 2, 4, 5, 10, 15, 16, 22, 28, 34, 40], "numputwork": [0, 2], "numensurework": [0, 2], "numcopystream": [0, 1, 2], "maxpagesperblockhost": [0, 2], "24": [0, 2, 39, 40], "maxpagesperblockdevic": [0, 2], "float": [0, 1, 5, 7, 10, 11, 15, 27, 29, 30, 31, 34, 37], "devicecacheperc": [0, 2], "getnumhostmodulelay": 0, "getnumdevicemodulelay": 0, "getoptimaladapters": 0, "getmaxadapters": 0, "getnumputwork": 0, "getnumensurework": 0, "getnumcopystream": [0, 1], "getmaxpagesperblockhost": 0, "getmaxpagesperblockdevic": 0, "getdevicecacheperc": 0, "mnumhostmodulelay": 0, "mnumdevicemodulelay": 0, "moptimaladapters": 0, "mmaxadapters": 0, "mnumputwork": 0, "mnumensurework": 0, "mnumcopystream": [0, 1], "mmaxpagesperblockhost": 0, "mmaxpagesperblockdevic": 0, "mdevicecacheperc": 0, "maxbeamwidth": [0, 1, 2], "enablechunkedcontext": [0, 2], "normalizelogprob": [0, 1], "iterstatsmaxiter": 0, "requeststatsmaxiter": 0, "batchingtyp": 0, "kinflight": 0, 
"logitspostprocessormap": 0, "medusachoic": 0, "getmaxbeamwidth": [0, 1], "getschedulerconfig": 0, "getkvcacheconfig": 0, "getenablechunkedcontext": 0, "getnormalizelogprob": [0, 1], "getiterstatsmaxiter": 0, "getrequeststatsmaxiter": 0, "getbatchingtyp": 0, "getparallelconfig": 0, "getpeftcacheconfig": 0, "getlogitspostprocessormap": 0, "getmedusachoic": 0, "setmaxbeamwidth": [0, 1], "setschedulerconfig": 0, "setkvcacheconfig": 0, "setenablechunkedcontext": 0, "setnormalizelogprob": 0, "setiterstatsmaxiter": 0, "setrequeststatsmaxiter": 0, "setbatchingtyp": 0, "setparallelconfig": 0, "setpeftcacheconfig": 0, "setlogitspostprocessormap": 0, "setmedusachoic": 0, "mmaxbeamwidth": [0, 1], "sent": [0, 2, 35, 41], "mschedulerconfig": 0, "mkvcacheconfig": 0, "menablechunkedcontext": 0, "mnormalizelogprob": [0, 1], "normal": [0, 18, 29, 36], "miterstatsmaxiter": 0, "iter": [0, 1, 2, 4, 7, 19, 20, 25, 27, 34, 40, 41], "statist": [0, 19, 41], "mrequeststatsmaxiter": 0, "per": [0, 1, 2, 4, 5, 13, 14, 16, 17, 19, 20, 26, 29, 36, 37, 40, 41], "mbatchingtyp": 0, "batch": [0, 1, 7, 8, 11, 12, 15, 16, 18, 19, 20, 26, 28, 29, 34, 35, 36, 39, 40, 41], "strategi": [0, 18, 29, 31, 36, 41], "mparallelconfig": 0, "mpeftcacheconfig": 0, "mlogitspostprocessormap": 0, "mmedusachoic": 0, "receiv": [0, 1, 2, 3, 19, 29, 41], "send": [0, 1, 11, 20, 29], "run": [0, 1, 2, 4, 5, 7, 10, 11, 14, 18, 19, 20, 21, 23, 24, 25, 26, 27, 29, 34, 36, 37, 39, 40], "infer": [0, 1, 5, 8, 11, 12, 13, 14, 15, 16, 20, 22, 23, 24, 27, 28, 29, 34, 37, 39, 40, 41], "filesystem": [0, 1], "path": [0, 1, 2, 4, 10, 19, 21, 22, 24, 28, 29, 34, 40, 41], "modelpath": 0, "modeltyp": [0, 2], "folder": [0, 5, 13, 19, 22, 24, 28, 37, 38, 40], "defin": [0, 1, 2, 4, 6, 10, 11, 12, 13, 16, 19, 25, 29, 30, 37, 39, 41], "comm": 0, "inter": [0, 39], "uint8_t": [0, 1], "enginebuff": [0, 1], "jsonconfigstr": 0, "shared_ptr": [0, 1, 2], "enqueuerequest": [0, 19], "enqueu": [0, 2, 11, 19, 34, 36], "return": [0, 1, 2, 6, 7, 8, 11, 13, 19, 29, 30, 31, 34, 36, 39, 40, 41], "uniqu": [0, 2, 4, 5, 7, 8, 10, 41], "identifi": [0, 2, 5, 8, 11, 29, 41], "awaitrespons": [0, 19], "chrono": 0, "millisecond": 0, "timeout": 0, "await": [0, 19], "readi": [0, 23, 35], "time": [0, 1, 2, 7, 8, 11, 16, 18, 20, 21, 22, 24, 25, 27, 28, 29, 34, 35, 39, 41], "wait": [0, 1, 2, 13, 25], "getnumresponsesreadi": 0, "cancelrequest": [0, 19], "cancel": [0, 19], "shutdown": 0, "signal": [0, 2], "server": [0, 11, 12, 15, 20, 23, 40], "call": [0, 1, 2, 3, 4, 5, 6, 11, 13, 19, 26, 29, 31, 33, 34, 35, 36], "termin": [0, 2, 22], "been": [0, 2, 3, 4, 7, 13, 15, 16, 19, 39, 40], "reach": [0, 1, 4, 10, 27], "dequ": [0, 1], "iterationstat": 0, "getlatestiterationstat": [0, 19], "comput": [0, 1, 2, 3, 4, 5, 11, 14, 15, 16, 18, 26, 27, 28, 29, 36, 41], "sinc": [0, 4, 6, 13, 21, 36, 41], "most": [0, 1, 5, 11, 13, 14, 15, 16, 18, 24, 26, 27, 29, 36], "stat": [0, 2], "requeststatsperiter": 0, "getlatestrequeststat": 0, "group": [0, 3, 5, 11, 14, 20, 28, 29, 30, 37, 40], "canenqueuerequest": 0, "jsonseri": 0, "util": [0, 1, 2, 4, 5, 11, 14, 25, 26, 27, 36, 41], "json": [0, 1, 2, 10, 19, 28], "static": [0, 1, 19, 22, 26, 29, 30, 34, 40], "tojsonstr": 0, "convert": [0, 1, 2, 8, 10, 11, 13, 25, 31, 35, 39, 40], "requeststatsperit": 0, "requeststat": 0, "common": [0, 4, 29, 36, 41], "arrayview": [0, 1], "int32_t": [0, 1, 2, 7, 29], "dimtyp": [0, 1], "typenam": [0, 1, 11], "remove_cv_t": 0, "value_typ": 0, "inlin": [0, 1], "size_typ": [0, 1], "initializer_list": [0, 1], "dim": [0, 1, 29, 30], "cudastreamptr": 
[0, 1], "runtim": [0, 2, 4, 7, 12, 20, 23, 25, 26, 28, 29, 30, 35, 39, 40, 41], "cudastream": 0, "copytocpu": 0, "nullptr": [0, 1], "copytopin": 0, "copytopooledpin": 0, "copytomanag": 0, "copytogpu": 0, "getdata": 0, "pointer": [0, 1, 2, 5, 29, 34], "underli": [0, 1, 2, 6, 36, 41], "arrai": [0, 1, 29, 34], "datatyp": [0, 1, 5, 11, 29, 34], "getdatatyp": [0, 1], "buffer": [0, 1, 2, 19, 20, 29, 36, 40], "memorytyp": [0, 1], "getmemorytyp": [0, 1], "getshap": [0, 1], "dimens": [0, 1, 4, 5, 8, 27, 29, 30, 31, 36], "getsiz": [0, 1], "element": [0, 1, 4, 5, 8, 29, 37], "getsizeinbyt": [0, 1], "setzero": [0, 1], "set": [0, 1, 2, 3, 4, 5, 6, 7, 10, 12, 13, 19, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 39, 40, 41], "entir": [0, 2, 11, 14, 19, 25, 29, 36], "zero": [0, 1, 2, 5, 29, 30, 37], "valid": [0, 1, 7, 22, 28, 29, 41], "cuda": [0, 1, 2, 4, 5, 11, 21, 22, 23, 24, 26, 27, 34, 36, 39, 40], "setfrom": 0, "copi": [0, 1, 2, 22, 24, 27, 28, 29, 35, 36], "anoth": [0, 1, 4, 6, 8, 13, 15, 29, 39], "rh": [0, 1], "cpu": [0, 1, 2, 8, 11, 26, 27, 29, 39], "given": [0, 1, 2, 5, 7, 8, 13, 16, 19, 26, 29, 30, 31, 34, 36, 37], "templat": [0, 1, 11], "t": [0, 1, 2, 4, 11, 13, 19, 22, 26, 29, 36, 41], "pin": [0, 1, 26], "pooledpin": 0, "manag": [0, 1, 4, 5, 8, 11, 12, 20, 25, 27, 34, 36, 40, 41], "uvm": [0, 1], "particular": [0, 19, 21, 35], "wrap": [0, 1, 2, 11, 29, 34, 40], "without": [0, 1, 2, 4, 11, 18, 19, 21, 25, 29, 36, 40], "take": [0, 2, 4, 5, 6, 10, 13, 22, 25, 27, 28, 29, 30], "ownership": 0, "itensor": [0, 29], "copyto": 0, "mtensor": 0, "getruntimetyp": 0, "toitensor": 0, "ofitensor": 0, "typetrait": 0, "int8_t": [0, 1], "attribut": [0, 1, 6, 13, 34], "constexpr": [0, 1], "auto": [0, 1, 2, 4, 5, 11, 29, 40], "kint8": [0, 1], "kint32": [0, 1], "int64_t": [0, 1], "kint64": [0, 1], "kuint8": [0, 1], "typedef": [0, 1], "tensorptr": [0, 1, 2], "tokenidtyp": [0, 1], "uint64_t": [0, 1, 2, 7], "iterationtyp": 0, "streamptr": [0, 1, 19], "logitspostprocessor": [0, 2], "unordered_map": [0, 1, 19], "enum": [0, 1], "enumer": [0, 1, 29, 33], "kbool": [0, 1], "kbf16": 0, "kfp8": 0, "kfp16": 0, "kunknown": 0, "kcpu": [0, 1], "kcpu_pin": 0, "kgpu": [0, 1], "kuvm": [0, 1], "kdecoder_onli": 0, "kstatic": 0, "refer": [0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 13, 19, 21, 22, 24, 25, 28, 29, 35, 38, 40, 41], "tradit": [0, 2], "scheme": [0, 2, 28], "lockstep": [0, 2], "until": [0, 1, 2, 27, 41], "full": [0, 2, 3, 4, 5, 7, 8, 15, 16, 25, 26, 35, 36, 39, 41], "them": [0, 1, 2, 3, 6, 25, 26, 27, 28, 29, 34, 36, 41], "complet": [0, 1, 2, 5, 7, 22, 25, 35, 41], "up": [0, 2, 4, 5, 8, 15, 16, 22, 24, 28, 35, 40, 41], "length": [0, 1, 2, 4, 5, 14, 15, 16, 17, 18, 27, 28, 29, 34, 36, 39], "inflight": [0, 4, 7, 8, 29, 41], "newli": [0, 1, 2], "arriv": [0, 2, 3], "dynam": [0, 2, 29, 31, 34], "incorpor": [0, 2, 25], "under": [0, 2, 18, 21, 24, 35, 39], "soon": [0, 2, 14, 15, 16, 17, 18, 28], "condit": [0, 2, 6, 27, 29, 41], "met": [0, 2, 27, 41], "select": [0, 2, 3, 12, 18, 22, 24, 29, 36], "subset": [0, 2, 5, 11, 13, 29], "avail": [0, 1, 2, 6, 11, 14, 16, 19, 21, 25, 27, 28, 35, 36, 37], "loop": [0, 1, 2, 5, 11, 19, 27, 28], "kmax_util": 0, "max_util": [0, 2, 27], "pack": [0, 1, 2, 5, 20, 27, 29, 36], "mani": [0, 2, 4, 11, 13, 27, 29, 38, 39, 41], "engin": [0, 1, 2, 4, 5, 6, 8, 12, 13, 17, 19, 20, 22, 27, 29, 34, 36, 39, 40], "inflightbatch": [0, 2], "while": [0, 1, 2, 3, 6, 13, 14, 15, 17, 18, 22, 25, 29, 36, 37, 41], "maxim": [0, 2, 14, 16, 27], "throughput": [0, 2, 4, 14, 15, 16, 20, 27, 40], "might": [0, 2, 11, 13, 18, 21, 36, 
39], "requir": [0, 1, 2, 4, 5, 7, 8, 11, 13, 14, 18, 21, 22, 23, 24, 27, 28, 29, 30, 36, 38, 39, 40, 41], "some": [0, 2, 3, 4, 5, 6, 10, 11, 13, 22, 25, 27, 28, 35, 36, 40, 41], "paus": [0, 2, 27], "restart": [0, 2, 24], "peak": [0, 2, 14, 15, 20], "guaranteed_no_evict": [0, 2, 27], "more": [0, 1, 2, 4, 5, 6, 7, 8, 10, 11, 14, 15, 16, 18, 21, 22, 25, 27, 28, 29, 35, 36, 40, 41], "conserv": [0, 2, 27], "guarante": [0, 2, 5, 13, 27], "start": [0, 1, 2, 6, 23, 24, 25, 26, 27, 28, 29, 31], "evict": [0, 1, 2, 7, 8, 25], "requeststag": 0, "repres": [0, 1, 14, 18, 28, 29, 34, 41], "state": [0, 1, 2, 3, 4, 6, 29, 41], "kqueu": 0, "yet": [0, 13, 15], "activ": [0, 1, 2, 4, 5, 6, 11, 14, 15, 18, 26, 27, 29, 37, 38], "due": [0, 1, 13, 16, 27, 28, 41], "constraint": [0, 4, 18, 29], "kcontext_in_progress": 0, "phase": [0, 1, 2, 6, 7, 14, 17, 20, 26, 27, 29, 36, 40, 41], "kgeneration_in_progress": 0, "kgeneration_complet": 0, "For": [0, 1, 2, 4, 5, 6, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 27, 28, 29, 34, 35, 36, 39, 41], "c": [0, 1, 2, 4, 6, 7, 11, 12, 20, 23, 24, 25, 26, 27, 28, 29, 35, 40, 41], "trtlmmdatatyp": 0, "half": [0, 1, 11, 29], "kvcachestat": 0, "maxnumblock": 0, "max": [0, 2, 7, 8, 14, 15, 16, 26, 29, 36], "freenumblock": 0, "free": [0, 1, 2, 8, 11, 25, 26, 28, 34, 36], "usednumblock": 0, "tokensperblock": [0, 1, 5], "staticbatchingstat": 0, "singl": [0, 1, 2, 3, 4, 5, 11, 13, 16, 17, 19, 20, 22, 27, 28, 29, 31, 36, 37, 40, 41], "numscheduledrequest": 0, "numcontextrequest": [0, 1], "stage": [0, 4, 6, 36], "numctxtoken": 0, "total": [0, 1, 2, 4, 5, 10, 26, 27, 36, 41], "numgentoken": 0, "emptygenslot": 0, "unus": 0, "slot": [0, 1, 2], "inflightbatchingstat": 0, "numgenrequest": 0, "numpausedrequest": 0, "microbatchid": [0, 1], "index": [0, 1, 20, 23, 24, 26, 29, 38, 41], "mirco": 0, "timestamp": [0, 2, 26], "numactiverequest": 0, "maxnumactiverequest": 0, "gpumemusag": 0, "usag": [0, 4, 6, 11, 13, 14, 17, 20, 22, 27, 29, 40], "cpumemusag": 0, "pinnedmemusag": 0, "specif": [0, 1, 2, 3, 5, 6, 7, 8, 10, 13, 15, 18, 21, 26, 28, 29, 35, 41], "contextprefillposit": 0, "chunk": [0, 2, 20, 29, 36, 40], "prefil": 0, "posit": [0, 1, 29, 41], "numgeneratedtoken": 0, "so": [0, 1, 2, 4, 6, 7, 8, 12, 13, 19, 21, 22, 24, 27, 28, 29, 30, 35, 36, 38, 39, 41], "far": [0, 2, 19], "lack": 0, "resourc": [0, 4, 13], "exhaust": [0, 12], "helper": [1, 29], "host": [1, 2, 8, 21, 22, 27, 29, 35], "type": [1, 2, 4, 6, 7, 8, 10, 11, 15, 18, 19, 29, 31, 34, 37, 38, 39], "ibufferptr": 1, "uniqueptr": 1, "itensorptr": 1, "trimpool": 1, "construct": [1, 11, 19, 41], "de": 1, "etc": [1, 36], "destructor": [1, 2], "nvinfer1": 1, "kbyte_typ": 1, "cudamallocasync": 1, "emptybuff": 1, "creat": [1, 2, 6, 7, 9, 11, 12, 13, 19, 22, 23, 25, 26, 28, 29, 31, 34, 35, 36, 40, 41], "empti": [1, 2, 29, 41], "mai": [1, 2, 4, 5, 10, 11, 13, 19, 21, 22, 24, 25, 26, 27, 28, 29, 32, 36, 38, 39, 40, 41], "resiz": 1, "later": [1, 2, 8, 11, 13, 16, 22, 24, 36, 39, 41], "emptytensor": 1, "reshap": 1, "setmem": 1, "content": [1, 8, 13, 29, 36, 40], "src": [1, 11, 29], "dst": 1, "srctype": 1, "dsttype": 1, "copyfrom": 1, "memorypoolreserv": [1, 36], "reserv": [1, 2, 27, 36], "memorypoolus": 1, "memorypoolfre": [1, 36], "memorypooltrimto": 1, "try": [1, 2, 13, 19, 27, 28, 35, 36, 39], "trim": 1, "synchron": [1, 2, 11, 19, 39, 40], "implicitli": 1, "gpusync": 1, "cudamalloc": 1, "pinnedpool": 1, "mstream": 1, "mtrimpool": 1, "initmemorypool": [1, 36], "int": [1, 7, 10, 11, 13, 29, 30, 31, 32, 34], "buffermanagertest": 1, "stringptrmap": 1, 
"cudaevent_t": 1, "unsign": [1, 2], "flag": [1, 4, 13, 17, 19, 20, 22, 27, 29, 36, 40, 41], "cudaeventdisabletim": 1, "event": 1, "destroi": [1, 36], "creation": [1, 29], "By": [1, 5, 22, 29, 41], "ownsev": 1, "pass": [1, 2, 4, 6, 8, 11, 26, 29, 30, 34, 36, 41], "exist": [1, 8, 13, 22, 24, 34, 41], "object": [1, 5, 7, 11, 13, 19, 29, 30, 31, 32, 34, 36], "own": [1, 2, 10, 11, 12, 13, 21, 41], "associ": [1, 2, 7, 8, 19, 21, 29, 35], "element_typ": 1, "remove_pointer_t": 1, "eventptr": 1, "delet": [1, 32, 39], "mevent": 1, "mownsev": 1, "cudastreamnonblock": 1, "prioriti": 1, "cudastreamcreatewithflag": 1, "higher": [1, 2, 4, 5, 8, 14, 15, 17, 25, 27, 28, 36, 40, 41], "cudadevicegetstreampriorityrang": 1, "meaning": 1, "cudastream_t": 1, "ownsstream": 1, "getdevic": 1, "record": [1, 6], "mdevic": 1, "mownsstream": 1, "maxlength": 1, "maxbatchs": [1, 5], "step": [1, 2, 4, 6, 9, 10, 11, 12, 13, 14, 20, 22, 24, 25, 27, 28, 29, 34, 39, 41], "maxstopwordslen": 1, "maxbadwordslen": 1, "logitsvec": 1, "sequencelimitlength": 1, "badwordslist": 1, "badwordsptr": 1, "badwordslen": 1, "stopwordslist": 1, "stopwordsptr": 1, "stopwordslen": 1, "norepeatngrams": 1, "batchslot": 1, "cacheindirect": 1, "medusainput": 1, "medusapath": 1, "medusatreeid": 1, "medusalogit": 1, "medusacurtokensperstep": 1, "medusatargettokensperstep": 1, "sharedptr": 1, "newtokensstep": 1, "newtoken": 1, "newtokensvec": 1, "finishedsum": 1, "parentid": 1, "beamhypothes": 1, "medusaoutput": 1, "knegativeinfin": 1, "1e20f": 1, "batchsiz": [1, 5, 15], "maxsequencelength": [1, 36], "releas": [1, 4, 5, 13, 14, 17, 18, 20, 22, 23, 24, 28, 29, 31, 36, 37, 38], "init": [1, 21, 22, 28], "slice": [1, 29], "batchindex": 1, "outputidstgt": 1, "sequencelengthstgt": 1, "normedscor": 1, "minnormedscor": 1, "numbeam": 1, "isdon": 1, "medusanextdrafttoken": 1, "medusaacceptedtokenslen": 1, "medusaacceptedlengthscumsum": 1, "medusapathsoffset": 1, "ttensor": 1, "genericgenerationinput": 1, "mark": [1, 2, 6, 29, 39], "aka": [1, 7, 29], "eo": [1, 5], "": [1, 2, 3, 5, 6, 9, 10, 11, 12, 13, 14, 16, 17, 21, 22, 24, 25, 27, 28, 29, 30, 31, 34, 36, 37, 39, 41], "50": [1, 8, 18], "256": [1, 14, 17, 26, 28, 40], "gpt2": [1, 39], "vocabulari": [1, 5, 30, 41], "257": 1, "fill": [1, 29], "greater": [1, 4, 5, 17, 18, 29], "equal": [1, 27, 29, 30, 36], "same": [1, 2, 4, 5, 6, 7, 8, 11, 13, 15, 27, 28, 29, 30, 34, 36, 41], "tensor": [1, 2, 5, 7, 10, 11, 14, 15, 16, 17, 19, 20, 28, 29, 30, 31, 34, 37, 39, 40], "That": [1, 2, 4, 5, 11, 25, 29], "maxinputlength": 1, "respect": [1, 3, 27, 28, 29, 34, 36, 37, 39], "sessionconfig": 1, "numtoken": 1, "sum": [1, 6, 9, 29], "In": [1, 6, 11, 13, 15, 18, 19, 20, 21, 22, 24, 28, 29, 35, 36, 37, 39, 41], "through": [1, 2, 4, 5, 6, 11, 12, 21, 25, 30, 35, 41], "instanc": [1, 2, 5, 6, 11, 34, 36, 41], "modelconfig": [1, 34], "futur": [1, 4, 13, 18, 20, 21, 22, 27, 28, 29, 36, 37, 41], "session": [1, 20, 34], "made": [1, 25], "flexibl": [1, 13, 21, 41], "automat": [1, 2, 6, 11, 22, 24, 25, 29, 36, 37], "embeddingbiasopt": 1, "point": [1, 2, 4, 5, 11, 12, 15, 18, 27, 28, 29, 35, 37, 39], "add": [1, 4, 6, 9, 10, 11, 13, 21, 22, 24, 29, 34, 39, 40], "dure": [1, 2, 4, 5, 6, 11, 17, 22, 26, 27, 29, 34, 36, 41], "after": [1, 2, 4, 6, 8, 11, 19, 21, 22, 26, 27, 28, 29, 30, 32, 35, 36, 39, 41], "project": [1, 4, 8, 21], "hidden": [1, 3, 4, 5, 8, 24, 29, 30, 36, 41], "vocabs": [1, 5], "argument": [1, 2, 19, 21, 27, 29, 36, 40], "integ": [1, 4, 5, 27, 29, 37], "encod": [1, 4, 5, 15, 29, 37, 38, 40], "ban": 1, "Its": [1, 2, 4, 5, 29], 
"2": [1, 2, 4, 6, 7, 8, 10, 13, 14, 15, 16, 18, 22, 24, 26, 27, 28, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41], "badwordslength": 1, "explain": [1, 5, 11, 12, 27, 29, 36, 37], "trigger": [1, 4, 6, 11, 26, 27], "stopwordslength": 1, "let": [1, 6, 10, 25, 29], "three": [1, 10, 18, 19, 29, 37], "describ": [1, 2, 4, 5, 7, 8, 9, 11, 12, 17, 21, 23, 26, 28, 29, 37, 39, 41], "represent": [1, 6, 11], "those": [1, 4, 5, 10, 11, 12, 19, 26, 27, 28, 29, 30, 37], "5": [1, 8, 10, 14, 15, 16, 18, 22, 27, 28, 29, 38, 39, 40, 41], "7": [1, 8, 14, 15, 18, 20, 22, 28, 29, 39], "3": [1, 4, 6, 7, 8, 14, 15, 16, 18, 22, 23, 24, 28, 29, 34, 36, 38, 39, 40], "second": [1, 2, 14, 16, 17, 19, 29, 41], "9": [1, 2, 8, 13, 15, 20, 22, 28, 29, 38, 39, 41], "third": [1, 2, 19], "4": [1, 6, 8, 13, 16, 18, 28, 29, 35, 36, 37, 38, 39, 40, 41], "row": [1, 2, 8, 29, 37], "inclus": [1, 2, 29], "prefix": [1, 10, 29, 39, 41], "shown": [1, 7, 16, 21, 29], "follow": [1, 2, 5, 6, 8, 10, 11, 18, 19, 21, 22, 24, 27, 28, 29, 35, 37, 38, 41], "diagram": [1, 41], "v": [1, 4, 5, 8, 14, 15, 18, 20, 22, 28, 29, 35, 37, 39], "case": [1, 2, 4, 5, 15, 18, 28, 29, 36, 37, 41], "inner": [1, 29], "instead": [1, 6, 11, 13, 14, 21, 27, 29, 36, 40, 41], "8212": 1, "genericgenerationoutput": 1, "maxseqlength": [1, 2], "shorter": [1, 4, 27], "than": [1, 4, 5, 6, 11, 14, 15, 16, 18, 25, 27, 29, 34, 36, 39, 41], "ad": [1, 4, 5, 6, 13, 20, 22, 26, 29, 34, 40, 41], "note": [1, 2, 6, 8, 11, 16, 18, 19, 20, 27, 29, 34, 36, 37, 38, 41], "version": [1, 4, 5, 10, 13, 21, 22, 23, 28, 29, 39, 40], "tensorrt": [1, 4, 5, 6, 8, 9, 14, 17, 19, 23, 24, 26, 29, 34, 37, 38, 39], "its": [1, 2, 4, 5, 6, 10, 11, 13, 14, 16, 25, 27, 29, 35, 36], "previou": [1, 2, 3, 13, 15], "prob": [1, 2, 7], "like": [1, 2, 4, 5, 6, 10, 11, 12, 13, 18, 22, 24, 25, 27, 28, 29, 35, 36, 37, 40], "chang": [1, 4, 13, 14, 16, 17, 20, 21, 22, 25, 27, 29, 31, 34, 36], "remove_input_pad": [1, 4, 8, 27, 29, 30, 32, 34, 39], "packeds": 1, "built": [1, 2, 5, 11, 13, 19, 21, 22, 27, 28, 29, 35, 36, 39, 40], "gather_context_logit": [1, 2, 31, 34], "gather_all_token_logit": [1, 2, 40], "enabl": [1, 2, 4, 5, 6, 8, 11, 15, 16, 17, 18, 20, 21, 27, 28, 29, 30, 35, 36, 37, 39, 40, 41], "you": [1, 2, 3, 4, 5, 6, 10, 11, 12, 13, 18, 19, 20, 21, 22, 23, 24, 27, 28, 29, 34, 35, 36, 39, 41], "acquisit": 1, "method": [1, 4, 5, 10, 11, 13, 14, 19, 22, 28, 34, 37, 41], "pleas": [1, 2, 6, 14, 16, 17, 18, 23, 35, 39], "gptsessionbenchmark": [1, 26, 28, 40], "cpp": [1, 2, 4, 5, 7, 11, 19, 20, 21, 22, 28, 29, 39], "import": [1, 13, 14, 18, 20, 22, 23, 24, 38, 40, 41], "out": [1, 7, 8, 13, 14, 15, 16, 17, 22, 26, 28, 29, 35, 36], "impact": [1, 14, 18, 27, 28, 41], "perform": [1, 2, 4, 5, 6, 7, 8, 11, 12, 13, 14, 16, 17, 19, 21, 25, 28, 29, 35, 38, 40], "languag": [1, 5, 11, 12, 14, 23, 25, 26, 29, 37, 41], "head": [1, 5, 11, 14, 20, 29, 36, 40, 41], "lm": [1, 41], "matrix": [1, 4, 11, 17, 20, 25, 29, 35], "just": [1, 26, 35, 36, 41], "maxoutputlen": 1, "gather_generation_logit": [1, 2, 31, 34], "also": [1, 2, 4, 5, 6, 10, 11, 12, 13, 16, 17, 18, 19, 21, 22, 23, 26, 27, 29, 35, 36, 37, 41], "obtain": [1, 2, 12, 19, 23, 27, 29], "ontokengener": 1, "callback": [1, 19], "invok": [1, 2, 6], "caller": 1, "continu": [1, 2, 4, 16, 18, 19, 25, 41], "implement": [1, 2, 4, 5, 10, 11, 12, 13, 14, 25, 27, 28, 29, 35, 37, 38, 41], "boolean": [1, 2, 19, 29], "layer": [1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 27, 29, 35, 36, 37, 39, 41], "igptdecod": 1, "subclass": [1, 13], "virtual": [1, 30], "setup": [1, 4, 22, 34, 35, 36], "forward": [1, 2, 
6, 11, 27, 30, 31, 39, 41], "forwardasync": 1, "gathertre": 1, "finaloutputid": 1, "acceptdrafttokensbyid": 1, "targettokenid": 1, "drafttokenid": 1, "contextlength": 1, "numdrafttoken": 1, "sequencelength": 1, "finishedvec": 1, "finishedfin": 1, "acceptdrafttokensbylogit": 1, "draftlogit": 1, "targetlogit": 1, "draftprob": 1, "targetprob": 1, "userandomacceptthreshold": 1, "randomacceptthreshold": 1, "curandstate_t": 1, "curandst": 1, "updatekvcachebasedonacceptedtoken": 1, "acceptedoffset": 1, "packedacceptedid": 1, "pointerarrai": 1, "pastkeyvaluelength": 1, "rewinddrafttokencount": 1, "maxblocksperseq": 1, "dtype": [1, 6, 8, 10, 11, 13, 28, 29, 30, 31, 33, 34, 39], "maxtokensperstep": 1, "maxnummedusahead": 1, "overrid": [1, 2, 13, 34], "mmanag": 1, "dynamicdecodelay": 1, "mdynamicdecodelay": 1, "mlogprobstil": 1, "msamplingconfig": 1, "cudadeviceprop": 1, "mprop": 1, "mmaxbatchs": 1, "gpt": [1, 2, 4, 7, 11, 12, 15, 18, 20, 29, 36, 37, 38, 39, 40, 41], "flight": [1, 12, 19, 20, 35, 36], "sharedconstptr": 1, "fuseddecod": 1, "newbatch": 1, "initi": [1, 36, 39, 41], "newrequest": 1, "seqslot": 1, "decoder_batch": 1, "tokenptr": 1, "forwardsync": 1, "thread": [1, 2, 4, 27], "getfinish": 1, "getoutputid": 1, "batchidx": 1, "gather": [1, 22, 29], "getparentid": 1, "parent": [1, 13], "collect": [1, 2, 6, 11, 28, 29, 41], "getcumlogprob": 1, "getlogprob": 1, "getallnewtoken": 1, "getnewtoken": 1, "within": [1, 4, 11, 22, 28, 29, 41], "getnbstep": 1, "getnbfinish": 1, "getnextdrafttoken": 1, "predict": [1, 4, 41], "next": [1, 2, 11, 13, 15, 20, 25, 34, 36, 41], "getmedusaacceptedlengthscumsum": 1, "exclus": [1, 5, 37], "getmedusaacceptedpackedpath": 1, "maxmedusahead": 1, "gptdecoderptr": 1, "decodinginputptr": 1, "decodingoutputptr": 1, "postprocessrequest": 1, "allocatemedusabuff": 1, "medusa": [1, 29, 34, 40], "setupmedusa": 1, "newrequestspeculativedecod": 1, "intern": [1, 2, 4, 13, 36, 39], "newrequestmedusa": 1, "forwardasyncunfuseddecod": 1, "eventstart": 1, "asynchron": [1, 19], "unfus": 1, "whole": [1, 25, 29, 36], "forwardasyncfuseddecod": 1, "fuse": [1, 4, 11, 35, 41], "mvocabs": 1, "mvocabsizepad": 1, "mbuffermanag": 1, "mforwardtoken": 1, "mforwardev": 1, "mdecod": 1, "mdecodinginput": 1, "mdecodingoutput": 1, "mjointdecodinginput": 1, "mjointdecodingoutput": 1, "macceptbylogit": 1, "mnumdrafttoken": 1, "mcurandst": 1, "mnbstep": 1, "mfinish": 1, "mfinishedsum": 1, "mmaxnewtoken": 1, "mgeneratedtokensperenginestep": 1, "mfinishedstep": 1, "mdraftprob": 1, "mtargetprob": 1, "mdrafttokenid": 1, "mdraftlogit": 1, "mbatchslotssetup": 1, "mbatchslotsdecod": 1, "mbatchslotsaccepttoken": 1, "mbatchslotsacceptlogit": 1, "mtargetlogitsptr": 1, "mmaxsequencelength": 1, "mactualbatchs": 1, "mmaxtokensperenginestep": 1, "mmaxstopwordslen": 1, "mmaxbadwordslen": 1, "mmaxtokensperdecoderstep": 1, "mfuseddecod": 1, "musemedusa": 1, "precis": [1, 5, 14, 18, 20, 35, 36, 38], "tensorparallel": [1, 5], "pipelineparallel": [1, 5], "getmodelconfig": 1, "getnam": 1, "getvers": 1, "getprecis": 1, "gettensorparallel": 1, "getpipelineparallel": 1, "getworlds": 1, "enginefilenam": 1, "pars": 1, "istream": 1, "mname": 1, "mversion": 1, "mprecis": 1, "mtensorparallel": 1, "mpipelineparallel": 1, "mgptmodelconfig": 1, "mambaconfig": 1, "dstate": [1, 29], "dconv": [1, 29], "expand": [1, 16, 18, 29], "modelvari": 1, "kgpt": 1, "kglm": 1, "kmamba": 1, "nblayer": 1, "nbhead": 1, "hiddens": [1, 5], "getvocabs": 1, "getvocabsizepad": 1, "worldsiz": 1, "getnblay": 1, "getnbhead": 1, "getnbkvhead": 1, "setnbkvhead": 
1, "nbkvhead": 1, "gethiddens": 1, "getsizeperhead": 1, "setsizeperhead": 1, "sizeperhead": 1, "usegptattentionplugin": [1, 5], "usemambaconv1dplugin": 1, "usepackedinput": 1, "inputpack": [1, 5], "usepagedkvcach": 1, "pagedkvcach": [1, 5], "usepagedst": 1, "pagedst": 1, "gettokensperblock": 1, "settokensperblock": 1, "quantmod": [1, 4, 5, 20, 29, 30, 31, 33, 34], "getquantmod": 1, "setquantmod": 1, "supportsinflightbatch": 1, "getmaxbatchs": 1, "setmaxbatchs": 1, "getmaxinputlen": 1, "setmaxinputlen": 1, "maxinputlen": [1, 5], "getmaxsequencelen": 1, "setmaxsequencelen": 1, "maxsequencelen": [1, 5], "getmaxnumtoken": 1, "setmaxnumtoken": 1, "maxnumtoken": 1, "useprompttun": 1, "getmaxpromptembeddingtables": 1, "setmaxpromptembeddingtables": 1, "maxpromptembeddingtables": 1, "computecontextlogit": 1, "computegenerationlogit": 1, "getmodelvari": 1, "setmodelvari": 1, "usecustomallreduc": 1, "customallreduc": 1, "setmaxdraftlen": 1, "maxdraftlen": 1, "getmaxdraftlen": 1, "getmaxtokensperstep": 1, "setusecontextfmhaforgener": 1, "usecontextfmhaforgener": 1, "getcontextfmhaforgener": 1, "setpagedcontextfmha": 1, "pagedcontextfmha": 1, "getpagedcontextfmha": 1, "useloraplugin": 1, "getloramodul": 1, "setloramodul": 1, "getmlphiddens": 1, "setmlphiddens": 1, "mlphiddens": 1, "getmaxlorarank": 1, "setmaxlorarank": 1, "maxlorarank": 1, "usemedusa": 1, "medusamodul": 1, "getmedusamodul": 1, "setmedusamodul": 1, "getkvdatatyp": 1, "istransformerbas": 1, "hasmambaconfig": 1, "getmambaconfig": 1, "setmambaconfig": 1, "isssmbas": 1, "mnblayer": 1, "mnbhead": 1, "mnbkvhead": 1, "mhiddens": 1, "msizeperhead": 1, "mdatatyp": 1, "musegptattentionplugin": 1, "musemambaconv1dplugin": 1, "minputpack": 1, "mpagedkvcach": 1, "mpagedst": 1, "mtokensperblock": 1, "mquantmod": 1, "mmaxinputlen": 1, "mmaxsequencelen": 1, "mmaxnumtoken": 1, "mcomputecontextlogit": 1, "mcomputegenerationlogit": 1, "mmodelvari": 1, "musecustomallreduc": 1, "mmaxpromptembeddingtables": 1, "mmaxdraftlen": 1, "musecontextfmhaforgener": 1, "mpagedcontextfmha": 1, "museloraplugin": 1, "mloramodul": 1, "mmlphiddens": 1, "mmaxlorarank": 1, "mmedusamodul": 1, "mmambaconfig": 1, "batch_manag": [1, 2], "kv_cache_manag": 1, "loggerptr": 1, "ilogg": 1, "engines": 1, "logger": 1, "environ": [1, 2, 5, 21, 22, 23, 24, 28, 39, 41], "compil": [1, 5, 12, 20, 21, 25, 29], "enginefil": 1, "getlogg": 1, "getbuffermanag": 1, "getworldconfig": 1, "getlogitdatatyp": 1, "generationprofil": 1, "read": [1, 2, 4, 11, 35], "popul": [1, 2, 4, 11, 29], "produc": [1, 2, 6, 11, 19, 22, 27, 29], "product": [1, 4, 11, 16, 23, 25, 29, 35, 41], "pseudo": [1, 4, 29, 37], "code": [1, 2, 4, 6, 11, 13, 18, 20, 25, 28, 29, 37, 38, 39, 41], "look": [1, 13, 17, 21, 25, 26], "were": [1, 10, 13, 14, 18, 28, 41], "simpl": [1, 6, 11, 21, 25, 41], "allfinish": 1, "limit": [1, 4, 5, 6, 11, 13, 18, 22, 25, 27, 28, 29, 34, 36, 38], "computelogit": 1, "generatetokensfromlogit": 1, "kvcachemanag": [1, 4, 34], "tokengeneratedcallback": 1, "usecudagraph": 1, "generatebatch": 1, "microbatchesoutput": 1, "microbatchesinput": 1, "createcontext": 1, "createbuff": 1, "nummicrobatch": 1, "createdecod": 1, "logitstyp": 1, "decoderperrequest": 1, "createkvcachemanag": 1, "createcustomallreduceworkspac": 1, "executecontextstep": 1, "generationbatchesinput": 1, "generationbatchesoffset": 1, "executegenerationstep": 1, "microbatchoffset": 1, "microbatchesfinish": 1, "decoderstepasync": 1, "decoderstep": 1, "pp": [1, 5, 8, 14, 17, 29], "shouldstopsync": 1, "shouldstop": 1, "access": [1, 2, 19, 22, 28, 
29, 40], "kvcacheaddsequ": 1, "firstbatchidx": 1, "initdecod": 1, "outputid": [1, 2], "createontokengeneratedcallback": 1, "mmodelconfig": 1, "mworldconfig": 1, "ncclcommun": 1, "mpipelinecomm": 1, "mcommstream": 1, "mcommev": 1, "mcommptr": 1, "ipcmemori": 1, "mipcmemoryhandl": 1, "mdecodermaxsequencelength": 1, "mdecodermaxattentionwindow": 1, "mdecodersinktokenlength": 1, "mlogger": 1, "tllmruntim": [1, 5], "mruntim": 1, "mkvcachemanag": 1, "microbatchconfig": 1, "mmicrobatchconfig": 1, "runtimebuff": 1, "mbuffer": 1, "mreceivedev": 1, "mcudagraphmod": 1, "cudagraphexecutor": 1, "mcudagraphinst": 1, "trtgptmodelv1": 1, "smaller": [1, 26, 27, 28, 29, 36, 41], "divid": [1, 29], "micro": [1, 2, 27, 36], "cudagraphmod": 1, "ctxmicrobatchs": 1, "genmicrobatchs": 1, "hasinst": 1, "clear": [1, 34], "preparenextgraph": 1, "nextcontextid": 1, "launch": [1, 2, 11, 20, 22, 39, 40], "cudagraph_t": 1, "graph": [1, 11, 20, 29, 35, 36, 39], "updat": [1, 11, 13, 16, 21, 22, 23, 27, 28, 34, 41], "uploadtostream": 1, "cudagraphexec_t": 1, "minstanc": 1, "profil": [1, 20, 29, 36, 40], "getstart": 1, "getend": 1, "getelapsedtimem": 1, "cudaeventdefault": 1, "numctxpergen": 1, "getgengraphid": 1, "flipflopid": 1, "generationbatchid": 1, "flip": [1, 29], "flop": 1, "between": [1, 2, 4, 5, 11, 13, 22, 24, 26, 27, 29, 36, 41], "numctxbatch": 1, "numgenbatch": 1, "ctxbatchsiz": 1, "genbatchs": 1, "loadengin": 1, "enginepath": 1, "memorytypestr": 1, "kpin": 1, "datatypetrait": 1, "kfloat": [1, 11], "char": 1, "sizeof": 1, "khalf": 1, "int8": [1, 10, 13, 18, 20, 29, 36, 38, 40], "int32": [1, 4, 29], "int64": [1, 29], "uint32_t": [1, 29], "uint32": 1, "uint64": 1, "kunsign": 1, "uint8": 1, "trtdatatyp": 1, "bufferdatatyp": 1, "pointerelementtyp": 1, "remove_reference_t": 1, "remove_const_t": 1, "constpointercast": 1, "ptr": 1, "d": [1, 2, 7, 8, 22, 28, 29, 35], "buffercast": 1, "ostream": 1, "print": [1, 4, 22, 24, 26, 28, 36, 39], "kdatatyp": 1, "kisunsign": 1, "kispoint": 1, "wrapper": [1, 6, 13], "around": [1, 10, 13, 25], "_unsign": 1, "ispoint": 1, "isunsign": 1, "ktrtpointertyp": 1, "munsign": 1, "mpointer": 1, "kunderlyingtyp": 1, "uniqueconstptr": 1, "getcapac": 1, "capac": [1, 14, 16, 18], "getdatatypenam": 1, "getmemorytypenam": 1, "newsiz": 1, "op": [1, 6, 29], "Not": [1, 18], "offset": [1, 29, 34, 37], "view": [1, 29], "tconstptr": 1, "enable_if_t": 1, "is_const_v": 1, "independ": [1, 19, 29, 41], "cannot": [1, 5, 11, 27, 28, 29, 39], "beyond": [1, 15, 23, 27], "determin": [1, 4, 13, 27, 29, 31, 37], "protect": 1, "tobyt": 1, "bufferrang": 1, "actual": [1, 2, 6, 18, 27, 29, 41], "maxseqlen": 1, "consttensorptr": 1, "bufferptr": 1, "inputlen": 1, "drafttoken": [1, 41], "computecumlogprob": 1, "computelogprob": 1, "generatedtokensperenginestep": 1, "tensorconstptr": 1, "reshapebuff": 1, "decltyp": 1, "volum": [1, 21], "squeez": 1, "remov": [1, 4, 5, 6, 11, 21, 23, 28, 29, 36, 40], "unit": [1, 20, 21, 22], "unsqueez": [1, 29], "shapeequ": 1, "count": [1, 2, 26, 35], "nbdim": 1, "volumenonneg": 1, "w": [1, 17, 37, 38], "r": [1, 7, 8, 23, 28, 29, 39], "makeshap": 1, "conveni": [1, 13], "tostr": 1, "lh": 1, "compar": [1, 5, 15, 16, 18, 27, 29], "castsiz": 1, "setpeeraccess": 1, "buffers": 1, "getcommptrstensor": 1, "flags_siz": 1, "kernel": [1, 4, 5, 11, 14, 25, 26, 27, 28, 29, 35, 36, 39, 40], "max_all_reduce_block": 1, "allocateipcmemori": 1, "destroyipcmemori": 1, "mbuffers": 1, "mbufferptr": 1, "difftyp": 1, "ptrdiff_t": 1, "getgpu": 1, "getcpu": 1, "getpin": 1, "getuvm": 1, "getgpudiff": 1, 
"getcpudiff": 1, "getpinneddiff": 1, "getuvmdiff": 1, "dealloc": 1, "getinst": 1, "bytestostr": 1, "atom": 1, "mgpu": 1, "mcpu": 1, "mpin": 1, "muvm": 1, "mgpudiff": 1, "mcpudiff": 1, "mpinneddiff": 1, "muvmdiff": 1, "genericprompttuningparam": 1, "prompttuningen": 1, "filltaskstensor": 1, "taskshost": 1, "reqbeamwidth": 1, "reqpromptlength": 1, "packedinput": 1, "set_from_opt": 1, "varnam": 1, "vartyp": 1, "executor": [1, 5, 40], "optvec": 1, "draftacceptancethreshold": 1, "topkmedusahead": 1, "vec": 1, "fusevalu": 1, "ci": 1, "accessor": 1, "sever": [1, 2, 4, 6, 10, 29, 34, 36, 39, 41], "asciichar": 1, "getlevel": 1, "setlevel": 1, "level": [1, 2, 4, 8, 9, 10, 13, 19, 26, 36, 40], "gpuspernod": [1, 5], "kdefaultgpuspernod": 1, "istensorparallel": 1, "ispipelineparallel": 1, "getrank": 1, "getgpuspernod": 1, "getgpuspergroup": 1, "getpipelineparallelrank": 1, "gettensorparallelrank": 1, "isfirstpipelineparallelrank": 1, "islastpipelineparallelrank": 1, "my": 1, "pipelin": [1, 2, 5, 11, 14, 17, 19, 36], "getlastrank": 1, "getpipelineparallelgroup": 1, "validmpiconfig": 1, "mrank": 1, "mgpuspernod": 1, "underlyingtyp": 1, "isnon": 1, "istopk": 1, "istopp": 1, "istopkortopp": 1, "istopkandtopp": 1, "isbeamsearch": 1, "ismedusa": 1, "none": [1, 6, 13, 29, 30, 31, 32, 34, 39], "topktopp": 1, "beamsearch": 1, "anybitset": 1, "bit": [1, 2, 4, 5, 15, 22, 29, 37], "allbitset": 1, "mstate": 1, "knone": 1, "ktopk": 1, "1u": 1, "ktopp": 1, "kbeamsearch": 1, "kmedusa": 1, "ktopktopp": 1, "to_str": 1, "tasklayermoduleconfig": 1, "o": [1, 6, 8, 13, 17, 36], "loracachepagemanag": 1, "page": [1, 2, 5, 16, 20, 29, 35, 36, 40], "pre": [1, 4, 10, 12, 22, 23, 25, 28, 29, 35, 36], "safe": [1, 2, 6], "claimpag": 1, "numpag": 1, "claim": 1, "tupl": [1, 29, 30, 34], "pageid": 1, "numavailablepag": 1, "releasepag": 1, "blockptr": 1, "blockidx": 1, "pageptr": 1, "pageidx": 1, "mutablepageptr": 1, "mutabl": 1, "mpageblock": 1, "mfreepageid": 1, "mispagefre": 1, "lru": 1, "put": [1, 2, 10, 25], "progress": [1, 29], "done": [1, 11, 25, 28, 29, 32], "optim": [1, 2, 5, 6, 11, 12, 13, 14, 15, 16, 17, 18, 20, 23, 25, 28, 29, 35, 36, 38, 40, 41], "numslot": 1, "x": [1, 7, 8, 29, 30, 31, 35, 37], "pagewidth": 1, "conceptu": 1, "smallest": [1, 29], "ceildiv": 1, "num": 1, "taskidtyp": 1, "tasklayermoduleconfiglistptr": 1, "pagemanagerconfig": 1, "param": [1, 7, 29, 30, 31, 34], "load": [1, 2, 8, 10, 11, 13, 17, 27, 28, 31, 33, 40], "otherwis": [1, 2, 4, 5, 28, 29, 34, 39], "do": [1, 6, 13, 18, 20, 22, 26, 28, 29, 35], "loadweight": 1, "design": [1, 11, 13, 18, 27, 41], "taslid": 1, "isload": 1, "place": [1, 27, 29], "necessarili": [1, 11, 36], "bump": 1, "make": [1, 4, 6, 7, 8, 11, 13, 18, 20, 21, 25, 28, 29, 35, 39, 40], "recent": [1, 3, 4, 15], "marktaskdon": 1, "mean": [1, 3, 4, 5, 10, 13, 15, 16, 26, 27, 28, 29, 32, 34, 36, 41], "markalldon": 1, "determinenumpag": 1, "need": [1, 2, 4, 5, 6, 10, 11, 12, 13, 19, 21, 22, 25, 26, 27, 28, 29, 31, 32, 34, 36, 39, 41], "fit": [1, 2, 4, 14, 15], "copytask": 1, "devicecach": 1, "markdon": 1, "othercach": 1, "move": [1, 13, 22, 24, 25, 39, 40], "getnumpag": 1, "getpageptr": 1, "copytopag": 1, "moduleidtomodel": 1, "modelidtomodel": 1, "map": [1, 2, 4, 6, 7, 10, 11, 13, 19, 28, 29, 30, 31, 34, 35], "splittransposecpu": 1, "tpsize": 1, "tprank": 1, "split": [1, 3, 4, 5, 11, 29, 34, 36, 40], "part": [1, 3, 6, 11, 13, 21, 25, 27, 29], "write": [1, 9], "valuestatu": 1, "kvalue_status_miss": 1, "kvalue_status_process": 1, "kvalue_status_load": 1, "taskvalueptr": 1, "taskvalu": 1, 
"cachevalu": 1, "bumptaskinprogress": 1, "getstatu": 1, "claimpageswithevict": 1, "runtime_error": 1, "pair": [1, 14, 29], "copytaskmappag": 1, "targettaskvalu": 1, "sourcetaskvalu": 1, "targetpageid": 1, "targetcach": 1, "insid": [1, 13, 21, 23, 29, 36], "mpagemanagerconfig": 1, "mutex": 1, "mpagesmutex": 1, "mcachepagemanag": 1, "mcachemutex": 1, "mcachemap": 1, "minprogresstask": 1, "mdonetask": 1, "mdevicebuffermanag": 1, "mmoduleidtomodul": 1, "splittransposecpuinn": 1, "slotidx": 1, "insiz": 1, "outsiz": 1, "moduleid": [1, 8], "layerid": [1, 8], "adapters": 1, "weightsinpoint": 1, "weightsoutpoint": 1, "inprogress": 1, "loadinprogress": 1, "concurr": [1, 2, 15, 27, 41], "doc": [1, 13, 17, 29, 38, 39, 40], "memtyp": 1, "totalnumpag": 1, "maxpagesperblock": 1, "slotsperpag": 1, "setmemorytyp": 1, "setdatatyp": 1, "gettotalnumpag": 1, "settotalnumpag": 1, "getmaxpagesperblock": 1, "setmaxpagesperblock": 1, "getslotsperpag": 1, "setslotsperpag": 1, "getpagewidth": 1, "setpagewidth": 1, "getinittozero": 1, "setinittozero": 1, "inittozero": 1, "setnumcopystream": 1, "mmemorytyp": 1, "mtotalnumpag": 1, "mmaxpagesperblock": 1, "mslotsperpag": 1, "mpagewidth": 1, "minittozero": 1, "moduletyp": 1, "kinvalid": 1, "kattn_qkv": 1, "kattn_q": 1, "kattn_k": 1, "kattn_v": 1, "kattn_dens": 1, "kmlp_h_to_4h": 1, "kmlp_4h_to_h": 1, "kmlp_gate": 1, "kcross_attn_qkv": 1, "kcross_attn_q": 1, "kcross_attn_k": 1, "kcross_attn_v": 1, "kcross_attn_dens": 1, "indim": 1, "outdim": 1, "indimfirst": 1, "outdimfirst": 1, "intpsplitdim": 1, "outtpsplitdim": 1, "flattenedinouts": 1, "localins": 1, "localouts": 1, "localindim": 1, "localoutdim": 1, "localinadapters": 1, "localoutadapters": 1, "localinouts": 1, "string_view": 1, "createloramodul": 1, "loramodulenam": 1, "numattentionhead": 1, "numkvattentionhead": 1, "attentionheads": 1, "tomoduletyp": 1, "tomodulenam": 1, "mtype": 1, "mindim": 1, "moutdim": 1, "mindimfirst": 1, "moutdimfirst": 1, "mintpsplitdim": 1, "mouttpsplitdim": 1, "reli": [2, 4, 6, 13, 26, 37], "compon": [2, 4, 11, 12, 18, 19, 37], "known": [2, 4, 11, 20, 29, 38, 41], "techniqu": [2, 4, 6, 11, 14, 25, 27, 37, 40, 41], "aim": [2, 10, 25], "reduc": [2, 4, 11, 14, 17, 21, 25, 27, 29, 36, 39, 40, 41], "queue": [2, 25], "elimin": [2, 25], "featur": [2, 4, 5, 6, 8, 10, 11, 13, 18, 19, 20, 21, 27, 28, 29, 32, 35, 38], "via": [2, 21, 22, 29, 41], "expos": [2, 5, 11, 21, 27], "hook": 2, "user": [2, 4, 5, 6, 8, 11, 12, 13, 17, 18, 19, 21, 22, 26, 27, 28, 29, 31, 36, 37, 39], "regist": [2, 39], "softwar": [2, 4, 5, 19, 20, 25, 40], "client": [2, 19, 35], "text": [2, 4, 5, 19, 25, 39], "interact": [2, 19, 22, 25, 39, 41], "two": [2, 3, 4, 5, 6, 10, 11, 13, 15, 19, 21, 22, 24, 27, 28, 29, 30, 34, 41], "Their": 2, "signatur": [2, 6, 29], "h": [2, 4, 19, 29, 41], "file": [2, 3, 4, 6, 10, 11, 19, 26, 28, 34, 35], "These": [2, 13, 14, 16, 17, 22, 28, 35, 41], "regular": [2, 4, 29], "interv": 2, "serv": [2, 4, 11, 12, 17, 18, 27, 41], "varieti": [2, 40], "entri": [2, 8], "getinferencerequestscallback": 2, "inferencerequest": [2, 7, 8, 40], "neg": [2, 27, 29], "unbound": 2, "deliv": [2, 14, 17, 28], "sendresponsecallback": 2, "conform": 2, "non": [2, 4, 5, 11, 13, 18, 28, 29, 40], "messag": [2, 29, 36], "encount": [2, 39], "properli": [2, 22, 24, 27], "handl": [2, 13, 14, 27, 29, 30, 35], "reject": 2, "pollstopsignalcallback": 2, "unordered_set": 2, "ensur": [2, 6, 13], "report": [2, 26, 36, 40], "returnbatchmanagerstatscallback": 2, "packag": [2, 19, 21, 22, 39], "field": [2, 5, 10, 13, 25, 28, 37, 40, 41], 
"put_tim": 2, "tm": 2, "m": [2, 15, 22, 28, 29, 37], "y": [2, 17, 21, 23, 37], "counter": [2, 26], "global": [2, 4, 7, 11], "monoton": 2, "over": [2, 15, 16, 18, 26, 29, 41], "addit": [2, 4, 11, 13, 16, 19, 21, 22, 26, 29, 30, 37, 38, 41], "across": [2, 4, 5, 6, 11, 16, 27, 29], "microbatch": [2, 26], "v1": [2, 27, 38], "alter": [2, 6, 19], "network": [2, 3, 4, 6, 11, 12, 13, 19, 28, 29, 35, 36, 37, 39], "attach": 2, "requestidtyp": 2, "tstream": 2, "altern": [2, 19], "howev": [2, 4, 13, 14, 19, 25, 27, 28, 36, 38, 41], "slow": [2, 19, 25], "down": [2, 8, 14, 19, 22, 25, 29, 35], "isn": [2, 19, 22], "moment": [2, 19], "trtenginepath": 2, "directori": [2, 9, 10, 11, 13, 19, 21, 22, 24, 28, 31, 34, 40], "inflightfusedbatch": 2, "improv": [2, 4, 5, 11, 14, 15, 16, 17, 18, 25, 27, 28, 40], "leverag": [2, 7, 14, 35, 41], "fusion": [2, 6, 20, 25, 36, 37], "opportun": 2, "strictli": 2, "superior": 2, "trtgptmodeloptionalparam": [2, 27], "encapsul": [2, 4, 5, 11, 29], "unspecifi": [2, 29], "well": [2, 4, 5, 11, 12, 15, 26, 27, 37, 38], "attend": [2, 27], "slide": [2, 20, 27, 29, 34], "streamingllm": [2, 20, 32, 40], "mha": [2, 4, 14, 27, 29], "mqa": [2, 4, 14, 17, 27, 29, 40], "previous": [2, 14, 28], "enabletrtoverlap": 2, "partit": [2, 4, 27], "hide": [2, 27], "give": [2, 25, 27, 31], "benefit": [2, 6, 16, 18, 25, 27, 28], "big": [2, 27], "enough": [2, 4, 27, 36], "overlap": 2, "overhead": [2, 11, 27], "too": [2, 4, 27], "small": [2, 4, 11, 18, 27, 36, 38, 41], "possibl": [2, 4, 5, 11, 21, 25, 27, 29, 36, 39, 41], "turn": [2, 4, 21, 27, 34, 36], "peftcachemanagerconfig": 2, "use_lora_plugin": [2, 34], "build": [2, 4, 5, 6, 8, 12, 20, 24, 25, 32, 35, 36, 39, 40, 41], "adapter_s": [2, 7, 8], "typic": [2, 6, 11, 13, 18, 36], "adapt": [2, 7, 8], "fix": [2, 8, 36], "exactli": 2, "least": [2, 4, 5, 13], "larg": [2, 4, 11, 12, 13, 14, 18, 23, 25, 26, 27, 28, 29, 36, 41], "worker": [2, 28], "h2d": 2, "05": [2, 27, 28, 29, 30, 31, 39], "percent": 2, "peft": 2, "1g": [2, 35, 39], "inputid": 2, "promptlength": 2, "dummi": 2, "outputlength": 2, "gptmanagerbenchmark": [2, 26, 40], "rememb": 2, "To": [2, 4, 7, 8, 11, 12, 13, 16, 20, 21, 22, 25, 26, 28, 29, 35, 36, 37, 40, 41], "redund": [2, 41], "much": [2, 11, 36], "we": [2, 3, 5, 6, 8, 10, 13, 17, 18, 24, 26, 28, 29, 35, 39, 40, 41], "recommend": [2, 4, 5, 12, 15, 18, 22, 24, 27, 28, 36, 39, 41], "max_batch_s": [2, 4, 8, 10, 13, 27, 28, 29, 31, 34, 36], "max_beam_width": [2, 4, 27, 29, 31, 34, 36], "max_input_len": [2, 8, 10, 27, 28, 31, 34, 36], "max_output_len": [2, 8, 10, 28, 34, 35, 39], "close": [2, 4, 13, 22, 24, 36], "integr": [2, 40, 41], "work": [2, 4, 5, 6, 11, 13, 21, 22, 24, 25, 27, 28, 29, 37, 38, 40], "item": [2, 34], "assum": [2, 29, 41], "style": [2, 4], "autoregress": 2, "architectur": [2, 3, 5, 10, 15, 21, 22, 25, 28, 31, 38], "spawn": 2, "persist": [2, 18, 28], "intend": [2, 22, 36], "system": [2, 11, 15, 20, 21, 22, 24, 27, 28, 38, 40], "retir": 2, "notifi": 2, "final_respons": 2, "relat": [2, 3, 21, 26, 29, 36, 39, 40], "freed": 2, "regress": [2, 4, 5, 11], "batchmanag": 2, "pathtotrtengin": 2, "trtgptmodeltyp": 2, "getinferencerequestscb": 2, "abov": [2, 8, 11, 13, 18, 21, 26, 28, 36], "sendresponsecb": 2, "pollstopsignalcb": 2, "returnbatchmanagerstatscb": 2, "help": [2, 4, 6, 11, 19, 21, 27, 28], "adjust": [2, 27, 36], "aggress": [2, 10, 27], "risk": [2, 11, 27], "short": [2, 4, 34], "resum": 2, "visibl": 2, "latenc": [2, 4, 5, 15, 16, 18, 20, 27, 29, 40, 41], "adopt": [2, 13], "approach": [2, 6, 27], "know": [2, 20, 26, 27], 
"suffici": [2, 22, 27], "even": [2, 4, 5, 11, 13, 18, 29, 36], "worst": 2, "consumpt": [2, 4, 15, 27], "node": [2, 5, 12, 20, 27, 29, 37, 39, 40], "cuda_visible_devic": 2, "care": 2, "taken": [2, 14, 15, 29], "backend": [2, 11, 12, 27, 35, 40, 41], "broadcast": [2, 29], "seen": [2, 27, 41], "ident": [2, 29], "mechan": [2, 11], "good": [2, 11], "wide": 3, "mistral": [3, 28, 37, 38, 40], "mixtral": [3, 28, 38, 40], "8x7b": [3, 28, 40], "structur": [3, 5, 6, 7, 36, 40, 41], "feedforward": 3, "neural": [3, 6, 11, 35, 40], "ffn": 3, "replac": [3, 6, 11, 13, 28, 29, 36], "dens": [3, 4, 8, 10, 29], "router": [3, 40], "dispatch": [3, 13], "As": [3, 4, 6, 8, 11, 12, 27, 29, 36, 37, 39, 41], "switch": [3, 15, 18, 21, 22, 36], "transform": [3, 4, 10, 11, 36, 39, 41], "http": [3, 8, 13, 21, 22, 23, 24, 28, 29, 35, 37, 38, 39], "arxiv": [3, 8, 29, 37], "org": [3, 8, 24, 29, 37], "pdf": [3, 8], "2101": 3, "03961": 3, "multi": [3, 5, 8, 10, 12, 13, 14, 19, 20, 21, 22, 29, 31, 37, 38, 40], "necessari": [3, 21, 29, 35, 41], "accommod": 3, "kind": [3, 4, 6], "pattern": [3, 20, 29], "evenli": 3, "distribut": [3, 4, 5, 11, 29, 36], "partial": [3, 11], "moe_tp_mod": 3, "convert_coneckpoint": 3, "py": [3, 4, 6, 8, 9, 10, 11, 13, 21, 22, 28, 29, 32, 34, 35, 39, 40], "tp_size": [3, 10, 13, 28, 29, 30, 31, 33, 39], "num_experts_per_tok": 3, "num_local_expert": 3, "find": [3, 28, 39], "gqa": [4, 14, 17, 27, 28, 29, 40], "quick": [4, 20, 25, 28], "remind": 4, "matmul": [4, 11, 27, 29, 37], "softmax": [4, 11, 29], "articl": [4, 41], "variant": [4, 13, 14, 29, 35], "fewer": [4, 14, 27, 41], "k": [4, 5, 8, 12, 29, 37, 39, 40, 41], "gpt_attent": [4, 6, 17, 29, 35, 40], "discuss": 4, "effici": [4, 5, 11, 12, 27, 28, 36, 41], "faster": [4, 13, 15, 16], "plugin": [4, 5, 6, 10, 20, 21, 22, 28, 29, 31, 35, 36, 37, 39, 40], "max_sequence_length": [4, 34], "excess": 4, "unneed": [4, 27], "variou": [4, 12, 22, 28, 41], "surround": 4, "overcom": [4, 11], "problem": [4, 21], "togeth": [4, 5, 8, 11, 12, 14, 19, 27, 37], "1d": [4, 29], "context_fmha_typ": [4, 36], "intermedi": [4, 11, 39], "q": [4, 5, 8, 14, 20, 29, 39], "slowest": 4, "footprint": [4, 14, 28, 36], "signific": [4, 16], "quadrat": [4, 36], "enabled_with_fp32_acc": 4, "accumul": [4, 29, 35], "forc": [4, 28], "fp32": [4, 20, 29, 35, 38], "vanilla": 4, "larger": [4, 15, 16, 18, 28, 29, 34, 36, 41], "flash": [4, 11], "flashattent": [4, 11, 35], "fast": [4, 41], "exact": [4, 36], "io": [4, 35, 36, 40], "awar": [4, 14, 39], "better": [4, 13, 17, 27, 40], "extra": [4, 10, 23, 24, 40, 41], "appli": [4, 6, 10, 11, 25, 29, 34, 37, 38, 40, 41], "plan": [4, 22], "order": [4, 14, 22, 27, 28, 29, 36], "overal": [4, 25, 41], "quantiz": [4, 5, 11, 14, 15, 20, 25, 27, 28, 29, 30, 31, 34, 35, 38, 40], "further": [4, 11, 14, 18], "acceler": [4, 15, 16, 17, 18, 25, 41], "use_fp8_context_fmha": [4, 32], "experiment": [4, 5, 22, 28, 37, 38, 40], "hopper": [4, 14, 15, 18, 20, 21, 25, 28, 38, 40], "notic": 4, "decreas": [4, 14, 15, 27], "accuraci": [4, 14, 27, 29, 37, 40], "mask": [4, 29, 30, 41], "abl": [4, 15, 29], "fly": [4, 28, 29, 37], "dequant": [4, 20, 29], "ia3": 4, "special": [4, 11, 14, 40], "occup": [4, 36], "low": [4, 9, 13, 18, 20, 27], "multi_block_mod": [4, 27, 32], "test": [4, 5, 18, 20, 21, 22, 27, 28, 40], "scenario": [4, 10, 16, 18, 27, 41], "definit": [4, 12, 13, 20, 29, 35], "hard": 4, "rule": [4, 39], "thumb": [4, 39], "worth": 4, "batch_siz": [4, 6, 10, 14, 17, 27, 28, 29, 30, 33, 34, 36], "num_head": [4, 27, 29, 31, 34], "less": [4, 5, 11, 15, 29], "processor": [4, 
34, 40], "suggest": [4, 18], "evolv": [4, 13, 25, 37], "research": [4, 37], "conduct": 4, "immedi": [4, 25, 39, 41], "There": [4, 5, 6, 10, 13, 16, 21, 27, 28, 29, 36, 37, 39], "becom": [4, 6, 11, 18, 25], "heurist": [4, 29], "share": [4, 6, 13, 18, 21, 22, 28, 29, 30, 40, 41], "proport": 4, "warn": [4, 28, 29, 36], "still": [4, 13, 25, 28, 29, 36], "llama2": [4, 8, 14, 15, 40], "70b": [4, 13, 16, 18], "fp16": [4, 8, 10, 14, 15, 18, 20, 27, 29, 35, 38, 39, 40], "bf16": [4, 13, 20, 27, 38, 40], "128": [4, 8, 14, 15, 16, 17, 18, 26, 28, 32, 40], "disable_xqa": 4, "decid": [4, 10, 26, 37], "want": [4, 13, 21, 23, 26, 29, 39, 41], "trtllm_force_xqa": 4, "found": [4, 5, 6, 11, 12, 15, 19, 21, 37, 41], "shouldus": 4, "decoderxqarunn": 4, "decodermaskedmultiheadattent": 4, "With": [4, 5, 11, 20, 41], "purpos": [4, 21], "interleav": [4, 11], "reason": [4, 5, 11, 13, 29], "go": [4, 27], "s0": 4, "s1": 4, "s2": 4, "relax": 4, "ineffici": 4, "origin": [4, 6, 29], "behavior": [4, 5, 29, 34, 36], "wai": [4, 6, 12, 22, 24, 25, 36], "best": [4, 11, 20, 21, 26, 28, 40], "practic": [4, 11, 15, 16, 20, 36, 40], "past": 4, "monolith": 4, "max_seqlen": [4, 29], "hidden_dim_per_head": [4, 29], "lot": [4, 11, 12, 27], "decompos": 4, "track": 4, "recycl": 4, "simplifi": [4, 13, 29, 40], "rest": 4, "bfloat16": [4, 28, 37, 40], "kv_cache_quant_mod": [4, 29], "int8_kv_cach": [4, 37, 40], "fp8_kv_cach": [4, 37], "kv_cache_scaling_factor": [4, 10], "invers": 4, "doe": [4, 5, 13, 14, 22, 27, 29, 34, 36, 40], "multipli": [4, 29], "fp_valu": 4, "quantized_valu": 4, "treat": [4, 29], "circular": 4, "n": [4, 10, 26, 27, 28, 29, 36, 37, 39], "max_attention_window_s": [4, 27, 29, 34], "generationsess": [4, 34, 36], "summar": [4, 10, 16, 18, 27, 28, 36], "overwrit": 4, "surpass": 4, "window_s": 4, "deal": [4, 6], "veri": [4, 10, 11, 12, 18, 27], "long": [4, 18, 26, 27, 36], "_note": 4, "doesn": 4, "simpli": [4, 25, 28, 39, 41], "torch": [4, 21, 22, 29, 34, 39], "num_lay": [4, 29, 30, 31, 34], "stabl": [4, 23, 24, 29], "similar": [4, 5, 6, 14, 16, 22, 26, 29, 41], "kept": [4, 13, 29], "sink_token_length": [4, 34], "But": [4, 25], "self": [4, 6, 11, 29, 31, 34, 39], "offici": 4, "save": [4, 13, 27, 28, 36, 41], "distanc": [4, 29], "rather": [4, 6, 25, 41], "reconstruct": [4, 29], "correct": [4, 8, 27, 39, 41], "cache_indirect": [4, 29, 30, 34, 39], "beam_width": [4, 7, 27, 29, 34], "si": 4, "bi": 4, "ti": 4, "concaten": [4, 8, 29], "along": [4, 12, 29, 40], "3d": [4, 29], "batch_beam_s": [4, 29], "hidden_dim": [4, 29], "num_token": [4, 29], "seq": [4, 36], "context_phas": 4, "generation_phas": 4, "rotary_embedding_dim": [4, 29], "neox": [4, 5, 37, 38, 40], "j": [4, 5, 15, 18, 29, 37, 38, 40], "form": [4, 19, 29, 35, 41], "position_embedding_typ": [4, 10, 28, 29, 30, 31], "positionembeddingtyp": [4, 29, 30], "rope_gpt_neox": [4, 28, 29], "rope_gptj": [4, 28, 29, 30], "slope": [4, 29], "constant": [4, 29, 36], "norm_factor": 4, "q_scale": [4, 29, 30, 31], "sqrt": [4, 29], "head_siz": [4, 29, 31, 34, 40], "On": [4, 21, 22, 27, 29], "broader": [4, 40], "aspect": [4, 27], "issu": [4, 11, 13, 20, 39], "accord": [4, 29, 30], "lightweight": 4, "popular": [4, 10, 13, 18], "t5": [4, 5, 37, 38, 40], "famili": [4, 38, 40], "ahead": [4, 41], "ii": [4, 29], "implicit": [4, 29], "suit": 4, "max_dist": [4, 29, 30, 31], "python": [5, 6, 8, 11, 12, 13, 20, 22, 23, 24, 25, 27, 35, 37, 40, 41], "api": [5, 10, 11, 12, 20, 21, 25, 26, 27, 29, 35, 36, 39, 41], "section": [5, 7, 8, 11, 13, 19, 21, 22, 25, 27, 28, 29, 35, 38, 40], "declar": [5, 6, 
13], "mention": [5, 13], "restrict": [5, 21, 29], "bloom": [5, 9, 23, 37, 38, 40], "llama": [5, 8, 13, 15, 16, 18, 35, 37, 38, 40, 41], "now": [5, 10, 14, 25, 27, 35, 41], "enc_dec": [5, 38], "gptmodelconfig": 5, "worldconfig": 5, "come": [5, 8, 15, 22, 36], "famou": 5, "mpi_comm_world": 5, "getter": 5, "setter": 5, "numlay": 5, "numhead": 5, "numkvhead": 5, "queri": [5, 11, 14, 20, 28, 29, 36, 41], "relev": [5, 21, 22], "numer": [5, 20, 35, 38], "familiar": [5, 11], "lmm": 5, "main": [5, 7, 14, 17, 19, 23, 26, 29, 38, 39], "thing": 5, "locat": [5, 6, 11, 21, 22, 24, 29, 39], "cluster": 5, "collabor": [5, 29], "tp": [5, 8, 14, 15, 16, 17, 18, 28, 29], "balanc": [5, 11, 27, 41], "bandwidth": [5, 11, 14, 15, 16, 18], "presenc": [5, 11], "nvlink": [5, 27], "consecut": 5, "happen": [5, 11, 39], "boundari": [5, 11, 36], "harder": 5, "absenc": 5, "advantag": [5, 25], "interconnect": 5, "a100": [5, 13, 38], "dgx": [5, 11], "penalti": [5, 40], "mutual": [5, 37], "largest": [5, 14, 15, 16, 29], "exponenti": [5, 41], "factual": 5, "enhanc": [5, 25, 27, 36, 41], "open": [5, 14, 22, 24, 25, 39], "0e": 5, "influenc": 5, "remain": [5, 6, 27, 29, 36, 41], "greedi": [5, 7], "upper": [5, 29, 36], "factor": [5, 18, 27, 29, 36, 37], "length_penalti": [5, 34], "scalar": [5, 29], "deprec": [5, 40], "favor": 5, "gptsession": [5, 21, 34, 36, 40], "gptsessiontest": 5, "charg": [5, 11], "gptdecod": 5, "directli": [5, 6, 11, 13, 21, 22, 27], "custom": [5, 11, 13, 14, 25, 28, 29, 34, 40], "satisfi": [5, 27], "separ": [5, 21, 27, 28, 29, 34, 41], "biggest": 5, "individu": 5, "revisit": 5, "maintain": [5, 14, 15, 18, 37], "develop": [5, 10, 11, 13, 21, 22, 25, 28, 29, 40], "could": [5, 6, 10, 27, 36, 39], "rebuild": 5, "pytorch": [6, 10, 12, 24, 29, 40, 41], "ilay": [6, 11], "inetworkdefinit": [6, 11], "gw": 6, "manipul": 6, "modifi": [6, 19, 27, 35], "highest": [6, 15, 16], "facilit": [6, 41], "modif": [6, 11], "gemm": [6, 35, 36, 40], "smoothquant": [6, 18, 20, 38, 40], "finer": 6, "grain": 6, "ideal": 6, "would": [6, 28, 41], "lead": [6, 11, 27, 41], "nest": 6, "flow": [6, 13], "scatter": 6, "core": [6, 8, 11, 13, 14, 15, 17, 22, 40], "get_par": [6, 29], "get_us": [6, 29], "consum": [6, 29], "replace_all_uses_with": [6, 29], "miss": [6, 28], "especi": [6, 41], "opaqu": 6, "world": [6, 25, 29, 35], "wise": 6, "singleton": [6, 29], "flayerinfomemo": 6, "replace_input_with": 6, "replace_output_uses_with": 6, "redirect": 6, "consist": [6, 13, 15, 25, 29, 37, 39], "patternrewrit": 6, "match_and_rewrit": 6, "combin": [6, 16, 27, 28, 30, 40, 41], "complex": [6, 11, 41], "patternanalyz": 6, "analysi": [6, 20, 36], "analyz": [6, 26], "rewritepatternmanag": 6, "label": [6, 29], "privileg": [6, 28], "analysispatternmanag": 6, "vital": [6, 18], "certain": [6, 10, 25, 38], "manner": 6, "routin": 6, "subtract": 6, "test_graph_rewrit": 6, "naivepatternrewriter_replaceaddwithsub": 6, "def": [6, 11, 13, 39], "__init__": [6, 11, 39], "super": [6, 13, 39], "replace_add_with_sub": 6, "root_lay": 6, "layertyp": 6, "elementwis": [6, 29], "separate_match_rewrit": 6, "enter": 6, "as_lay": 6, "elementwiseoper": [6, 29], "here": [6, 8, 10, 13, 15, 16, 21, 22, 26, 29, 36, 37, 39], "elementwise_sum": 6, "net_guard": 6, "subgraph": [6, 29], "b": [6, 8, 11, 14, 15, 16, 17, 28, 29, 34], "get_input": 6, "get_output": [6, 11], "old": 6, "insert": [6, 11, 29], "elementwise_sub": 6, "dangl": 6, "prune": [6, 29, 41], "explicitli": [6, 11, 40, 41], "skip": [6, 22, 28], "mark_as_remov": 6, "unnecessari": 6, "illustr": [6, 12, 41], "four": [6, 10, 30, 
41], "nearli": [6, 15], "never": [6, 27], "depriv": 6, "commonli": 6, "gptattentionpluginremovepaddingrewritepass": 6, "gpt_attention_plugin_remove_pad": 6, "plugin_v2": 6, "plugin_namespac": 6, "plugin_typ": 6, "gptattent": 6, "flayer": 6, "assert": [6, 29], "although": 6, "black": 6, "box": 6, "tensor_input": 6, "qkv": [6, 8, 10, 20, 29, 30, 39], "extern": [6, 28, 34, 36], "arg": [6, 13, 31], "in_len": 6, "new_input": 6, "clone_input": 6, "arglist": 6, "float16": [6, 7, 8, 10, 13, 28, 31, 32, 35, 39], "new_out": 6, "replace_outputs_uses_with": 6, "quit": [6, 28], "focu": [6, 18, 26], "u": [6, 23], "real": [6, 21, 22, 28, 29], "fuseattentionwithbiaspass": 6, "graph_rewrit": 6, "gptmanag": [7, 20, 40, 41], "mandatori": [7, 10, 19], "omit": [7, 13, 19, 29], "request_output_len": 7, "input_id": [7, 31, 34, 39], "num_input_token": 7, "suppli": [7, 12, 22], "applic": [7, 15, 18, 25, 26, 39, 40, 41], "runtime_top_k": 7, "runtime_top_p": 7, "len_penalti": 7, "early_stop": [7, 34, 40], "repetition_penalti": [7, 34, 40], "min_length": [7, 34], "presence_penalti": [7, 34, 40], "frequency_penalti": [7, 34, 40], "random_se": [7, 31, 34], "end_id": [7, 34, 40], "pad_id": [7, 34], "embedding_bia": 7, "bad_words_list": [7, 34], "num_bad_word": 7, "stop_words_list": [7, 34], "num_stop_word": 7, "prompt_embedding_t": [7, 30, 31, 34], "prompt_vocab_s": [7, 31, 34], "vocab": [7, 29, 34], "lora_task_id": [7, 8], "lora_weight": [7, 8], "lora_config": [7, 8, 31], "subsequ": [7, 8, 41], "oldest": [7, 8], "space": [7, 8, 21, 28, 36], "ones": [7, 8], "num_lora_modules_lay": [7, 8], "hi": [7, 8], "ho": [7, 8], "2b": [7, 20], "module_id": [7, 8], "layer_idx": [7, 8, 29], "return_log_prob": 7, "return_context_logit": 7, "return_generation_logit": 7, "draft_input_id": 7, "draft_logit": 7, "git": [8, 21, 22, 23, 24, 28, 35, 39], "lf": [8, 21, 23, 28], "clone": [8, 21, 22, 23, 24, 28, 35, 39], "huggingfac": [8, 10, 13, 31, 35, 39, 40], "co": [8, 29, 35, 39], "qychen": 8, "luotuo": 8, "7b": [8, 18, 35, 38, 40, 41], "kunish": 8, "japanes": 8, "alpaca": 8, "v0": [8, 14, 15, 16, 17, 40], "base_model": 8, "hf": [8, 34, 35, 39], "convert_checkpoint": [8, 10, 13, 35, 39, 40], "model_dir": [8, 10, 13, 33, 35, 39], "output_dir": [8, 10, 13, 28, 31, 33, 35, 39], "tmp": [8, 28], "llama_7b": 8, "trt_ckpt": [8, 10, 39], "trtllm": [8, 10, 13, 27, 28, 35, 36, 39, 40, 41], "checkpoint_dir": [8, 10, 13, 35, 39], "llama_7b_with_lora_qkv": 8, "trt_engin": [8, 10, 39], "gpt_attention_plugin": [8, 27, 28, 30, 32, 34, 39], "context_fmha": [8, 27, 28, 32], "paged_kv_cach": [8, 27, 28, 32, 34], "gemm_plugin": [8, 10, 30, 32, 35], "lora_plugin": [8, 29, 32, 34], "512": [8, 16, 17, 28, 31], "lora_dir": [8, 34], "max_lora_rank": [8, 31], "lora_target_modul": [8, 31, 34], "attn_q": 8, "attn_k": 8, "attn_v": 8, "script": [8, 13, 21, 22, 24, 28, 32, 35, 37, 39, 40], "hug": [8, 12, 13, 31, 35], "face": [8, 12, 13, 31, 35], "numpi": [8, 29, 30], "python3": [8, 10, 21, 23, 24, 28, 35, 39], "hf_lora_convert": 8, "storag": [8, 28], "tensorrtllm_backend": [8, 35], "triton": [8, 11, 12, 20, 23, 25, 40, 41], "loraweight": 8, "lorataskid": 8, "attn_qkv": 8, "flatten": [8, 17], "num_lora_module_lay": 8, "layer1": 8, "hidden_size_in": 8, "hidden_size_out": 8, "2106": 8, "09685": 8, "paper": [8, 15, 37, 41], "compbin": 8, "attn_dens": 8, "mlp_h_to_4h": 8, "gate": [8, 29], "mlp": [8, 9, 11, 29, 31, 39], "rmsnorm": [8, 29, 30, 40], "mlp_4h_to_h": 8, "mlp_gate": 8, "cross_attn_qkv": 8, "cross": [8, 29], "cross_attn_q": 8, "cross_attn_k": 8, "10": [8, 18, 23, 24, 
29, 35, 38, 39, 41], "cross_attn_v": 8, "11": [8, 16, 18, 21, 22, 29, 39, 41], "cross_attn_dens": 8, "12": [8, 10, 15, 22, 23, 24, 26, 28, 29, 39, 40], "idea": 8, "resid": [8, 39], "distinct": [8, 41], "percentag": 8, "concat": [9, 27, 29], "basic": [9, 23], "linear": [9, 10, 11, 29, 36, 37, 41], "layernorm": [9, 29, 30, 31, 40], "high": [9, 11, 13, 14, 18, 19, 27, 28, 36, 40], "earlier": [10, 39], "timelin": 10, "emphasi": 10, "unifi": [10, 13, 18, 40], "workflow": [10, 20, 29, 40], "rich": 10, "team": [10, 13, 40], "effort": [10, 41], "cli": [10, 20, 35], "tool": [10, 20, 22, 27, 35, 40], "sourc": [10, 13, 14, 17, 20, 25, 28, 29, 30, 31, 32, 33, 34, 40], "framework": [10, 12, 13, 25, 29, 38, 41], "command": [10, 13, 21, 22, 24, 28, 32, 35, 36, 39, 40, 41], "runner": 10, "nemo": [10, 12, 25, 34, 37, 38], "ammo": [10, 13, 33], "modelrunn": [10, 34], "jax": [10, 13], "deepspe": 10, "train": [10, 12, 13, 15, 18, 39, 41], "nvidia": [10, 11, 12, 13, 14, 15, 16, 18, 20, 21, 22, 23, 24, 25, 27, 28, 29, 35, 36, 38, 39, 40], "microsoft": [10, 22, 24], "format": [10, 13, 15, 18, 20, 21, 25, 34, 35, 36, 39], "One": [10, 11, 29, 39], "hyper": [10, 22], "dictionari": [10, 30], "logits_dtyp": [10, 31], "float32": [10, 29, 30, 31], "max_position_embed": [10, 28, 30, 31], "null": [10, 28], "num_hidden_lay": [10, 28, 31], "num_attention_head": [10, 28, 30, 31], "num_key_value_head": [10, 28, 31], "hidden_act": [10, 28, 30, 31], "intermediate_s": [10, 28, 31], "norm_epsilon": [10, 28, 31], "1e": [10, 28, 29, 30, 31], "learned_absolut": [10, 29, 30], "world_siz": [10, 13, 28, 29, 31, 35, 40], "pp_size": [10, 28, 31, 33], "quant_algo": [10, 13, 28], "str": [10, 13, 29, 30, 31, 32, 34], "kv_cache_quant_algo": [10, 28], "group_siz": [10, 29], "has_zero_point": 10, "pre_quant_scal": 10, "exclude_modul": 10, "sub": [10, 13, 29], "optforcausallm": [10, 31], "w8a16": [10, 18, 20], "w4a16": [10, 18, 20], "w4a16_awq": [10, 13], "w4a8_awq": [10, 13], "w4a16_gptq": 10, "w8a8_sq_per_channel": 10, "extens": [10, 12, 25], "opt": [10, 18, 29, 35, 37, 38, 40], "do_layer_norm_befor": 10, "falcon": [10, 18, 37, 38, 40], "new_decoder_architectur": [10, 28], "parallel_attent": [10, 28], "hierarch": 10, "whose": [10, 30], "bias": [10, 29], "th": [10, 29], "fc": [10, 11, 39], "proj": [10, 39], "input_layernorm": 10, "post_layernorm": [10, 29, 39], "activation_scaling_factor": 10, "weights_scaling_factor": 10, "prequant_scaling_factor": 10, "out_featur": [10, 11, 30], "in_featur": [10, 11, 30], "wherea": 10, "out_fatur": 10, "transpos": [10, 29], "post": [10, 15, 18, 25, 26, 35, 40], "deploi": [10, 20, 21, 24, 25, 41], "cd": [10, 21, 22, 23, 28, 35, 39], "125m": [10, 41], "rank0": 10, "safetensor": [10, 39], "rank1": 10, "768": [10, 28], "50272": 10, "2048": [10, 14, 16, 17, 28, 31, 34], "relu": [10, 11, 29, 31, 39], "use_parallel_embed": [10, 27, 28, 31], "embedding_sharding_dim": [10, 27, 28, 31], "share_embedding_t": [10, 28, 31], "export": [10, 13, 22, 28, 33, 34], "usr": [10, 21, 28], "local": [10, 11, 21, 22, 23, 24, 25, 28], "bin": [10, 23, 24, 26, 39], "924": 10, "100": [10, 28, 35], "mpirun": [10, 26, 28, 39, 40], "root": [10, 21, 22, 28, 29], "engine_dir": [10, 13, 28, 34, 35, 39], "test_trt_llm": 10, "hf_model_dir": [10, 13, 31], "data_typ": 10, "check_accuraci": 10, "tensorrt_llm_rouge1_threshold": 10, "14": [10, 18, 26, 28, 39], "power": [11, 16, 18, 25], "deep": [11, 15, 16, 26, 29], "concept": 11, "proceed": [11, 35], "builder": [11, 13, 40], "create_network": 11, "iactivationlay": 11, "act_typ": [11, 29], 
"activationtyp": [11, 29], "default_trtnet": 11, "add_activ": 11, "trt_tensor": [11, 29], "_create_tensor": 11, "easier": [11, 13], "few": [11, 13, 18, 28], "standard": [11, 12, 14, 29, 41], "deriv": [11, 27, 36], "sigmoid": [11, 29], "assembl": [11, 12], "advanc": [11, 21, 22, 40, 41], "silu": [11, 29], "travers": 11, "build_engin": 11, "build_serialized_network": 11, "everyth": 11, "ihostmemori": [11, 34], "binari": [11, 24, 26, 29, 41], "sweep": [11, 15], "choos": [11, 13, 24, 29], "crucial": [11, 18, 41], "candid": [11, 41], "amount": [11, 27, 34, 36, 39], "movement": 11, "extrem": 11, "speed": [11, 15, 40], "discov": 11, "highli": [11, 26, 41], "emb": [11, 30], "tensorrt_llm_gpt": 11, "fromfil": 11, "refit": 11, "refit_engin": 11, "kei": [11, 14, 18, 19, 28, 31, 34, 39], "transfer": [11, 40], "dram": 11, "multiprocessor": [11, 27], "cost": [11, 36, 40], "classic": [11, 20], "usual": [11, 13, 22, 27, 29, 39], "preced": 11, "written": 11, "again": [11, 39], "suboptim": 11, "why": [11, 29, 36], "twice": 11, "almost": [11, 36], "infinit": 11, "multihead": [11, 14], "arithmet": 11, "bmm": 11, "stand": [11, 27, 28], "trivial": 11, "someth": 11, "polyhedr": 11, "uncommon": 11, "inevit": 11, "offer": [11, 12, 18, 25], "interfac": [11, 13, 34], "extend": [11, 29], "guid": [11, 18, 20, 25, 26, 29, 39], "fairli": 11, "quantizetensorplugin": 11, "inputdesc": 11, "invokequant": 11, "els": [11, 13, 26, 29, 39], "cu": 11, "quantizedkernel": 11, "grid": 11, "role": 11, "drive": 11, "bodi": 11, "primit": [11, 25, 35], "nccl": [11, 27, 29, 39], "librari": [11, 12, 21, 22, 24, 25, 28, 35, 39, 40], "connect": 11, "nvswitch": 11, "ncclplugin": 11, "allreduc": [11, 29, 40], "allgath": [11, 29], "gather_dim": [11, 29], "tgt": [11, 29], "recv": [11, 29], "former": [11, 18], "sibl": 11, "incur": 11, "term": [11, 27, 29, 35], "toolkit": [12, 13, 18, 22, 23, 24], "solut": [12, 39], "easili": [12, 25, 29], "web": 12, "servic": 12, "your": [12, 13, 18, 21, 22, 23, 24, 25, 26, 27, 28, 35, 39, 41], "outsid": [12, 13], "scope": 12, "checkpoint": [12, 13, 20, 31, 33, 34, 35, 37, 39, 40], "download": [12, 22, 24, 35, 39], "hub": [12, 35, 40], "equip": 12, "recreat": 12, "eas": [12, 25], "hand": [12, 41], "onlin": [12, 16, 27, 28], "pull": [12, 21, 28, 35], "pretrain": 12, "repositori": [12, 22, 24, 35, 41], "major": [13, 25, 36], "convent": [13, 29], "shall": [13, 36], "had": 13, "migrat": [13, 32, 40], "ship": 13, "disadvantag": 13, "lib": [13, 22, 24], "quickli": [13, 35], "date": 13, "thu": [13, 29, 36], "And": [13, 29, 30, 36, 41], "hoc": [13, 34], "mitig": 13, "refactor": [13, 40], "sit": 13, "ongo": 13, "topmodelmixin": [13, 31], "introduc": [13, 15, 21, 37, 40], "from_hugging_fac": [13, 31], "llamaforcausallm": [13, 28, 31], "inherit": [13, 29], "direct": [13, 39], "hierarchi": [13, 29], "classmethod": [13, 31, 34], "cl": 13, "kwarg": [13, 31, 34], "rais": [13, 39], "notimplementederror": 13, "decodermodelforcausallm": [13, 31], "dict": [13, 31, 34], "Then": [13, 29, 39], "logic": [13, 30], "greatli": 13, "affect": [13, 36], "simplic": 13, "save_checkpoint": [13, 31], "save_config": [13, 31], "disk": [13, 21], "intention": 13, "slower": 13, "avoid": [13, 21, 36, 40], "meta": [13, 35], "from_meta_ckpt": [13, 31], "therefor": [13, 29, 39], "don": [13, 29, 41], "from_jax": 13, "from_nemo": 13, "from_kera": 13, "factori": [13, 34], "gemma": [13, 38, 40], "kera": 13, "contribut": [13, 29, 40], "freedom": 13, "read_config_from_the_custom_training_checkpoint": 13, "weights_dict": 13, 
"convert_weights_from_custom_training_checkpoint": 13, "assign": [13, 30], "convert_and_load_weights_into_trtllm_llama": 13, "though": [13, 36], "pitfal": 13, "fp8": [13, 14, 16, 17, 18, 20, 27, 29, 36, 38, 40], "smooth": [13, 40], "quant": [13, 40], "int4": [13, 18, 20, 38, 40], "standalon": 13, "pretrainedmodel": [13, 31, 36], "quant_config": [13, 31], "quantconfig": [13, 31, 40], "quanticonfig": 13, "use_ammo_quant": 13, "nativ": [13, 15, 27], "140g": 13, "70g": 13, "210g": 13, "h100": [13, 18, 25, 38], "program": [13, 22, 24, 27, 35], "cautiou": 13, "mpi_barri": 13, "quant_mod": [13, 30, 31, 34], "from_checkpoint": [13, 31], "build_config": 13, "backward": 13, "compat": [13, 38, 41], "older": 13, "trace": 13, "buildconfig": 13, "thin": 13, "deseri": 13, "ckpt_dir": [13, 31], "pretrainedconfig": [13, 31], "xxx": 13, "promis": 13, "unstabl": 13, "instal": [13, 21, 22, 25, 28, 35], "mismatch": 13, "github": [13, 21, 22, 23, 25, 28, 29, 35, 38], "caus": [13, 28, 40], "com": [13, 21, 22, 23, 24, 28, 29, 35, 38, 39], "1293": 13, "1252": 13, "1079": 13, "txt": [13, 23, 28], "natur": 13, "sure": [13, 28, 29, 35, 39, 40], "gradual": 13, "span": 13, "accur": 14, "141gb": 14, "eight": 14, "800": 14, "tok": [14, 16, 17, 28], "retain": [14, 16], "great": 14, "preliminari": [14, 16, 17], "measur": [14, 16, 17, 18, 20, 28], "subject": [14, 16, 17, 18, 29, 35], "tp1": [14, 15, 16], "7a": 14, "1xh200": 14, "advers": 14, "4x": [14, 15, 16], "lin": 14, "et": 14, "al": 14, "2023": [14, 26, 38, 39], "compress": 14, "4bit": 14, "rel": [14, 24, 29, 40], "capabl": [14, 21, 25, 26], "performantli": 14, "achiev": [14, 18, 27, 41], "803": 14, "941": [14, 17], "163": [14, 28], "4096": [14, 28, 34], "946": 14, "263": 14, "8xh200": 14, "dp": [14, 17], "960": 14, "192": 14, "560": 14, "96": [14, 28, 40], "640": 14, "6a": 14, "ainsli": 14, "publish": [14, 17, 40], "branch": [14, 17, 23, 24], "announc": [14, 15, 17], "blog": [14, 17, 18, 23, 40], "calcul": [14, 15, 17, 27, 29], "out_tp": [14, 17], "output_seqlen": [14, 17], "total_lat": [14, 17], "glossari": [14, 17], "isl": [14, 15, 16, 17, 28], "osl": [14, 15, 16, 17, 28], "oom": [14, 17, 36], "bangbang": 15, "h200": 15, "learn": [15, 16, 18, 29], "comparison": 15, "sec": 15, "13b": 15, "evalu": [15, 16, 20, 27], "amper": [15, 21, 25, 28, 38, 40], "show": [15, 19, 36, 38], "1st": [15, 28, 29, 36], "min": [15, 29], "10m": 15, "sxm": 15, "80gb": [15, 18, 28], "32": [15, 16, 28, 29, 36, 37, 40], "success": 15, "6b": [15, 29, 40], "907": 15, "102": 15, "185": [15, 28], "679": 15, "481": 15, "111": 15, "speedup": [15, 17, 18], "0x": 15, "7x": 15, "behind": 15, "chart": 15, "stai": [15, 18], "highlight": [15, 18], "demonstr": [15, 19], "5x": [15, 18], "2x": [15, 16], "center": [15, 16], "ai": [15, 25], "hpc": 15, "analyt": 15, "cloud": 15, "edg": 15, "workstat": [15, 25], "doubl": [15, 36], "halv": [15, 29], "16": [15, 18, 28, 31, 36, 37, 39, 41], "e4m3": 15, "expon": 15, "mantissa": 15, "e5m2": 15, "gradient": 15, "perceiv": [15, 28], "w8a8": [15, 18, 20], "8bit": 15, "loudspeak": 16, "819": 16, "9x": [16, 17], "hbm3e": 16, "llama_13b": 16, "1024": [16, 18, 27, 28, 29, 30], "750": 16, "349": 16, "llama_70b": 16, "014": 16, "654": [16, 28], "341": 16, "303": 16, "v9": 16, "offlin": [16, 28], "tp8": 16, "chat": [16, 35], "agent": 16, "80": [16, 21, 28, 38, 40], "200": [16, 28], "gpt3": 16, "175b": 16, "hgx": 16, "6x": 16, "vari": 16, "swept": 16, "newest": 16, "portfolio": 16, "8tb": 16, "141": 16, "gigabyt": 16, "gb": [16, 21, 22], "convers": [17, 18, 20], "experi": [17, 
18, 24, 25, 26, 41], "curv": 17, "equat": [17, 29], "tpot": 17, "axi": [17, 29], "8xh100": 17, "8a": 17, "227": 17, "13": [17, 26, 29, 39], "232": [17, 28], "25": [17, 28], "300": 17, "deploy": [18, 23, 25, 35], "imped": 18, "emerg": 18, "address": [18, 28, 36], "bottleneck": 18, "overview": [18, 19, 20, 21, 26], "appropri": [18, 27, 28, 39], "tailor": 18, "significantli": [18, 36], "dl": 18, "genai": 18, "hardwar": [18, 20, 21], "easi": 18, "mind": [18, 27], "line": [18, 22, 24, 28, 36, 40], "ptq": [18, 27], "impos": 18, "v2": [18, 35, 37, 38], "constrain": 18, "500m": 18, "notabl": 18, "3x": 18, "sq": [18, 37, 40], "40x": 18, "44x": 18, "30x": 18, "51x": 18, "47x": 18, "32x": 18, "mmlu": 18, "baselin": [18, 28], "loss": 18, "180b": 18, "70": [18, 28], "68": 18, "56": [18, 28], "awq": [18, 20, 38, 40], "69": 18, "85": [18, 28, 36, 40], "40b": 18, "55": [18, 26], "89": [18, 21, 22, 38], "54": 18, "07": 18, "87": [18, 28], "67": 18, "75": [18, 40], "01": 18, "mpt": [18, 37, 38, 40], "30b": 18, "47": 18, "21": [18, 39, 41], "46": 18, "compris": 18, "calibr": 18, "consider": 18, "prefer": [18, 21], "choic": [18, 29, 34, 36, 41], "densiti": 18, "consequ": 18, "speicfic": 18, "priorit": 18, "meet": 18, "gptq": [18, 20, 38, 40], "toler": 18, "tradeoff": 18, "our": [18, 26, 28, 40], "medium": [18, 39], "minut": 18, "ten": [18, 41], "w4a8": 18, "ll": [18, 22, 28], "becaus": [18, 27, 28, 29, 36], "occupi": [18, 36], "neglig": 18, "re": [18, 22, 24, 25, 27], "ada": [18, 21, 25, 28, 38, 40], "latter": [18, 27], "machin": [18, 22, 40], "bring": 18, "upcom": 18, "_cpp_gen": 19, "rst": 19, "everi": [19, 34], "properti": [19, 29, 31, 34], "what": [19, 20, 26, 35], "check": [19, 22, 23, 29, 36, 39], "interpret": [19, 21], "prerequisit": [20, 24], "docker": [20, 23, 28, 39, 40], "retriev": [20, 29], "linux": [20, 22, 38], "imag": [20, 23, 40], "bind": [20, 36, 40], "link": [20, 40], "bare": 20, "metal": 20, "prepar": [20, 28, 29, 31, 37], "cyclic": [20, 29, 34], "roll": 20, "rewrit": [20, 29], "expert": [20, 40], "mixtur": [20, 40], "moe": [20, 40], "methodologi": 20, "reproduc": [20, 27, 40], "benchmark": [20, 22, 26, 27, 35, 40], "coordin": [20, 29, 41], "nsight": [20, 22], "ifb": [20, 40, 41], "troubleshoot": 20, "debug": [20, 21, 34, 36], "e2": 20, "tip": 20, "dq": 20, "technic": 20, "understand": [20, 21, 26], "faq": 20, "instruct": [21, 22, 41], "gnu": 21, "abi": 21, "platform": 21, "apt": [21, 23], "submodul": [21, 22, 28], "recurs": [21, 22, 28], "approxim": 21, "63": [21, 26], "release_build": 21, "cuda_arch": 21, "cmake": [21, 22], "release_run": [21, 35], "local_us": [21, 28, 35], "account": 21, "app": 21, "tag": 21, "devel": [21, 23], "latest": [21, 22, 23, 35], "target": [21, 27, 41], "dockerfil": 21, "rm": [21, 23, 28, 29, 35, 39], "ipc": 21, "ulimit": [21, 39], "memlock": [21, 39], "stack": [21, 29], "67108864": 21, "pwd": [21, 35], "workdir": 21, "build_wheel": [21, 22, 28], "trt_root": [21, 22, 28], "pip": [21, 22, 23, 24, 28, 35], "whl": [21, 22, 24, 28], "increment": 21, "clean": [21, 26, 39], "semicolon": 21, "cuda_architectur": 21, "86": 21, "wheel": [21, 22, 28], "consult": [21, 26, 41], "cpp_onli": 21, "particularli": [21, 22], "dual": 21, "gcc": 21, "overridden": 21, "build_dir": 21, "libtensorrt_llm": 21, "against": [21, 22], "libnvinfer_plugin_tensorrt_llm": 21, "27": [22, 28], "visual": 22, "studio": 22, "2022": 22, "unzip": [22, 24], "right": [22, 25, 28, 29], "click": [22, 24], "icon": 22, "trai": 22, "bottom": 22, "taskbar": 22, "tab": 22, "uncheck": 22, "wsl": 22, "cp": [22, 
35], "unless": [22, 27], "mount": [22, 28], "At": [22, 30, 36], "manual": [22, 34, 39], "readm": [22, 40, 41], "md": [22, 29, 40], "12g": 22, "workspac": [22, 29, 36], "dll": 22, "nvtx": [22, 26], "asset": 22, "drop": [22, 27], "deselect": 22, "setup_build_env": 22, "ps1": [22, 24], "powershel": [22, 24], "administr": [22, 24], "trtpath": 22, "skipcmak": 22, "skipvsbuildtool": 22, "skiptrt": 22, "reopen": 22, "workload": [22, 26, 27], "userprofil": [22, 24], "bash": [22, 23, 24, 26], "pick": [22, 24], "verifi": [22, 24, 29, 40, 41], "__version__": 22, "x86": 22, "buildtool": 22, "common7": 22, "vsdevshel": 22, "arch": 22, "amd64": 22, "g": [22, 34], "gui": 22, "path_to_trt_root": 22, "geforc": [22, 25], "40": [22, 40], "seri": 22, "card": 22, "enable_multi_devic": 22, "impli": 22, "wish": 22, "exp": [22, 29], "stub": 22, "tensorrt_llm_stat": 22, "nvinfer_plugin_tensorrt_llm": 22, "th_common": 22, "thop": 22, "append": [22, 29], "appdata": [22, 24], "python310": [22, 24], "site": 22, "entrypoint": 23, "ubuntu22": 23, "04": 23, "openmpi": [23, 40], "libopenmpi": 23, "dev": 23, "preview": [23, 40], "pip3": 23, "url": [23, 24], "pypi": [23, 24], "beta": [24, 29], "setup_env": 24, "skipcuda": 24, "skippython": 24, "skipmpi": 24, "ex": 24, "navig": 24, "express": [24, 29], "msi": 24, "sdk": 24, "cudnn": 24, "button": 24, "edit": 24, "serprofil": 24, "ok": [24, 39], "dialog": 24, "cu121": 24, "_util": [24, 29], "trt_version": 24, "repo": 25, "revolution": 25, "artifici": 25, "intellig": 25, "digit": 25, "organ": 25, "challeng": 25, "appar": 25, "fact": 25, "expens": [25, 27, 41], "comprehens": 25, "intuit": 25, "broken": 25, "begin": [25, 41], "groundbreak": 25, "abil": 25, "lovelac": [25, 38, 40], "ture": [25, 38], "volta": [25, 38], "enthusiast": 25, "pc": 25, "rtx": 25, "backbon": 25, "modular": 25, "eager": 25, "dive": [25, 26], "explor": [25, 41], "embark": 25, "journei": 25, "unlock": 25, "incred": 25, "driven": 25, "metric": 26, "middl": 26, "ground": 26, "outlin": 26, "toggl": 26, "off": [26, 36], "region": 26, "extract": [26, 29], "log_iteration_data": 26, "stdout": 26, "metadata": [26, 35], "info": [26, 39], "249": [26, 28], "231": 26, "2448": 26, "28784": 26, "540173600": 26, "239": 26, "6904": 26, "tllm_gptm_profile_start_stop": 26, "csv": 26, "tllm_gpts_profile_start_stop": 26, "sai": 26, "tell": 26, "cudaprofilerapi": 26, "captur": 26, "rang": [26, 29, 31, 36, 37, 38, 41], "repeat": [26, 29], "127": [26, 29], "resolut": 26, "pmi_rank": 26, "mpich": 26, "slurm_procid": 26, "srun": [26, 39], "ompi_comm_world_local_rank": 26, "eq": [26, 29], "nsy": 26, "nsys_mpi_store_teams_per_rank": 26, "frequenc": 26, "100000": 26, "fi": 26, "profile_rank_0": 26, "env": 26, "strongli": 27, "max_num_token": [27, 31, 36], "roughli": 27, "estim": 27, "alpha": [27, 29], "rough": 27, "invoc": 27, "20": [27, 28, 34, 39, 41], "firstli": [27, 36], "secondli": 27, "realist": 27, "benefici": 27, "plateau": 27, "satur": 27, "hurt": 27, "input_seq_len": 27, "empir": 27, "sequence_count": 27, "multiprocessor_count": 27, "fulli": [27, 35, 40], "henc": 27, "use_custom_all_reduc": [27, 32, 34], "pcie": 27, "shard": [27, 29, 30], "look_up": 27, "lm_head": 27, "aforement": 27, "lookup": [27, 29, 30], "correctli": [27, 40], "use_embedding_shar": 27, "use_lookup_plugin": 27, "use_gemm_plugin": 27, "swiglu": [27, 28, 29], "downsid": 27, "slight": 27, "reduct": [27, 29, 41], "scale": [27, 29, 37], "discard": 27, "use_fused_mlp": [27, 28], "cublaslt": 27, "bert_attention_plugin": [27, 32], "knob": 27, "tweak": [27, 28], 
"inflight_batch": 27, "inflight_fused_batch": 27, "max_tokens_in_paged_kv_cach": [27, 35], "kv_cache_free_gpu_mem_fract": [27, 35], "tend": 27, "translat": [27, 40], "left": [27, 29, 36], "unset": 27, "clearli": 27, "leav": 27, "95": 27, "goal": 27, "max_input_length": [27, 29, 30, 31], "max_output_length": 27, "exce": [27, 29], "enable_chunked_context": 27, "chanc": 27, "therebi": 27, "tokens_per_block": [27, 32, 34], "observ": 28, "304": 28, "120": 28, "530": 28, "785": 28, "753": 28, "896": 28, "460": 28, "950": 28, "423": 28, "867": 28, "618": 28, "348": 28, "391": 28, "522": 28, "989": 28, "963": 28, "418": 28, "458": 28, "118": 28, "990": 28, "265": 28, "860": 28, "350": 28, "570": 28, "212": 28, "404": 28, "623": 28, "84": 28, "405": 28, "731": 28, "19": [28, 39], "854": 28, "944": 28, "826": 28, "214": 28, "725": 28, "346": 28, "011": 28, "837": 28, "112": 28, "246": 28, "859": 28, "904": 28, "684": 28, "562": 28, "387": 28, "971": 28, "721": 28, "885": 28, "574": 28, "537": 28, "478": 28, "49": 28, "152": 28, "15": [28, 39], "52": 28, "876": 28, "549": 28, "545": 28, "815": 28, "251": 28, "776": 28, "698": 28, "576": 28, "842": 28, "724": 28, "319": 28, "801": 28, "390": 28, "484": 28, "533": 28, "603": 28, "686": 28, "289": 28, "254": 28, "266": 28, "29": 28, "93": 28, "reflect": 28, "infight": 28, "23": [28, 35, 38, 39], "31": 28, "30": [28, 41], "17": [28, 39], "103": 28, "36": 28, "194": 28, "41": 28, "129": 28, "61": 28, "673": [28, 40], "91": 28, "136": 28, "139": 28, "160": [28, 40], "132": 28, "45": 28, "76": 28, "456": 28, "addition": 28, "elev": 28, "uid": [28, 34], "gid": 28, "boot": 28, "slurm": [28, 39], "pyxi": 28, "makefil": 28, "nv_gpu": 28, "gpu_opt": 28, "destin": 28, "docker_run_arg": 28, "fine": [28, 30, 41], "repeatedli": [28, 41], "ran": [28, 35], "transit": 28, "hbm3": 28, "undefin": [28, 29], "newer": [28, 40], "trtllm_enable_xqa": 28, "shell": 28, "displai": 28, "gptj": [28, 38], "ckpt_config": 28, "gptjforcausallm": [28, 31], "28": 28, "50400": 28, "gelu": [28, 29], "rotary_dim": 28, "model_config": [28, 34], "strongly_typ": 28, "in_out_s": 28, "in_out": 28, "echo": 28, "awk": 28, "in_out_dim": 28, "warm_up": 28, "durat": 28, "num_run": 28, "input_output_len": 28, "11008": 28, "32000": 28, "rotary_bas": 28, "10000": [28, 29, 30], "rotary_sc": 28, "8192": 28, "28672": 28, "oversubscrib": 28, "falconforcausallm": [28, 31], "14848": 28, "65024": 28, "engine_path": 28, "_": 28, "allreduceconfig": 29, "intflag": [29, 33], "customallreducekernel": 29, "sync": [29, 34], "push_mod": 29, "use_memcpi": 29, "allreducestrategi": 29, "intenum": 29, "oneshot": 29, "twoshot": 29, "attentionmasktyp": [29, 30], "bidirect": [29, 30], "bidirectionalglm": 29, "causal": 29, "dimrang": 29, "ctor": 29, "layernormpositiontyp": [29, 31], "pre_layernorm": [29, 31], "layernormtyp": [29, 31], "groupnorm": [29, 30], "mlptype": [29, 31], "fusedgatedmlp": [29, 30], "gatedmlp": [29, 30], "alibi": 29, "alibi_with_scal": 29, "chatglm": [29, 37, 38, 40], "from_str": 29, "is_alibi": 29, "is_rop": 29, "rotaryscalingtyp": 29, "dim_rang": 29, "is_network_input": 29, "tensorloc": 29, "ab": [29, 37], "cast": 29, "is_dynam": 29, "exclud": 29, "is_trt_wrapp": 29, "differenti": 29, "physic": [29, 36], "mark_output": 29, "keepdim": 29, "ndim": 29, "permut": 29, "new_tensor": 29, "split_size_or_sect": 29, "dim0": 29, "dim1": 29, "zero_is_placehold": 29, "tensorrt_bind": 29, "unaryoper": 29, "closur": 29, "round": 29, "sin": 29, "iunarylay": 29, "unari": 29, "tanh": 29, "mul": 29, "prod": 29, "div": 
29, "gt": 29, "lt": 29, "op_and": 29, "AND": 29, "op_or": 29, "OR": 29, "pow": 29, "ielementwiselay": 29, "union": 29, "amongst": 29, "section_s": 29, "deeplearn": [29, 38, 39], "html": [29, 38, 39], "replic": 29, "deleg": 29, "arang": 29, "ifilllay": 29, "filloper": 29, "linspac": 29, "_str_to_trt_dtype_dict": 29, "argmax": 29, "onnx": 29, "blob": 29, "ye": [29, 36], "avg_pool2d": 29, "kernel_s": [29, 30], "stride": [29, 30], "ceil_mod": [29, 30], "count_include_pad": [29, 30], "bert_attent": 29, "input_length": [29, 30, 31, 34], "relative_attent": [29, 30, 31], "relative_attention_bia": 29, "bert": [29, 37, 38], "1706": 29, "03762": 29, "2d": [29, 37], "sum_of_token": 29, "bertattentionplugin": 29, "max_seq_len": [29, 31, 34], "num_bucket": [29, 30, 31], "broadcast_help": 29, "split_siz": 29, "clip": 29, "inp": 29, "jj": 29, "len": [29, 34], "true_input": 29, "false_input": 29, "condition": 29, "ndarrai": 29, "iconstantlay": 29, "constant_to_tensor_": 29, "conv1d": [29, 30], "dilat": [29, 30], "conv2d": [29, 30], "conv_transpose2d": 29, "output_pad": [29, 30], "cumsum": [29, 40], "ilooplay": 29, "einsum": 29, "einsum_eq": 29, "ieinsumlay": 29, "summat": 29, "einstein": 29, "ascii": 29, "letter": 29, "comma": [29, 34], "subscript": 29, "diagon": 29, "ax": 29, "alphabet": 29, "arrow": 29, "ij": 29, "jk": 29, "ik": 29, "equival": 29, "ellipsi": 29, "syntax": 29, "rubric": 29, "ji": 29, "kj": 29, "dot": 29, "ijk": 29, "ikl": 29, "ijl": 29, "neither": [29, 36], "elementwise_binari": 29, "tp_group": [29, 30, 39], "sharding_dim": [29, 30], "tp_rank": [29, 30], "among": 29, "column": [29, 37], "portion": [29, 36], "transposit": 29, "default_net": 29, "plugin_config": [29, 31], "lookup_plugin": [29, 32], "igatherlay": 29, "tg_group": 29, "expand_shap": 29, "expans": 29, "islicelay": 29, "shrunk": 29, "expand_dim": 29, "ishufflelay": 29, "new_shap": 29, "shuffl": 29, "expand_dims_lik": 29, "expand_mask": 29, "tgt_len": [29, 30], "src_seq_len": 29, "tgt_seq_len": 29, "3rd": 29, "2nd": 29, "revers": 29, "gatherel": 29, "gather_last_token_logit": 29, "hidden_st": [29, 30, 31, 34, 39], "last_token_id": [29, 31, 39], "last_tokens_id": 29, "longest": 29, "geglu": 29, "generate_alibi_bias": 29, "key_length": [29, 30], "2211": [29, 37], "05100": 29, "generate_alibi_slop": 29, "alibi_scal": 29, "alibi_bias_max": [29, 30], "past_key_valu": [29, 30], "sequence_length": [29, 30, 34, 39], "host_past_key_value_length": [29, 30, 39], "host_max_attention_window_s": [29, 30, 39], "host_sink_token_length": [29, 30, 39], "context_length": [29, 30, 34, 39], "host_request_typ": [29, 30, 31, 39], "num_kv_head": [29, 30, 31, 34], "hidden_size_per_head": 29, "rotary_embedding_bas": [29, 30], "rotary_embedding_scale_typ": 29, "rotary_embedding_scal": [29, 30], "rotary_embedding_max_posit": 29, "kv_orig_quant_scal": 29, "kv_quant_orig_scal": 29, "attention_output_orig_quant_scal": 29, "max_context_length": [29, 30, 34, 36], "mask_typ": 29, "alibi_slop": 29, "kv_cache_block_point": [29, 30, 34, 39], "host_kv_cache_block_point": [29, 30, 34, 39], "do_cross_attent": [29, 30], "cross_qkv": 29, "cross_qkv_length": 29, "encoder_input_length": [29, 30, 34], "host_context_length": [29, 30, 31, 34, 39], "qkv_bia": 29, "use_cach": [29, 30, 31], "medusa_position_offset": [29, 30, 31, 34], "medusa_packed_mask": [29, 30, 31], "hint": 29, "regard": 29, "merg": 29, "qkv_dim": 29, "contigu": 29, "max_block": 29, "num_tokens_per_block": 29, "cache_indir_t": 29, "rope": 29, "theta": [29, 30], "ignor": 29, "rotari": 29, "glm": 29, "10b": 29, 
"max_blocks_per_sequ": 29, "num_medusa_token": [29, 34], "divup": 29, "group_norm": 29, "num_group": [29, 30], "ep": [29, 30], "todo": 29, "index_select": 29, "5th": 29, "interpol": 29, "scale_factor": 29, "nearest": 29, "align_corn": 29, "recompute_scale_factor": 29, "antialia": 29, "is_gated_activ": 29, "layer_norm": 29, "normalized_shap": [29, 30], "use_diff_of_squar": 29, "norm": 29, "simplest": 29, "gamma": 29, "formula": 29, "varianc": 29, "squar": 29, "var": 29, "epsilon": 29, "in_hidden_s": 29, "out_hidden_s": 29, "transa": 29, "transb": 29, "max_low_rank": 29, "lora_rank": 29, "lora_weights_point": 29, "lora_id": 29, "low_rank": 29, "in_point": 29, "out_point": 29, "mamba_conv1d": 29, "conv_state_or_ptr": 29, "conv_weight": 29, "conv_bia": 29, "slot_map": [29, 31], "seq_len": [29, 30], "conv": 29, "Or": [29, 34], "masked_scatt": 29, "masked_scatter_": 29, "masked_select": [29, 40], "nonzero": 29, "gatherv2": 29, "mat2": 29, "use_fp32_acc": 29, "imatrixmultiplylay": 29, "decis": 29, "ireducelay": 29, "non_gated_vers": 29, "outer": 29, "vec2": 29, "p2p": 29, "ncclrecv": 29, "repeat_interleav": 29, "rms_norm": 29, "06": [29, 30], "weig": 29, "22": [29, 39], "selective_scan": 29, "state_or_ptr": 29, "delta": 29, "delta_bia": 29, "bc": 29, "z": 29, "dt_rank": 29, "is_variable_b": 29, "is_variable_c": 29, "delta_softplu": 29, "ssm": 29, "dt_proj": 29, "softplu": 29, "ncclsend": 29, "samplemod": 29, "emul": [29, 40], "strict_bound": 29, "isoftmaxlay": 29, "nn": 29, "revert": 29, "ith": 29, "squared_relu": 29, "contact": 29, "a_1": 29, "a_2": 29, "a_n": 29, "a_": 29, "untouch": 29, "enforc": 29, "iselectlay": 29, "mish": 30, "local_layer_idx": 30, "apply_query_key_layer_sc": 30, "attention_head_s": 30, "attention_mask_typ": 30, "rotary_embedding_sc": 30, "rotary_embedding_percentag": 30, "cross_attent": [30, 34], "dense_bia": 30, "clip_qkv": 30, "skip_cross_qkv": [30, 31, 34], "attention_mask": [30, 31, 34], "kv_cache_param": [30, 31], "attention_param": [30, 31], "encoder_output": [30, 31, 34], "position_embed": 30, "norm_before_bmm1": 30, "lora_layer_param": 30, "cross_kv_cache_gen": [30, 31], "cross_qkv_reus": [30, 31], "attentionparam": [30, 31], "encoder_max_input_length": [30, 34], "is_valid": 30, "is_valid_cross_attn": 30, "bertattent": 30, "keyvaluecacheparam": [30, 31], "past_key_value_length": 30, "fill_none_tensor_list": 30, "list_siz": 30, "get_first_past_key_valu": 30, "ropeembeddingutil": 30, "apply_rotary_pos_emb": 30, "pos_emb_typ": 30, "apply_rotary_pos_emb_chatglm": 30, "create_sinusoidal_posit": 30, "num_po": 30, "rotate_every_two": 30, "rotate_half": 30, "compute_relative_bia": 30, "query_length": 30, "rel_attn_t": 30, "make_causal_mask": 30, "bsz": 30, "past_key_values_length": 30, "output_dtyp": 30, "in_channel": 30, "out_channel": 30, "padding_mod": 30, "convtranspose2d": 30, "output_s": 30, "num_embed": 30, "embedding_dim": 30, "weight_load": 30, "loaded_weight": 30, "prompttuningembed": 30, "supplementari": 30, "adequ": 30, "task_vocab_s": 30, "num_task": 30, "num_tokens_per_task": 30, "columnlinear": [30, 39], "alia": 30, "use_fp8": 30, "gather_output": [30, 39], "share_weight": 30, "strict_dtyp": 30, "pad_lda": 30, "lora_runtime_param": 30, "loraruntimeparam": 30, "multiply_gath": 30, "parallellmhead": 30, "qkvcolumnlinear": 30, "rowlinear": [30, 39], "multiply_reduc": 30, "ffn_hidden_s": [30, 31, 39], "num_channel": 30, "affin": 30, "elementwise_affin": 30, "avgpool2d": 30, "baichuanforcausallm": 31, "bertforquestionansw": 31, "type_vocab_s": 31, 
"pad_token_id": 31, "is_roberta": 31, "num_label": 31, "token_type_id": 31, "position_id": [31, 39], "bertforsequenceclassif": 31, "bertmodel": 31, "bloomforcausallm": 31, "bloommodel": 31, "prompt_task": [31, 34], "chatglmforcausallm": 31, "check_config": 31, "prepare_input": [31, 36], "chatglmmodel": 31, "decodermodel": 31, "encoder_num_head": 31, "encoder_hidden_s": 31, "encoder_head_s": 31, "encoder_num_kv_head": 31, "has_position_embed": [31, 34], "has_embedding_layernorm": 31, "has_embedding_scal": 31, "has_attention_qkvo_bia": 31, "has_mlp_bia": 31, "has_model_final_layernorm": 31, "layernorm_ep": 31, "layernorm_posit": 31, "layernorm_typ": 31, "mlp_type": 31, "rescale_before_lm_head": 31, "has_lm_head_bia": 31, "residual_sc": 31, "fp16_clamp": 31, "generationmixin": 31, "decoder_input_id": 31, "cross_attention_mask": [31, 34], "lora_param": 31, "loraparam": 31, "max_decoder_input_len": 31, "max_new_token": [31, 34, 36], "max_encoder_input_len": 31, "brief": [31, 34], "fed": 31, "encodermodel": 31, "use_prompt_tun": [31, 40], "prompt_embedding_table_s": 31, "falconmodel": 31, "gptforcausallm": 31, "use_lora": 31, "lorabuildconfig": 31, "gptjmodel": 31, "gptmodel": 31, "gptneoxforcausallm": 31, "gptneoxmodel": 31, "gemmaforcausallm": 31, "default_plugin_config": 31, "to_trt": 31, "meta_ckpt_dir": 31, "calib_batch": 31, "calib_batch_s": 31, "1234": 31, "tokenizer_max_seq_length": 31, "llamamodel": 31, "mptforcausallm": 31, "mptmodel": 31, "mambalmheadmodel": 31, "conv_stat": 31, "ssm_state": 31, "opt_num_token": 31, "max_draft_len": [31, 41], "medusaforcausallm": 31, "optmodel": 31, "phiforcausallm": 31, "convert_hf_checkpoint": 31, "phimodel": 31, "from_dict": 31, "from_json_fil": 31, "config_fil": 31, "set_if_not_exist": 31, "set_rank": 31, "to_dict": 31, "from_config": 31, "load_partial_weight": 31, "position_encoding_2d": 31, "qwenforcausallm": 31, "whisperencod": 31, "n_mel": 31, "n_ctx": 31, "n_state": 31, "n_head": 31, "n_layer": 31, "quantize_model": 31, "pluginconfig": 32, "smooth_quant_gemm_plugin": 32, "identity_plugin": 32, "layernorm_quantization_plugin": 32, "rmsnorm_quantization_plugin": 32, "nccl_plugin": 32, "weight_only_groupwise_quant_matmul_plugin": 32, "weight_only_quant_matmul_plugin": 32, "quantize_per_token_plugin": 32, "quantize_tensor_plugin": 32, "moe_plugin": 32, "mamba_conv1d_plugin": [32, 34], "context_fmha_fp32_acc": 32, "enable_xqa": 32, "attention_qk_half_accumul": 32, "use_paged_context_fmha": [32, 41], "use_context_fmha_for_gener": [32, 34], "multiple_profil": 32, "paged_st": [32, 34], "to_legacy_set": 32, "legaci": 32, "central": 32, "quantalgo": 33, "strenum": 33, "quantize_and_export": 33, "qformat": 33, "kv_cache_dtyp": 33, "calib_s": 33, "awq_block_s": 33, "max_seq_length": 33, "chatglmgenerationsess": 34, "engine_buff": 34, "debug_mod": [34, 39], "debug_tensors_to_sav": 34, "cuda_graph_mod": [34, 39], "generationsequ": 34, "seq_idx": 34, "batch_idx": 34, "get_batch_idx": 34, "idx": 34, "get_seq_idx": 34, "buffer_alloc": 34, "cuda_stream_guard": 34, "exit": [34, 35], "sampling_config": 34, "no_repeat_ngram_s": 34, "output_sequence_length": 34, "return_dict": 34, "stopping_criteria": 34, "stoppingcriteria": [34, 40], "logits_processor": 34, "logitsprocessor": [34, 40], "decode_batch": 34, "decode_regular": 34, "scfg": 34, "ite": 34, "sequence_limit_length": 34, "stop_words_data": 34, "bad_words_data": 34, "decode_stream": 34, "dump_debug_buff": 34, "early_stop_criteria": 34, "should_stop": 34, "filter_medusa_logit": 34, "best_path": 34, 
"best_path_length": 34, "medusa_logit": 34, "nmh": 34, "nmt": [34, 38, 40], "finalize_decod": 34, "in_progress": 34, "find_best_medusa_path": 34, "next_logit": 34, "temp": 34, "first_lay": 34, "get_next_medusa_token": 34, "next_medusa_logit": 34, "handle_per_step": 34, "next_step_tensor": 34, "runtimetensor": 34, "has_token_type_embed": 34, "is_medusa_mod": 34, "last_lay": 34, "max_medusa_token": 34, "max_prompt_embedding_table_s": [34, 40], "medusa_path": 34, "medusa_temperatur": [34, 41], "medusa_topk": 34, "medusa_tree_id": 34, "next_medusa_input_id": 34, "num_heads_kv": 34, "num_medusa_head": 34, "pp_communicate_final_output_id": 34, "final_output_id": 34, "pp_communicate_new_token": 34, "cache_indir": 34, "process_logits_for_medusa_mod": 34, "context_has_medusa_token": 34, "next_step_buff": 34, "_runtim": 34, "lora_manag": 34, "loramanag": 34, "lora_uid": 34, "medusa_choic": [34, 41], "update_kv_cache_draft_token_loc": 34, "best_path_len": 34, "update_output_ids_by_offset": 34, "new_generated_id": 34, "use_gpt_attention_plugin": 34, "use_mamba_conv1d_plugin": 34, "memory_pool": 34, "max_blocks_per_seq": 34, "sink_token_len": 34, "use_one_more_block": 34, "add_sequ": 34, "context_len": 34, "get_block_point": 34, "logitsprocessorlist": 34, "mambalmheadmodelgenerationsess": 34, "mamba_d_conv": 34, "mamba_d_st": 34, "mamba_expand": 34, "model_nam": 34, "trtllm_modules_to_hf_modul": 34, "modelrunnermixin": 34, "from_dir": 34, "lora_ckpt_sourc": 34, "from_engin": 34, "batch_input_id": 34, "prompt_table_path": 34, "parametr": 34, "npy": 34, "nemo_prompt_convert": 34, "criteria": 34, "output_id": 34, "context_logit": 34, "generation_logit": 34, "serialize_engin": 34, "vocab_size_pad": 34, "qwenforcausallmgenerationsess": 34, "global_max_input_length": 34, "global_max_output_length": 34, "runtime_rank": 34, "num_beam": 34, "top_k": 34, "top_p": 34, "top_p_decai": 34, "top_p_min": 34, "top_p_reset_id": 34, "use_beam_hyp": 34, "beam_search_diversity_r": 34, "output_cum_log_prob": 34, "output_log_prob": 34, "iexecutioncontext": 34, "create_execution_context": 34, "icudaengin": [34, 36], "from_serialized_engin": 34, "infer_shap": 34, "tensorinfo": 34, "set_input_shap": 34, "fail": [34, 36, 39], "succeed": 34, "async": 34, "set_shap": 34, "tensor_dict": 34, "stoppingcriterialist": 34, "to_word_list_format": 34, "word_dict": 34, "add_special_token": 34, "am": 34, "happi": 34, "sad": 34, "licens": 35, "agre": 35, "authent": 35, "minim": 35, "login": 35, "tensorrtllm": 35, "exammpl": 35, "ckpt": 35, "prewritten": 35, "desir": 35, "ve": 35, "got": 35, "tokenizer_dir": [35, 39], "input_text": 35, "nine": 35, "french": 35, "rapid": [35, 41], "ngc": 35, "all_model": 35, "inflight_batcher_llm": 35, "skeleton": 35, "fill_templ": 35, "in_plac": 35, "pbtxt": 35, "decoupled_mod": 35, "batch_scheduler_polici": 35, "guaranteed_complet": 35, "max_num_sequ": 35, "preprocess": [35, 37], "tokenizer_typ": 35, "postprocess": 35, "model_repo": 35, "shm": [35, 39], "nvcr": [35, 40], "tritonserv": [35, 40], "py3": [35, 40], "sentencepiec": 35, "protobuf": 35, "launch_triton_serv": 35, "endpoint": 35, "curl": 35, "localhost": 35, "8000": 35, "ensembl": 35, "text_input": 35, "max_token": 35, "bad_word": 35, "stop_word": 35, "showcas": 35, "question": 36, "contributor": 36, "chosen": 36, "live": 36, "device_memory_s": 36, "fmha": [36, 40], "explan": 36, "relationship": 36, "linearli": 36, "fold": 36, "behav": 36, "nor": 36, "No": 36, "buffermanag": 36, "driver": 36, "smi": 36, "concern": 36, "inspect": 36, "layout": 36, 
"theoret": 36, "succe": 36, "check_gpt_mem_usag": 36, "exceed": 36, "verbos": 36, "sequenti": 36, "16x": 36, "recip": 37, "ieee": 37, "satfinit": 37, "fp": 37, "static_cast": 37, "channel": [37, 40], "mi": 37, "ni": 37, "10438": 37, "downstream": 37, "2210": 37, "17323": 37, "2306": 37, "00978": 37, "weightonlygroupwisequantmatmulplugin": 37, "weight_only_groupwise_quant_matmul": 37, "baichuan": [37, 38, 40], "blip": [37, 38, 40], "v3": 37, "flan": [37, 38], "internlm": [37, 38, 40], "mamba": [37, 38, 40], "phi": [37, 38, 40], "qwen": [37, 38, 40], "replit": [37, 38], "santacod": [37, 38], "skywork": [37, 38, 40], "starcoder1": 37, "starcoder2": [37, 40], "whisper": [37, 38, 40], "blip2": [37, 38, 40], "llava": [37, 38, 40], "vila": [37, 40], "nougat": [37, 38, 40], "vision": 37, "modal": [37, 38], "int4_weight": 37, "w4a": 37, "int8_weight": 37, "w8a": 37, "a8": 37, "per_channel": 37, "per_token": 37, "per_group": 37, "fp8_qdq": 37, "x86_64": 38, "l40": 38, "a30": 38, "t4": 38, "v100": [38, 40], "sm90": [38, 40], "sm89": [38, 40], "sm80": [38, 40], "sm86": [38, 40], "sm75": 38, "sm70": 38, "tree": [38, 39], "bart": [38, 40], "multimod": [38, 40], "dbrx": 38, "fairseq": [38, 40], "gptneox": 38, "mbart": [38, 40], "mt5": 38, "vl": [38, 40], "qwenvl": [38, 40], "roberta": [38, 40], "smaug": [38, 40], "starcod": 38, "xl": 38, "sm": [38, 40], "obei": 39, "paradigm": 39, "interest": 39, "register_network_output": 39, "gm": 39, "named_network_output": 39, "net": 39, "_mark_output": 39, "residu": 39, "attention_output": 39, "mlp_output": 39, "enable_debug_output": 39, "rf": 39, "pushd": 39, "pytorch_model": 39, "wget": 39, "resolv": 39, "popd": 39, "current_stream": 39, "cuda_stream": 39, "instance_idx": 39, "cuda_graph_inst": 39, "cuassert": 39, "cudart": 39, "cudagraphlaunch": 39, "_run": 39, "runtimeerror": 39, "debug_buff": 39, "use_py_sess": 39, "dict_kei": 39, "18": 39, "0294": 39, "0260": 39, "0776": 39, "0560": 39, "0235": 39, "0273": 39, "0071": 39, "5879": 39, "1993": 39, "0449": 39, "6299": 39, "5957": 39, "8779": 39, "1050": 39, "7090": 39, "0910": 39, "0713": 39, "2939": 39, "1212": 39, "0903": 39, "5918": 39, "1045": 39, "3445": 39, "1082": 39, "0723": 39, "0732": 39, "6157": 39, "3452": 39, "2998": 39, "2649": 39, "7134": 39, "9692": 39, "1141": 39, "0096": 39, "9521": 39, "1437": 39, "2107": 39, "5874": 39, "8179": 39, "7900": 39, "6890": 39, "6064": 39, "4192": 39, "0047": 39, "3887": 39, "9028": 39, "0682": 39, "2820": 39, "7949": 39, "5073": 39, "1721": 39, "5830": 39, "1378": 39, "0070": 39, "0804": 39, "1272": 39, "6255": 39, "1072": 39, "0523": 39, "7144": 39, "3328": 39, "8828": 39, "3442": 39, "8149": 39, "0630": 39, "2305": 39, "2225": 39, "2079": 39, "1459": 39, "3555": 39, "1672": 39, "1135": 39, "1290": 39, "1556": 39, "3977": 39, "8218": 39, "3291": 39, "8672": 39, "born": 39, "north": 39, "east": 39, "franc": 39, "soyer": 39, "chef": 39, "london": 39, "earli": 39, "cuda_launch_block": 39, "statu": 39, "09": 39, "03": 39, "00": 39, "gptlmheadmodel": 39, "plugin_v2_gemm_0": 39, "pluginv2build": 39, "reportpluginerror": 39, "mpi4pi": [39, 40], "interfer": 39, "pmi2_init": 39, "ompi": 39, "pmi": 39, "upon": [39, 41], "pmix": 39, "dedic": 39, "feedback": 40, "forum": 40, "02": 40, "break": 40, "determinist": 40, "air": 40, "rewind": 40, "planner": 40, "104": 40, "ootb": 40, "arbitrari": 40, "dataset": 40, "percentil": 40, "gptdecoderbatch": 40, "infrastructur": 40, "auto_parallel": 40, "asyncllmengin": 40, "generationexecutor": 40, "refin": 40, "generate_async": 40, 
"streaming_llm": 40, "distil": 40, "72b": 40, "openai": 40, "bug": 40, "encoder_input_len_rang": 40, "992": 40, "983": 40, "1003": 40, "wrong": [40, 41], "987": 40, "1118": 40, "1123": 40, "1181": 40, "967": 40, "1148": 40, "chatglm2": 40, "failur": 40, "1239": 40, "1242": 40, "modelrunnercpp": [34, 40], "1183": 40, "1267": 40, "input_fil": 40, "reducescatt": 40, "274": 40, "275": 40, "chatglm3": 40, "32k": 40, "794": 40, "thank": 40, "eddi": 40, "wang1120": 40, "erenup": 40, "new_workflow": 40, "maxnumsequ": 40, "abnorm": 40, "639": 40, "705": 40, "741": 40, "crash": 40, "649": 40, "695": 40, "pickl": 40, "701": 40, "custom_all_reduc": 40, "825": 40, "935": 40, "minor": 40, "enable_trt_overlap": 40, "ping": 40, "pong": 40, "xqa": 40, "perf_best_practic": 40, "sota": 40, "133": 40, "typo": 40, "739": 40, "cutlass": 40, "fhma": 40, "mmha": 40, "warp": 40, "288": 40, "codellama": 40, "hang": 40, "149": 40, "averag": 41, "situat": 41, "underutil": 41, "substanti": 41, "assumpt": 41, "twofold": 41, "successfulli": 41, "cours": 41, "wors": 41, "auxiliari": 41, "forecast": 41, "prove": 41, "simpler": 41, "summari": 41, "furthermor": 41, "pronounc": 41, "act": 41, "essenti": 41, "verif": 41, "predefin": 41, "orchestr": 41, "procedur": 41, "llmrequest": 41, "advis": 41, "unchang": 41, "realiz": 41, "emploi": 41, "consolid": 41, "spars": 41, "simultan": 41, "likelihood": 41, "albeit": 41, "recogn": 41, "grow": 41, "focus": 41, "strike": 41, "breadth": 41, "depth": 41, "mh": 41, "l": 41, "denot": 41, "hk": 41, "mh1": 41, "examin": 41, "incorrect": 41, "guidanc": 41, "vicuna": 41, "phasem": 41, "free_gpu_memory_fract": 34}, "objects": {"": [[1, 0, 1, "c.SET_FROM_OPTIONAL", "SET_FROM_OPTIONAL"], [1, 1, 1, "_CPPv48nvinfer1", "nvinfer1"], [0, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [0, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [0, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv412tensorrt_llm", "tensorrt_llm"], [1, 1, 1, "_CPPv4N12tensorrt_llm13batch_managerE", "tensorrt_llm::batch_manager"], [1, 1, 1, "_CPPv4N12tensorrt_llm13batch_manager16kv_cache_managerE", "tensorrt_llm::batch_manager::kv_cache_manager"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executorE", 
"tensorrt_llm::executor"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executorE", "tensorrt_llm::executor"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executorE", "tensorrt_llm::executor"], [0, 2, 1, "_CPPv4N12tensorrt_llm8executor12BatchingTypeE", "tensorrt_llm::executor::BatchingType"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor12BatchingType9kINFLIGHTE", "tensorrt_llm::executor::BatchingType::kINFLIGHT"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor12BatchingType7kSTATICE", "tensorrt_llm::executor::BatchingType::kSTATIC"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executor10BeamTokensE", "tensorrt_llm::executor::BeamTokens"], [0, 2, 1, "_CPPv4N12tensorrt_llm8executor17CommunicationModeE", "tensorrt_llm::executor::CommunicationMode"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor17CommunicationMode7kLEADERE", "tensorrt_llm::executor::CommunicationMode::kLEADER"], [0, 2, 1, "_CPPv4N12tensorrt_llm8executor17CommunicationTypeE", "tensorrt_llm::executor::CommunicationType"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor17CommunicationType4kMPIE", "tensorrt_llm::executor::CommunicationType::kMPI"], [0, 2, 1, "_CPPv4N12tensorrt_llm8executor8DataTypeE", "tensorrt_llm::executor::DataType"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor8DataType5kBF16E", "tensorrt_llm::executor::DataType::kBF16"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor8DataType5kBOOLE", "tensorrt_llm::executor::DataType::kBOOL"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor8DataType5kFP16E", "tensorrt_llm::executor::DataType::kFP16"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor8DataType5kFP32E", "tensorrt_llm::executor::DataType::kFP32"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor8DataType4kFP8E", "tensorrt_llm::executor::DataType::kFP8"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor8DataType6kINT32E", "tensorrt_llm::executor::DataType::kINT32"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor8DataType6kINT64E", "tensorrt_llm::executor::DataType::kINT64"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor8DataType5kINT8E", "tensorrt_llm::executor::DataType::kINT8"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor8DataType6kUINT8E", "tensorrt_llm::executor::DataType::kUINT8"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor8DataType8kUNKNOWNE", "tensorrt_llm::executor::DataType::kUNKNOWN"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor8ExecutorE", "tensorrt_llm::executor::Executor"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorENSt10shared_ptrI5ModelEERK14ExecutorConfig", "tensorrt_llm::executor::Executor::Executor"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERKNSt10filesystem4pathE9ModelTypeRK14ExecutorConfig", "tensorrt_llm::executor::Executor::Executor"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERKNSt6vectorI7uint8_tEERKNSt6stringE9ModelTypeRK14ExecutorConfig", "tensorrt_llm::executor::Executor::Executor"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERKNSt6vectorI7uint8_tEERKNSt6stringE9ModelTypeRK14ExecutorConfig", "tensorrt_llm::executor::Executor::Executor::engineBuffer"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorENSt10shared_ptrI5ModelEERK14ExecutorConfig", "tensorrt_llm::executor::Executor::Executor::executorConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERKNSt10filesystem4pathE9ModelTypeRK14ExecutorConfig", "tensorrt_llm::executor::Executor::Executor::executorConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERKNSt6vectorI7uint8_tEERKNSt6stringE9ModelTypeRK14ExecutorConfig", "tensorrt_llm::executor::Executor::Executor::executorConfig"], [0, 6, 1, 
"_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERKNSt6vectorI7uint8_tEERKNSt6stringE9ModelTypeRK14ExecutorConfig", "tensorrt_llm::executor::Executor::Executor::jsonConfigStr"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorENSt10shared_ptrI5ModelEERK14ExecutorConfig", "tensorrt_llm::executor::Executor::Executor::model"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERKNSt10filesystem4pathE9ModelTypeRK14ExecutorConfig", "tensorrt_llm::executor::Executor::Executor::modelPath"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERKNSt10filesystem4pathE9ModelTypeRK14ExecutorConfig", "tensorrt_llm::executor::Executor::Executor::modelType"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERKNSt6vectorI7uint8_tEERKNSt6stringE9ModelTypeRK14ExecutorConfig", "tensorrt_llm::executor::Executor::Executor::modelType"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor8Executor14awaitResponsesERKNSt8optionalI6IdTypeEERKNSt8optionalINSt6chrono12millisecondsEEE", "tensorrt_llm::executor::Executor::awaitResponses"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8Executor14awaitResponsesERKNSt8optionalI6IdTypeEERKNSt8optionalINSt6chrono12millisecondsEEE", "tensorrt_llm::executor::Executor::awaitResponses::requestId"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8Executor14awaitResponsesERKNSt8optionalI6IdTypeEERKNSt8optionalINSt6chrono12millisecondsEEE", "tensorrt_llm::executor::Executor::awaitResponses::timeout"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor8Executor18canEnqueueRequestsEv", "tensorrt_llm::executor::Executor::canEnqueueRequests"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor8Executor13cancelRequestE6IdType", "tensorrt_llm::executor::Executor::cancelRequest"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8Executor13cancelRequestE6IdType", "tensorrt_llm::executor::Executor::cancelRequest::requestId"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor8Executor14enqueueRequestERK7Request", "tensorrt_llm::executor::Executor::enqueueRequest"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8Executor14enqueueRequestERK7Request", "tensorrt_llm::executor::Executor::enqueueRequest::request"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor8Executor15enqueueRequestsERKNSt6vectorI7RequestEE", "tensorrt_llm::executor::Executor::enqueueRequests"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8Executor15enqueueRequestsERKNSt6vectorI7RequestEE", "tensorrt_llm::executor::Executor::enqueueRequests::requests"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor8Executor23getLatestIterationStatsEv", "tensorrt_llm::executor::Executor::getLatestIterationStats"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor8Executor21getLatestRequestStatsEv", "tensorrt_llm::executor::Executor::getLatestRequestStats"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor8Executor20getNumResponsesReadyERKNSt8optionalI6IdTypeEE", "tensorrt_llm::executor::Executor::getNumResponsesReady"], [0, 6, 1, "_CPPv4NK12tensorrt_llm8executor8Executor20getNumResponsesReadyERKNSt8optionalI6IdTypeEE", "tensorrt_llm::executor::Executor::getNumResponsesReady::requestId"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor8Executor5mImplE", "tensorrt_llm::executor::Executor::mImpl"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor8Executor8shutdownEv", "tensorrt_llm::executor::Executor::shutdown"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor8ExecutorD0Ev", "tensorrt_llm::executor::Executor::~Executor"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfigE", "tensorrt_llm::executor::ExecutorConfig"], [0, 5, 1, 
"_CPPv4N12tensorrt_llm8executor14ExecutorConfig14ExecutorConfigE8SizeTypeRK15SchedulerConfigRK13KvCacheConfigbb8SizeType8SizeType12BatchingTypeNSt8optionalI14ParallelConfigEERKNSt8optionalI15PeftCacheConfigEENSt8optionalI22LogitsPostProcessorMapEENSt8optionalI13MedusaChoicesEE", "tensorrt_llm::executor::ExecutorConfig::ExecutorConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig14ExecutorConfigE8SizeTypeRK15SchedulerConfigRK13KvCacheConfigbb8SizeType8SizeType12BatchingTypeNSt8optionalI14ParallelConfigEERKNSt8optionalI15PeftCacheConfigEENSt8optionalI22LogitsPostProcessorMapEENSt8optionalI13MedusaChoicesEE", "tensorrt_llm::executor::ExecutorConfig::ExecutorConfig::batchingType"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig14ExecutorConfigE8SizeTypeRK15SchedulerConfigRK13KvCacheConfigbb8SizeType8SizeType12BatchingTypeNSt8optionalI14ParallelConfigEERKNSt8optionalI15PeftCacheConfigEENSt8optionalI22LogitsPostProcessorMapEENSt8optionalI13MedusaChoicesEE", "tensorrt_llm::executor::ExecutorConfig::ExecutorConfig::enableChunkedContext"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig14ExecutorConfigE8SizeTypeRK15SchedulerConfigRK13KvCacheConfigbb8SizeType8SizeType12BatchingTypeNSt8optionalI14ParallelConfigEERKNSt8optionalI15PeftCacheConfigEENSt8optionalI22LogitsPostProcessorMapEENSt8optionalI13MedusaChoicesEE", "tensorrt_llm::executor::ExecutorConfig::ExecutorConfig::iterStatsMaxIterations"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig14ExecutorConfigE8SizeTypeRK15SchedulerConfigRK13KvCacheConfigbb8SizeType8SizeType12BatchingTypeNSt8optionalI14ParallelConfigEERKNSt8optionalI15PeftCacheConfigEENSt8optionalI22LogitsPostProcessorMapEENSt8optionalI13MedusaChoicesEE", "tensorrt_llm::executor::ExecutorConfig::ExecutorConfig::kvCacheConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig14ExecutorConfigE8SizeTypeRK15SchedulerConfigRK13KvCacheConfigbb8SizeType8SizeType12BatchingTypeNSt8optionalI14ParallelConfigEERKNSt8optionalI15PeftCacheConfigEENSt8optionalI22LogitsPostProcessorMapEENSt8optionalI13MedusaChoicesEE", "tensorrt_llm::executor::ExecutorConfig::ExecutorConfig::logitsPostProcessorMap"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig14ExecutorConfigE8SizeTypeRK15SchedulerConfigRK13KvCacheConfigbb8SizeType8SizeType12BatchingTypeNSt8optionalI14ParallelConfigEERKNSt8optionalI15PeftCacheConfigEENSt8optionalI22LogitsPostProcessorMapEENSt8optionalI13MedusaChoicesEE", "tensorrt_llm::executor::ExecutorConfig::ExecutorConfig::maxBeamWidth"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig14ExecutorConfigE8SizeTypeRK15SchedulerConfigRK13KvCacheConfigbb8SizeType8SizeType12BatchingTypeNSt8optionalI14ParallelConfigEERKNSt8optionalI15PeftCacheConfigEENSt8optionalI22LogitsPostProcessorMapEENSt8optionalI13MedusaChoicesEE", "tensorrt_llm::executor::ExecutorConfig::ExecutorConfig::medusaChoices"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig14ExecutorConfigE8SizeTypeRK15SchedulerConfigRK13KvCacheConfigbb8SizeType8SizeType12BatchingTypeNSt8optionalI14ParallelConfigEERKNSt8optionalI15PeftCacheConfigEENSt8optionalI22LogitsPostProcessorMapEENSt8optionalI13MedusaChoicesEE", "tensorrt_llm::executor::ExecutorConfig::ExecutorConfig::normalizeLogProbs"], [0, 6, 1, 
"_CPPv4N12tensorrt_llm8executor14ExecutorConfig14ExecutorConfigE8SizeTypeRK15SchedulerConfigRK13KvCacheConfigbb8SizeType8SizeType12BatchingTypeNSt8optionalI14ParallelConfigEERKNSt8optionalI15PeftCacheConfigEENSt8optionalI22LogitsPostProcessorMapEENSt8optionalI13MedusaChoicesEE", "tensorrt_llm::executor::ExecutorConfig::ExecutorConfig::parallelConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig14ExecutorConfigE8SizeTypeRK15SchedulerConfigRK13KvCacheConfigbb8SizeType8SizeType12BatchingTypeNSt8optionalI14ParallelConfigEERKNSt8optionalI15PeftCacheConfigEENSt8optionalI22LogitsPostProcessorMapEENSt8optionalI13MedusaChoicesEE", "tensorrt_llm::executor::ExecutorConfig::ExecutorConfig::peftCacheConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig14ExecutorConfigE8SizeTypeRK15SchedulerConfigRK13KvCacheConfigbb8SizeType8SizeType12BatchingTypeNSt8optionalI14ParallelConfigEERKNSt8optionalI15PeftCacheConfigEENSt8optionalI22LogitsPostProcessorMapEENSt8optionalI13MedusaChoicesEE", "tensorrt_llm::executor::ExecutorConfig::ExecutorConfig::requestStatsMaxIterations"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig14ExecutorConfigE8SizeTypeRK15SchedulerConfigRK13KvCacheConfigbb8SizeType8SizeType12BatchingTypeNSt8optionalI14ParallelConfigEERKNSt8optionalI15PeftCacheConfigEENSt8optionalI22LogitsPostProcessorMapEENSt8optionalI13MedusaChoicesEE", "tensorrt_llm::executor::ExecutorConfig::ExecutorConfig::schedulerConfig"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getBatchingTypeEv", "tensorrt_llm::executor::ExecutorConfig::getBatchingType"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig23getEnableChunkedContextEv", "tensorrt_llm::executor::ExecutorConfig::getEnableChunkedContext"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig25getIterStatsMaxIterationsEv", "tensorrt_llm::executor::ExecutorConfig::getIterStatsMaxIterations"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig16getKvCacheConfigEv", "tensorrt_llm::executor::ExecutorConfig::getKvCacheConfig"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig25getLogitsPostProcessorMapEv", "tensorrt_llm::executor::ExecutorConfig::getLogitsPostProcessorMap"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getMaxBeamWidthEv", "tensorrt_llm::executor::ExecutorConfig::getMaxBeamWidth"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig16getMedusaChoicesEv", "tensorrt_llm::executor::ExecutorConfig::getMedusaChoices"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig20getNormalizeLogProbsEv", "tensorrt_llm::executor::ExecutorConfig::getNormalizeLogProbs"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig17getParallelConfigEv", "tensorrt_llm::executor::ExecutorConfig::getParallelConfig"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig18getPeftCacheConfigEv", "tensorrt_llm::executor::ExecutorConfig::getPeftCacheConfig"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig28getRequestStatsMaxIterationsEv", "tensorrt_llm::executor::ExecutorConfig::getRequestStatsMaxIterations"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig18getSchedulerConfigEv", "tensorrt_llm::executor::ExecutorConfig::getSchedulerConfig"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mBatchingTypeE", "tensorrt_llm::executor::ExecutorConfig::mBatchingType"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig21mEnableChunkedContextE", "tensorrt_llm::executor::ExecutorConfig::mEnableChunkedContext"], [0, 7, 1, 
"_CPPv4N12tensorrt_llm8executor14ExecutorConfig23mIterStatsMaxIterationsE", "tensorrt_llm::executor::ExecutorConfig::mIterStatsMaxIterations"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig14mKvCacheConfigE", "tensorrt_llm::executor::ExecutorConfig::mKvCacheConfig"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig23mLogitsPostProcessorMapE", "tensorrt_llm::executor::ExecutorConfig::mLogitsPostProcessorMap"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mMaxBeamWidthE", "tensorrt_llm::executor::ExecutorConfig::mMaxBeamWidth"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig14mMedusaChoicesE", "tensorrt_llm::executor::ExecutorConfig::mMedusaChoices"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig18mNormalizeLogProbsE", "tensorrt_llm::executor::ExecutorConfig::mNormalizeLogProbs"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig15mParallelConfigE", "tensorrt_llm::executor::ExecutorConfig::mParallelConfig"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig16mPeftCacheConfigE", "tensorrt_llm::executor::ExecutorConfig::mPeftCacheConfig"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig26mRequestStatsMaxIterationsE", "tensorrt_llm::executor::ExecutorConfig::mRequestStatsMaxIterations"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig16mSchedulerConfigE", "tensorrt_llm::executor::ExecutorConfig::mSchedulerConfig"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setBatchingTypeE12BatchingType", "tensorrt_llm::executor::ExecutorConfig::setBatchingType"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setBatchingTypeE12BatchingType", "tensorrt_llm::executor::ExecutorConfig::setBatchingType::batchingType"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig23setEnableChunkedContextEb", "tensorrt_llm::executor::ExecutorConfig::setEnableChunkedContext"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig23setEnableChunkedContextEb", "tensorrt_llm::executor::ExecutorConfig::setEnableChunkedContext::enableChunkedContext"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setIterStatsMaxIterationsE8SizeType", "tensorrt_llm::executor::ExecutorConfig::setIterStatsMaxIterations"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setIterStatsMaxIterationsE8SizeType", "tensorrt_llm::executor::ExecutorConfig::setIterStatsMaxIterations::iterStatsMaxIterations"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig16setKvCacheConfigERK13KvCacheConfig", "tensorrt_llm::executor::ExecutorConfig::setKvCacheConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig16setKvCacheConfigERK13KvCacheConfig", "tensorrt_llm::executor::ExecutorConfig::setKvCacheConfig::kvCacheConfig"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setLogitsPostProcessorMapERK22LogitsPostProcessorMap", "tensorrt_llm::executor::ExecutorConfig::setLogitsPostProcessorMap"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setLogitsPostProcessorMapERK22LogitsPostProcessorMap", "tensorrt_llm::executor::ExecutorConfig::setLogitsPostProcessorMap::logitsPostProcessorMap"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setMaxBeamWidthE8SizeType", "tensorrt_llm::executor::ExecutorConfig::setMaxBeamWidth"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setMaxBeamWidthE8SizeType", "tensorrt_llm::executor::ExecutorConfig::setMaxBeamWidth::maxBeamWidth"], [0, 5, 1, 
"_CPPv4N12tensorrt_llm8executor14ExecutorConfig16setMedusaChoicesERK13MedusaChoices", "tensorrt_llm::executor::ExecutorConfig::setMedusaChoices"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig16setMedusaChoicesERK13MedusaChoices", "tensorrt_llm::executor::ExecutorConfig::setMedusaChoices::medusaChoices"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig20setNormalizeLogProbsEb", "tensorrt_llm::executor::ExecutorConfig::setNormalizeLogProbs"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig20setNormalizeLogProbsEb", "tensorrt_llm::executor::ExecutorConfig::setNormalizeLogProbs::normalizeLogProbs"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig17setParallelConfigERK14ParallelConfig", "tensorrt_llm::executor::ExecutorConfig::setParallelConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig17setParallelConfigERK14ParallelConfig", "tensorrt_llm::executor::ExecutorConfig::setParallelConfig::parallelConfig"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig18setPeftCacheConfigERK15PeftCacheConfig", "tensorrt_llm::executor::ExecutorConfig::setPeftCacheConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig18setPeftCacheConfigERK15PeftCacheConfig", "tensorrt_llm::executor::ExecutorConfig::setPeftCacheConfig::peftCacheConfig"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig28setRequestStatsMaxIterationsE8SizeType", "tensorrt_llm::executor::ExecutorConfig::setRequestStatsMaxIterations"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig28setRequestStatsMaxIterationsE8SizeType", "tensorrt_llm::executor::ExecutorConfig::setRequestStatsMaxIterations::requestStatsMaxIterations"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig18setSchedulerConfigERK15SchedulerConfig", "tensorrt_llm::executor::ExecutorConfig::setSchedulerConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig18setSchedulerConfigERK15SchedulerConfig", "tensorrt_llm::executor::ExecutorConfig::setSchedulerConfig::schedulerConfig"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executor9FloatTypeE", "tensorrt_llm::executor::FloatType"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executor6IdTypeE", "tensorrt_llm::executor::IdType"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStatsE", "tensorrt_llm::executor::InflightBatchingStats"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStats12microBatchIdE", "tensorrt_llm::executor::InflightBatchingStats::microBatchId"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStats18numContextRequestsE", "tensorrt_llm::executor::InflightBatchingStats::numContextRequests"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStats12numCtxTokensE", "tensorrt_llm::executor::InflightBatchingStats::numCtxTokens"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStats14numGenRequestsE", "tensorrt_llm::executor::InflightBatchingStats::numGenRequests"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStats17numPausedRequestsE", "tensorrt_llm::executor::InflightBatchingStats::numPausedRequests"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStats20numScheduledRequestsE", "tensorrt_llm::executor::InflightBatchingStats::numScheduledRequests"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor14IterationStatsE", "tensorrt_llm::executor::IterationStats"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14IterationStats11cpuMemUsageE", "tensorrt_llm::executor::IterationStats::cpuMemUsage"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14IterationStats11gpuMemUsageE", 
"tensorrt_llm::executor::IterationStats::gpuMemUsage"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14IterationStats21inflightBatchingStatsE", "tensorrt_llm::executor::IterationStats::inflightBatchingStats"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14IterationStats4iterE", "tensorrt_llm::executor::IterationStats::iter"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14IterationStats12kvCacheStatsE", "tensorrt_llm::executor::IterationStats::kvCacheStats"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14IterationStats20maxNumActiveRequestsE", "tensorrt_llm::executor::IterationStats::maxNumActiveRequests"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14IterationStats17numActiveRequestsE", "tensorrt_llm::executor::IterationStats::numActiveRequests"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14IterationStats14pinnedMemUsageE", "tensorrt_llm::executor::IterationStats::pinnedMemUsage"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14IterationStats19staticBatchingStatsE", "tensorrt_llm::executor::IterationStats::staticBatchingStats"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14IterationStats9timestampE", "tensorrt_llm::executor::IterationStats::timestamp"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executor13IterationTypeE", "tensorrt_llm::executor::IterationType"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor17JsonSerializationE", "tensorrt_llm::executor::JsonSerialization"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK12RequestStats", "tensorrt_llm::executor::JsonSerialization::toJsonStr"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK14IterationStats", "tensorrt_llm::executor::JsonSerialization::toJsonStr"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK24RequestStatsPerIteration", "tensorrt_llm::executor::JsonSerialization::toJsonStr"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK14IterationStats", "tensorrt_llm::executor::JsonSerialization::toJsonStr::iterationStats"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK12RequestStats", "tensorrt_llm::executor::JsonSerialization::toJsonStr::requestStats"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK24RequestStatsPerIteration", "tensorrt_llm::executor::JsonSerialization::toJsonStr::requestStatsPerIter"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor13KvCacheConfigE", "tensorrt_llm::executor::KvCacheConfig"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig13KvCacheConfigEbRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI6size_tEEb", "tensorrt_llm::executor::KvCacheConfig::KvCacheConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig13KvCacheConfigEbRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI6size_tEEb", "tensorrt_llm::executor::KvCacheConfig::KvCacheConfig::enableBlockReuse"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig13KvCacheConfigEbRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI6size_tEEb", "tensorrt_llm::executor::KvCacheConfig::KvCacheConfig::freeGpuMemoryFraction"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig13KvCacheConfigEbRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI6size_tEEb", "tensorrt_llm::executor::KvCacheConfig::KvCacheConfig::hostCacheSize"], [0, 6, 1, 
"_CPPv4N12tensorrt_llm8executor13KvCacheConfig13KvCacheConfigEbRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI6size_tEEb", "tensorrt_llm::executor::KvCacheConfig::KvCacheConfig::maxAttentionWindow"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig13KvCacheConfigEbRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI6size_tEEb", "tensorrt_llm::executor::KvCacheConfig::KvCacheConfig::maxTokens"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig13KvCacheConfigEbRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI6size_tEEb", "tensorrt_llm::executor::KvCacheConfig::KvCacheConfig::onboardBlocks"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig13KvCacheConfigEbRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI6size_tEEb", "tensorrt_llm::executor::KvCacheConfig::KvCacheConfig::sinkTokenLength"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig19getEnableBlockReuseEv", "tensorrt_llm::executor::KvCacheConfig::getEnableBlockReuse"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig24getFreeGpuMemoryFractionEv", "tensorrt_llm::executor::KvCacheConfig::getFreeGpuMemoryFraction"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig16getHostCacheSizeEv", "tensorrt_llm::executor::KvCacheConfig::getHostCacheSize"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig21getMaxAttentionWindowEv", "tensorrt_llm::executor::KvCacheConfig::getMaxAttentionWindow"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig12getMaxTokensEv", "tensorrt_llm::executor::KvCacheConfig::getMaxTokens"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig16getOnboardBlocksEv", "tensorrt_llm::executor::KvCacheConfig::getOnboardBlocks"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig18getSinkTokenLengthEv", "tensorrt_llm::executor::KvCacheConfig::getSinkTokenLength"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig17mEnableBlockReuseE", "tensorrt_llm::executor::KvCacheConfig::mEnableBlockReuse"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig22mFreeGpuMemoryFractionE", "tensorrt_llm::executor::KvCacheConfig::mFreeGpuMemoryFraction"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig14mHostCacheSizeE", "tensorrt_llm::executor::KvCacheConfig::mHostCacheSize"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig19mMaxAttentionWindowE", "tensorrt_llm::executor::KvCacheConfig::mMaxAttentionWindow"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig10mMaxTokensE", "tensorrt_llm::executor::KvCacheConfig::mMaxTokens"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig14mOnboardBlocksE", "tensorrt_llm::executor::KvCacheConfig::mOnboardBlocks"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig16mSinkTokenLengthE", "tensorrt_llm::executor::KvCacheConfig::mSinkTokenLength"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor12KvCacheStatsE", "tensorrt_llm::executor::KvCacheStats"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor12KvCacheStats13freeNumBlocksE", "tensorrt_llm::executor::KvCacheStats::freeNumBlocks"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor12KvCacheStats12maxNumBlocksE", "tensorrt_llm::executor::KvCacheStats::maxNumBlocks"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor12KvCacheStats14tokensPerBlockE", 
"tensorrt_llm::executor::KvCacheStats::tokensPerBlock"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor12KvCacheStats13usedNumBlocksE", "tensorrt_llm::executor::KvCacheStats::usedNumBlocks"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executor19LogitsPostProcessorE", "tensorrt_llm::executor::LogitsPostProcessor"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executor22LogitsPostProcessorMapE", "tensorrt_llm::executor::LogitsPostProcessorMap"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor10LoraConfigE", "tensorrt_llm::executor::LoraConfig"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor10LoraConfig10LoraConfigE6IdTypeNSt8optionalI6TensorEENSt8optionalI6TensorEE", "tensorrt_llm::executor::LoraConfig::LoraConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor10LoraConfig10LoraConfigE6IdTypeNSt8optionalI6TensorEENSt8optionalI6TensorEE", "tensorrt_llm::executor::LoraConfig::LoraConfig::config"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor10LoraConfig10LoraConfigE6IdTypeNSt8optionalI6TensorEENSt8optionalI6TensorEE", "tensorrt_llm::executor::LoraConfig::LoraConfig::taskId"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor10LoraConfig10LoraConfigE6IdTypeNSt8optionalI6TensorEENSt8optionalI6TensorEE", "tensorrt_llm::executor::LoraConfig::LoraConfig::weights"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor10LoraConfig9getConfigEv", "tensorrt_llm::executor::LoraConfig::getConfig"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor10LoraConfig9getTaskIdEv", "tensorrt_llm::executor::LoraConfig::getTaskId"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor10LoraConfig10getWeightsEv", "tensorrt_llm::executor::LoraConfig::getWeights"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor10LoraConfig7mConfigE", "tensorrt_llm::executor::LoraConfig::mConfig"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor10LoraConfig7mTaskIdE", "tensorrt_llm::executor::LoraConfig::mTaskId"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor10LoraConfig8mWeightsE", "tensorrt_llm::executor::LoraConfig::mWeights"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executor13MedusaChoicesE", "tensorrt_llm::executor::MedusaChoices"], [0, 2, 1, "_CPPv4N12tensorrt_llm8executor10MemoryTypeE", "tensorrt_llm::executor::MemoryType"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor10MemoryType4kCPUE", "tensorrt_llm::executor::MemoryType::kCPU"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor10MemoryType11kCPU_PINNEDE", "tensorrt_llm::executor::MemoryType::kCPU_PINNED"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor10MemoryType4kGPUE", "tensorrt_llm::executor::MemoryType::kGPU"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor10MemoryType8kUNKNOWNE", "tensorrt_llm::executor::MemoryType::kUNKNOWN"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor10MemoryType4kUVME", "tensorrt_llm::executor::MemoryType::kUVM"], [0, 2, 1, "_CPPv4N12tensorrt_llm8executor9ModelTypeE", "tensorrt_llm::executor::ModelType"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor9ModelType13kDECODER_ONLYE", "tensorrt_llm::executor::ModelType::kDECODER_ONLY"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor12OutputConfigE", "tensorrt_llm::executor::OutputConfig"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor12OutputConfig12OutputConfigEbbbb", "tensorrt_llm::executor::OutputConfig::OutputConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor12OutputConfig12OutputConfigEbbbb", "tensorrt_llm::executor::OutputConfig::OutputConfig::excludeInputFromOutput"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor12OutputConfig12OutputConfigEbbbb", "tensorrt_llm::executor::OutputConfig::OutputConfig::returnContextLogits"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor12OutputConfig12OutputConfigEbbbb", 
"tensorrt_llm::executor::OutputConfig::OutputConfig::returnGenerationLogits"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor12OutputConfig12OutputConfigEbbbb", "tensorrt_llm::executor::OutputConfig::OutputConfig::returnLogProbs"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor12OutputConfig22excludeInputFromOutputE", "tensorrt_llm::executor::OutputConfig::excludeInputFromOutput"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor12OutputConfig19returnContextLogitsE", "tensorrt_llm::executor::OutputConfig::returnContextLogits"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor12OutputConfig22returnGenerationLogitsE", "tensorrt_llm::executor::OutputConfig::returnGenerationLogits"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor12OutputConfig14returnLogProbsE", "tensorrt_llm::executor::OutputConfig::returnLogProbs"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor14ParallelConfigE", "tensorrt_llm::executor::ParallelConfig"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor14ParallelConfig14ParallelConfigE17CommunicationType17CommunicationModeNSt8optionalINSt6vectorI8SizeTypeEEEENSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::executor::ParallelConfig::ParallelConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ParallelConfig14ParallelConfigE17CommunicationType17CommunicationModeNSt8optionalINSt6vectorI8SizeTypeEEEENSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::executor::ParallelConfig::ParallelConfig::commMode"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ParallelConfig14ParallelConfigE17CommunicationType17CommunicationModeNSt8optionalINSt6vectorI8SizeTypeEEEENSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::executor::ParallelConfig::ParallelConfig::commType"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ParallelConfig14ParallelConfigE17CommunicationType17CommunicationModeNSt8optionalINSt6vectorI8SizeTypeEEEENSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::executor::ParallelConfig::ParallelConfig::deviceIds"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ParallelConfig14ParallelConfigE17CommunicationType17CommunicationModeNSt8optionalINSt6vectorI8SizeTypeEEEENSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::executor::ParallelConfig::ParallelConfig::participantIds"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14ParallelConfig20getCommunicationModeEv", "tensorrt_llm::executor::ParallelConfig::getCommunicationMode"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14ParallelConfig20getCommunicationTypeEv", "tensorrt_llm::executor::ParallelConfig::getCommunicationType"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14ParallelConfig12getDeviceIdsEv", "tensorrt_llm::executor::ParallelConfig::getDeviceIds"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14ParallelConfig17getParticipantIdsEv", "tensorrt_llm::executor::ParallelConfig::getParticipantIds"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14ParallelConfig9mCommModeE", "tensorrt_llm::executor::ParallelConfig::mCommMode"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14ParallelConfig9mCommTypeE", "tensorrt_llm::executor::ParallelConfig::mCommType"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14ParallelConfig10mDeviceIdsE", "tensorrt_llm::executor::ParallelConfig::mDeviceIds"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14ParallelConfig15mParticipantIdsE", "tensorrt_llm::executor::ParallelConfig::mParticipantIds"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor14ParallelConfig20setCommunicationModeE17CommunicationMode", "tensorrt_llm::executor::ParallelConfig::setCommunicationMode"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ParallelConfig20setCommunicationModeE17CommunicationMode", 
"tensorrt_llm::executor::ParallelConfig::setCommunicationMode::mode"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor14ParallelConfig20setCommunicationTypeE17CommunicationType", "tensorrt_llm::executor::ParallelConfig::setCommunicationType"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ParallelConfig20setCommunicationTypeE17CommunicationType", "tensorrt_llm::executor::ParallelConfig::setCommunicationType::type"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor14ParallelConfig12setDeviceIdsERKNSt6vectorI8SizeTypeEE", "tensorrt_llm::executor::ParallelConfig::setDeviceIds"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ParallelConfig12setDeviceIdsERKNSt6vectorI8SizeTypeEE", "tensorrt_llm::executor::ParallelConfig::setDeviceIds::deviceIds"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor14ParallelConfig17setParticipantIdsERKNSt6vectorI8SizeTypeEE", "tensorrt_llm::executor::ParallelConfig::setParticipantIds"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14ParallelConfig17setParticipantIdsERKNSt6vectorI8SizeTypeEE", "tensorrt_llm::executor::ParallelConfig::setParticipantIds::participantIds"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfigE", "tensorrt_llm::executor::PeftCacheConfig"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15PeftCacheConfigE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalIfEERKNSt8optionalI6size_tEE", "tensorrt_llm::executor::PeftCacheConfig::PeftCacheConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15PeftCacheConfigE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalIfEERKNSt8optionalI6size_tEE", "tensorrt_llm::executor::PeftCacheConfig::PeftCacheConfig::deviceCachePercent"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15PeftCacheConfigE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalIfEERKNSt8optionalI6size_tEE", "tensorrt_llm::executor::PeftCacheConfig::PeftCacheConfig::hostCacheSize"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15PeftCacheConfigE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalIfEERKNSt8optionalI6size_tEE", "tensorrt_llm::executor::PeftCacheConfig::PeftCacheConfig::maxAdapterSize"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15PeftCacheConfigE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalIfEERKNSt8optionalI6size_tEE", "tensorrt_llm::executor::PeftCacheConfig::PeftCacheConfig::maxPagesPerBlockDevice"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15PeftCacheConfigE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalIfEERKNSt8optionalI6size_tEE", "tensorrt_llm::executor::PeftCacheConfig::PeftCacheConfig::maxPagesPerBlockHost"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15PeftCacheConfigE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalIfEERKNSt8optionalI6size_tEE", "tensorrt_llm::executor::PeftCacheConfig::PeftCacheConfig::numCopyStreams"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15PeftCacheConfigE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalIfEERKNSt8optionalI6size_tEE", "tensorrt_llm::executor::PeftCacheConfig::PeftCacheConfig::numDeviceModuleLayer"], [0, 6, 1, 
"_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15PeftCacheConfigE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalIfEERKNSt8optionalI6size_tEE", "tensorrt_llm::executor::PeftCacheConfig::PeftCacheConfig::numEnsureWorkers"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15PeftCacheConfigE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalIfEERKNSt8optionalI6size_tEE", "tensorrt_llm::executor::PeftCacheConfig::PeftCacheConfig::numHostModuleLayer"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15PeftCacheConfigE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalIfEERKNSt8optionalI6size_tEE", "tensorrt_llm::executor::PeftCacheConfig::PeftCacheConfig::numPutWorkers"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15PeftCacheConfigE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalIfEERKNSt8optionalI6size_tEE", "tensorrt_llm::executor::PeftCacheConfig::PeftCacheConfig::optimalAdapterSize"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig21getDeviceCachePercentEv", "tensorrt_llm::executor::PeftCacheConfig::getDeviceCachePercent"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig16getHostCacheSizeEv", "tensorrt_llm::executor::PeftCacheConfig::getHostCacheSize"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig17getMaxAdapterSizeEv", "tensorrt_llm::executor::PeftCacheConfig::getMaxAdapterSize"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig25getMaxPagesPerBlockDeviceEv", "tensorrt_llm::executor::PeftCacheConfig::getMaxPagesPerBlockDevice"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig23getMaxPagesPerBlockHostEv", "tensorrt_llm::executor::PeftCacheConfig::getMaxPagesPerBlockHost"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig17getNumCopyStreamsEv", "tensorrt_llm::executor::PeftCacheConfig::getNumCopyStreams"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig23getNumDeviceModuleLayerEv", "tensorrt_llm::executor::PeftCacheConfig::getNumDeviceModuleLayer"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig19getNumEnsureWorkersEv", "tensorrt_llm::executor::PeftCacheConfig::getNumEnsureWorkers"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig21getNumHostModuleLayerEv", "tensorrt_llm::executor::PeftCacheConfig::getNumHostModuleLayer"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig16getNumPutWorkersEv", "tensorrt_llm::executor::PeftCacheConfig::getNumPutWorkers"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig21getOptimalAdapterSizeEv", "tensorrt_llm::executor::PeftCacheConfig::getOptimalAdapterSize"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig19mDeviceCachePercentE", "tensorrt_llm::executor::PeftCacheConfig::mDeviceCachePercent"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig14mHostCacheSizeE", "tensorrt_llm::executor::PeftCacheConfig::mHostCacheSize"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15mMaxAdapterSizeE", "tensorrt_llm::executor::PeftCacheConfig::mMaxAdapterSize"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig23mMaxPagesPerBlockDeviceE", "tensorrt_llm::executor::PeftCacheConfig::mMaxPagesPerBlockDevice"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig21mMaxPagesPerBlockHostE", "tensorrt_llm::executor::PeftCacheConfig::mMaxPagesPerBlockHost"], [0, 7, 1, 
"_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15mNumCopyStreamsE", "tensorrt_llm::executor::PeftCacheConfig::mNumCopyStreams"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig21mNumDeviceModuleLayerE", "tensorrt_llm::executor::PeftCacheConfig::mNumDeviceModuleLayer"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig17mNumEnsureWorkersE", "tensorrt_llm::executor::PeftCacheConfig::mNumEnsureWorkers"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig19mNumHostModuleLayerE", "tensorrt_llm::executor::PeftCacheConfig::mNumHostModuleLayer"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig14mNumPutWorkersE", "tensorrt_llm::executor::PeftCacheConfig::mNumPutWorkers"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig19mOptimalAdapterSizeE", "tensorrt_llm::executor::PeftCacheConfig::mOptimalAdapterSize"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor19PhonyNameDueToError5valueE", "tensorrt_llm::executor::PhonyNameDueToError::value"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor19PhonyNameDueToError5valueE", "tensorrt_llm::executor::PhonyNameDueToError::value"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor19PhonyNameDueToError5valueE", "tensorrt_llm::executor::PhonyNameDueToError::value"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor19PhonyNameDueToError5valueE", "tensorrt_llm::executor::PhonyNameDueToError::value"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor18PromptTuningConfigE", "tensorrt_llm::executor::PromptTuningConfig"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor18PromptTuningConfig18PromptTuningConfigE6Tensor", "tensorrt_llm::executor::PromptTuningConfig::PromptTuningConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor18PromptTuningConfig18PromptTuningConfigE6Tensor", "tensorrt_llm::executor::PromptTuningConfig::PromptTuningConfig::embeddingTable"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor18PromptTuningConfig17getEmbeddingTableEv", "tensorrt_llm::executor::PromptTuningConfig::getEmbeddingTable"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor18PromptTuningConfig15mEmbeddingTableE", "tensorrt_llm::executor::PromptTuningConfig::mEmbeddingTable"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executor14RandomSeedTypeE", "tensorrt_llm::executor::RandomSeedType"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor7RequestE", "tensorrt_llm::executor::Request"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens8SizeTypebRK14SamplingConfigRK12OutputConfigRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI10LoraConfigEENSt8optionalINSt6stringEEE", "tensorrt_llm::executor::Request::Request"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor7Request7RequestERK7Request", "tensorrt_llm::executor::Request::Request"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor7Request7RequestERR7Request", "tensorrt_llm::executor::Request::Request"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens8SizeTypebRK14SamplingConfigRK12OutputConfigRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI10LoraConfigEENSt8optionalINSt6stringEEE", "tensorrt_llm::executor::Request::Request::badWords"], [0, 6, 1, 
"_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens8SizeTypebRK14SamplingConfigRK12OutputConfigRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI10LoraConfigEENSt8optionalINSt6stringEEE", "tensorrt_llm::executor::Request::Request::embeddingBias"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens8SizeTypebRK14SamplingConfigRK12OutputConfigRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI10LoraConfigEENSt8optionalINSt6stringEEE", "tensorrt_llm::executor::Request::Request::endId"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens8SizeTypebRK14SamplingConfigRK12OutputConfigRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI10LoraConfigEENSt8optionalINSt6stringEEE", "tensorrt_llm::executor::Request::Request::inputTokenIds"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens8SizeTypebRK14SamplingConfigRK12OutputConfigRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI10LoraConfigEENSt8optionalINSt6stringEEE", "tensorrt_llm::executor::Request::Request::logitsPostProcessorName"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens8SizeTypebRK14SamplingConfigRK12OutputConfigRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI10LoraConfigEENSt8optionalINSt6stringEEE", "tensorrt_llm::executor::Request::Request::loraConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens8SizeTypebRK14SamplingConfigRK12OutputConfigRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI10LoraConfigEENSt8optionalINSt6stringEEE", "tensorrt_llm::executor::Request::Request::maxNewTokens"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request7RequestERK7Request", "tensorrt_llm::executor::Request::Request::other"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request7RequestERR7Request", "tensorrt_llm::executor::Request::Request::other"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens8SizeTypebRK14SamplingConfigRK12OutputConfigRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI10LoraConfigEENSt8optionalINSt6stringEEE", "tensorrt_llm::executor::Request::Request::outputConfig"], [0, 6, 1, 
"_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens8SizeTypebRK14SamplingConfigRK12OutputConfigRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI10LoraConfigEENSt8optionalINSt6stringEEE", "tensorrt_llm::executor::Request::Request::pTuningConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens8SizeTypebRK14SamplingConfigRK12OutputConfigRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI10LoraConfigEENSt8optionalINSt6stringEEE", "tensorrt_llm::executor::Request::Request::padId"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens8SizeTypebRK14SamplingConfigRK12OutputConfigRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI10LoraConfigEENSt8optionalINSt6stringEEE", "tensorrt_llm::executor::Request::Request::samplingConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens8SizeTypebRK14SamplingConfigRK12OutputConfigRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI10LoraConfigEENSt8optionalINSt6stringEEE", "tensorrt_llm::executor::Request::Request::speculativeDecodingConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens8SizeTypebRK14SamplingConfigRK12OutputConfigRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI10LoraConfigEENSt8optionalINSt6stringEEE", "tensorrt_llm::executor::Request::Request::stopWords"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens8SizeTypebRK14SamplingConfigRK12OutputConfigRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI10LoraConfigEENSt8optionalINSt6stringEEE", "tensorrt_llm::executor::Request::Request::streaming"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor7Request11getBadWordsEv", "tensorrt_llm::executor::Request::getBadWords"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor7Request16getEmbeddingBiasEv", "tensorrt_llm::executor::Request::getEmbeddingBias"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor7Request8getEndIdEv", "tensorrt_llm::executor::Request::getEndId"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor7Request16getInputTokenIdsEv", "tensorrt_llm::executor::Request::getInputTokenIds"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor7Request26getLogitsPostProcessorNameEv", "tensorrt_llm::executor::Request::getLogitsPostProcessorName"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor7Request13getLoraConfigEv", "tensorrt_llm::executor::Request::getLoraConfig"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor7Request15getMaxNewTokensEv", 
"tensorrt_llm::executor::Request::getMaxNewTokens"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor7Request15getOutputConfigEv", "tensorrt_llm::executor::Request::getOutputConfig"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor7Request8getPadIdEv", "tensorrt_llm::executor::Request::getPadId"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor7Request21getPromptTuningConfigEv", "tensorrt_llm::executor::Request::getPromptTuningConfig"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor7Request17getSamplingConfigEv", "tensorrt_llm::executor::Request::getSamplingConfig"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor7Request28getSpeculativeDecodingConfigEv", "tensorrt_llm::executor::Request::getSpeculativeDecodingConfig"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor7Request12getStopWordsEv", "tensorrt_llm::executor::Request::getStopWords"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor7Request12getStreamingEv", "tensorrt_llm::executor::Request::getStreaming"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor7Request5mImplE", "tensorrt_llm::executor::Request::mImpl"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor7RequestaSERK7Request", "tensorrt_llm::executor::Request::operator="], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor7RequestaSERR7Request", "tensorrt_llm::executor::Request::operator="], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7RequestaSERK7Request", "tensorrt_llm::executor::Request::operator=::other"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7RequestaSERR7Request", "tensorrt_llm::executor::Request::operator=::other"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor7Request11setBadWordsERKNSt4listI9VecTokensEE", "tensorrt_llm::executor::Request::setBadWords"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request11setBadWordsERKNSt4listI9VecTokensEE", "tensorrt_llm::executor::Request::setBadWords::badWords"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor7Request16setEmbeddingBiasERK6Tensor", "tensorrt_llm::executor::Request::setEmbeddingBias"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request16setEmbeddingBiasERK6Tensor", "tensorrt_llm::executor::Request::setEmbeddingBias::embeddingBias"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor7Request8setEndIdE8SizeType", "tensorrt_llm::executor::Request::setEndId"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request8setEndIdE8SizeType", "tensorrt_llm::executor::Request::setEndId::endId"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor7Request26setLogitsPostProcessorNameERKNSt6stringE", "tensorrt_llm::executor::Request::setLogitsPostProcessorName"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request26setLogitsPostProcessorNameERKNSt6stringE", "tensorrt_llm::executor::Request::setLogitsPostProcessorName::logitsPostProcessorName"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor7Request13setLoraConfigERK10LoraConfig", "tensorrt_llm::executor::Request::setLoraConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request13setLoraConfigERK10LoraConfig", "tensorrt_llm::executor::Request::setLoraConfig::loraConfig"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor7Request15setOutputConfigERK12OutputConfig", "tensorrt_llm::executor::Request::setOutputConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request15setOutputConfigERK12OutputConfig", "tensorrt_llm::executor::Request::setOutputConfig::outputConfig"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor7Request8setPadIdE8SizeType", "tensorrt_llm::executor::Request::setPadId"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request8setPadIdE8SizeType", "tensorrt_llm::executor::Request::setPadId::padId"], [0, 5, 1, 
"_CPPv4N12tensorrt_llm8executor7Request21setPromptTuningConfigERK18PromptTuningConfig", "tensorrt_llm::executor::Request::setPromptTuningConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request21setPromptTuningConfigERK18PromptTuningConfig", "tensorrt_llm::executor::Request::setPromptTuningConfig::pTuningConfig"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor7Request17setSamplingConfigERK14SamplingConfig", "tensorrt_llm::executor::Request::setSamplingConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request17setSamplingConfigERK14SamplingConfig", "tensorrt_llm::executor::Request::setSamplingConfig::config"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor7Request28setSpeculativeDecodingConfigERK25SpeculativeDecodingConfig", "tensorrt_llm::executor::Request::setSpeculativeDecodingConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request28setSpeculativeDecodingConfigERK25SpeculativeDecodingConfig", "tensorrt_llm::executor::Request::setSpeculativeDecodingConfig::specDecodingConfig"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor7Request12setStopWordsERKNSt4listI9VecTokensEE", "tensorrt_llm::executor::Request::setStopWords"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request12setStopWordsERKNSt4listI9VecTokensEE", "tensorrt_llm::executor::Request::setStopWords::stopWords"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor7Request12setStreamingEb", "tensorrt_llm::executor::Request::setStreaming"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor7Request12setStreamingEb", "tensorrt_llm::executor::Request::setStreaming::streaming"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor7RequestD0Ev", "tensorrt_llm::executor::Request::~Request"], [0, 2, 1, "_CPPv4N12tensorrt_llm8executor12RequestStageE", "tensorrt_llm::executor::RequestStage"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor12RequestStage20kCONTEXT_IN_PROGRESSE", "tensorrt_llm::executor::RequestStage::kCONTEXT_IN_PROGRESS"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor12RequestStage20kGENERATION_COMPLETEE", "tensorrt_llm::executor::RequestStage::kGENERATION_COMPLETE"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor12RequestStage23kGENERATION_IN_PROGRESSE", "tensorrt_llm::executor::RequestStage::kGENERATION_IN_PROGRESS"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor12RequestStage7kQUEUEDE", "tensorrt_llm::executor::RequestStage::kQUEUED"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor12RequestStatsE", "tensorrt_llm::executor::RequestStats"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor12RequestStats22contextPrefillPositionE", "tensorrt_llm::executor::RequestStats::contextPrefillPosition"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor12RequestStats2idE", "tensorrt_llm::executor::RequestStats::id"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor12RequestStats18numGeneratedTokensE", "tensorrt_llm::executor::RequestStats::numGeneratedTokens"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor12RequestStats6pausedE", "tensorrt_llm::executor::RequestStats::paused"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor12RequestStats9scheduledE", "tensorrt_llm::executor::RequestStats::scheduled"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor12RequestStats5stageE", "tensorrt_llm::executor::RequestStats::stage"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor24RequestStatsPerIterationE", "tensorrt_llm::executor::RequestStatsPerIteration"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor24RequestStatsPerIteration4iterE", "tensorrt_llm::executor::RequestStatsPerIteration::iter"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor24RequestStatsPerIteration12requestStatsE", "tensorrt_llm::executor::RequestStatsPerIteration::requestStats"], [0, 4, 1, 
"_CPPv4N12tensorrt_llm8executor8ResponseE", "tensorrt_llm::executor::Response"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor8Response8ResponseE6IdType6Result", "tensorrt_llm::executor::Response::Response"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor8Response8ResponseE6IdTypeNSt6stringE", "tensorrt_llm::executor::Response::Response"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor8Response8ResponseERK8Response", "tensorrt_llm::executor::Response::Response"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor8Response8ResponseERR8Response", "tensorrt_llm::executor::Response::Response"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8Response8ResponseE6IdType6Result", "tensorrt_llm::executor::Response::Response::Result"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8Response8ResponseE6IdTypeNSt6stringE", "tensorrt_llm::executor::Response::Response::errorMsg"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8Response8ResponseERK8Response", "tensorrt_llm::executor::Response::Response::other"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8Response8ResponseERR8Response", "tensorrt_llm::executor::Response::Response::other"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8Response8ResponseE6IdType6Result", "tensorrt_llm::executor::Response::Response::requestId"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8Response8ResponseE6IdTypeNSt6stringE", "tensorrt_llm::executor::Response::Response::requestId"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor8Response11getErrorMsgEv", "tensorrt_llm::executor::Response::getErrorMsg"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor8Response12getRequestIdEv", "tensorrt_llm::executor::Response::getRequestId"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor8Response9getResultEv", "tensorrt_llm::executor::Response::getResult"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor8Response8hasErrorEv", "tensorrt_llm::executor::Response::hasError"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor8Response5mImplE", "tensorrt_llm::executor::Response::mImpl"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor8ResponseaSERK8Response", "tensorrt_llm::executor::Response::operator="], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor8ResponseaSERR8Response", "tensorrt_llm::executor::Response::operator="], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8ResponseaSERK8Response", "tensorrt_llm::executor::Response::operator=::other"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor8ResponseaSERR8Response", "tensorrt_llm::executor::Response::operator=::other"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor8ResponseD0Ev", "tensorrt_llm::executor::Response::~Response"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor6ResultE", "tensorrt_llm::executor::Result"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor6Result13contextLogitsE", "tensorrt_llm::executor::Result::contextLogits"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor6Result11cumLogProbsE", "tensorrt_llm::executor::Result::cumLogProbs"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor6Result16generationLogitsE", "tensorrt_llm::executor::Result::generationLogits"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor6Result7isFinalE", "tensorrt_llm::executor::Result::isFinal"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor6Result8logProbsE", "tensorrt_llm::executor::Result::logProbs"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor6Result14outputTokenIdsE", "tensorrt_llm::executor::Result::outputTokenIds"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfigE", "tensorrt_llm::executor::SamplingConfig"], [0, 5, 1, 
"_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE8SizeTypeRKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEE", "tensorrt_llm::executor::SamplingConfig::SamplingConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE8SizeTypeRKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEE", "tensorrt_llm::executor::SamplingConfig::SamplingConfig::beamSearchDiversityRate"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE8SizeTypeRKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEE", "tensorrt_llm::executor::SamplingConfig::SamplingConfig::beamWidth"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE8SizeTypeRKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEE", "tensorrt_llm::executor::SamplingConfig::SamplingConfig::earlyStopping"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE8SizeTypeRKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEE", "tensorrt_llm::executor::SamplingConfig::SamplingConfig::frequencyPenalty"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE8SizeTypeRKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEE", "tensorrt_llm::executor::SamplingConfig::SamplingConfig::lengthPenalty"], [0, 6, 1, 
"_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE8SizeTypeRKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEE", "tensorrt_llm::executor::SamplingConfig::SamplingConfig::minLength"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE8SizeTypeRKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEE", "tensorrt_llm::executor::SamplingConfig::SamplingConfig::presencePenalty"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE8SizeTypeRKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEE", "tensorrt_llm::executor::SamplingConfig::SamplingConfig::randomSeed"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE8SizeTypeRKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEE", "tensorrt_llm::executor::SamplingConfig::SamplingConfig::repetitionPenalty"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE8SizeTypeRKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEE", "tensorrt_llm::executor::SamplingConfig::SamplingConfig::temperature"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE8SizeTypeRKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEE", "tensorrt_llm::executor::SamplingConfig::SamplingConfig::topK"], [0, 6, 1, 
"_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE8SizeTypeRKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEE", "tensorrt_llm::executor::SamplingConfig::SamplingConfig::topP"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE8SizeTypeRKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEE", "tensorrt_llm::executor::SamplingConfig::SamplingConfig::topPDecay"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE8SizeTypeRKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEE", "tensorrt_llm::executor::SamplingConfig::SamplingConfig::topPMin"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE8SizeTypeRKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEE", "tensorrt_llm::executor::SamplingConfig::SamplingConfig::topPResetIds"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig26getBeamSearchDiversityRateEv", "tensorrt_llm::executor::SamplingConfig::getBeamSearchDiversityRate"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig12getBeamWidthEv", "tensorrt_llm::executor::SamplingConfig::getBeamWidth"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig16getEarlyStoppingEv", "tensorrt_llm::executor::SamplingConfig::getEarlyStopping"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig19getFrequencyPenaltyEv", "tensorrt_llm::executor::SamplingConfig::getFrequencyPenalty"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig16getLengthPenaltyEv", "tensorrt_llm::executor::SamplingConfig::getLengthPenalty"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig12getMinLengthEv", "tensorrt_llm::executor::SamplingConfig::getMinLength"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig18getPresencePenaltyEv", "tensorrt_llm::executor::SamplingConfig::getPresencePenalty"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig13getRandomSeedEv", "tensorrt_llm::executor::SamplingConfig::getRandomSeed"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig20getRepetitionPenaltyEv", "tensorrt_llm::executor::SamplingConfig::getRepetitionPenalty"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig14getTemperatureEv", 
"tensorrt_llm::executor::SamplingConfig::getTemperature"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig7getTopKEv", "tensorrt_llm::executor::SamplingConfig::getTopK"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig7getTopPEv", "tensorrt_llm::executor::SamplingConfig::getTopP"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig12getTopPDecayEv", "tensorrt_llm::executor::SamplingConfig::getTopPDecay"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig10getTopPMinEv", "tensorrt_llm::executor::SamplingConfig::getTopPMin"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig15getTopPResetIdsEv", "tensorrt_llm::executor::SamplingConfig::getTopPResetIds"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig24mBeamSearchDiversityRateE", "tensorrt_llm::executor::SamplingConfig::mBeamSearchDiversityRate"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig10mBeamWidthE", "tensorrt_llm::executor::SamplingConfig::mBeamWidth"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14mEarlyStoppingE", "tensorrt_llm::executor::SamplingConfig::mEarlyStopping"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig17mFrequencyPenaltyE", "tensorrt_llm::executor::SamplingConfig::mFrequencyPenalty"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14mLengthPenaltyE", "tensorrt_llm::executor::SamplingConfig::mLengthPenalty"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig10mMinLengthE", "tensorrt_llm::executor::SamplingConfig::mMinLength"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig16mPresencePenaltyE", "tensorrt_llm::executor::SamplingConfig::mPresencePenalty"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig11mRandomSeedE", "tensorrt_llm::executor::SamplingConfig::mRandomSeed"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig18mRepetitionPenaltyE", "tensorrt_llm::executor::SamplingConfig::mRepetitionPenalty"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig12mTemperatureE", "tensorrt_llm::executor::SamplingConfig::mTemperature"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig5mTopKE", "tensorrt_llm::executor::SamplingConfig::mTopK"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig5mTopPE", "tensorrt_llm::executor::SamplingConfig::mTopP"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig10mTopPDecayE", "tensorrt_llm::executor::SamplingConfig::mTopPDecay"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig8mTopPMinE", "tensorrt_llm::executor::SamplingConfig::mTopPMin"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor14SamplingConfig13mTopPResetIdsE", "tensorrt_llm::executor::SamplingConfig::mTopPResetIds"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor14SamplingConfigeqERK14SamplingConfig", "tensorrt_llm::executor::SamplingConfig::operator=="], [0, 6, 1, "_CPPv4NK12tensorrt_llm8executor14SamplingConfigeqERK14SamplingConfig", "tensorrt_llm::executor::SamplingConfig::operator==::other"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor15SchedulerConfigE", "tensorrt_llm::executor::SchedulerConfig"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor15SchedulerConfig15SchedulerConfigE15SchedulerPolicy", "tensorrt_llm::executor::SchedulerConfig::SchedulerConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor15SchedulerConfig15SchedulerConfigE15SchedulerPolicy", "tensorrt_llm::executor::SchedulerConfig::SchedulerConfig::policy"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor15SchedulerConfig9getPolicyEv", "tensorrt_llm::executor::SchedulerConfig::getPolicy"], [0, 7, 1, 
"_CPPv4N12tensorrt_llm8executor15SchedulerConfig7mPolicyE", "tensorrt_llm::executor::SchedulerConfig::mPolicy"], [0, 2, 1, "_CPPv4N12tensorrt_llm8executor15SchedulerPolicyE", "tensorrt_llm::executor::SchedulerPolicy"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor15SchedulerPolicy20kGUARANTEED_NO_EVICTE", "tensorrt_llm::executor::SchedulerPolicy::kGUARANTEED_NO_EVICT"], [0, 3, 1, "_CPPv4N12tensorrt_llm8executor15SchedulerPolicy16kMAX_UTILIZATIONE", "tensorrt_llm::executor::SchedulerPolicy::kMAX_UTILIZATION"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor5ShapeE", "tensorrt_llm::executor::Shape"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executor5Shape4BaseE", "tensorrt_llm::executor::Shape::Base"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executor5Shape7DimTypeE", "tensorrt_llm::executor::Shape::DimType"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor5Shape5ShapeENSt16initializer_listI7DimTypeEE", "tensorrt_llm::executor::Shape::Shape"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor5Shape5ShapeEPK7DimTypeN4Base9size_typeE", "tensorrt_llm::executor::Shape::Shape"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor5Shape5ShapeEv", "tensorrt_llm::executor::Shape::Shape"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor5Shape5ShapeEPK7DimTypeN4Base9size_typeE", "tensorrt_llm::executor::Shape::Shape::data"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor5Shape5ShapeENSt16initializer_listI7DimTypeEE", "tensorrt_llm::executor::Shape::Shape::dims"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor5Shape5ShapeEPK7DimTypeN4Base9size_typeE", "tensorrt_llm::executor::Shape::Shape::size"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executor8SizeTypeE", "tensorrt_llm::executor::SizeType"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfigE", "tensorrt_llm::executor::SpeculativeDecodingConfig"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig25SpeculativeDecodingConfigE9VecTokensNSt8optionalI6TensorEERKNSt8optionalI9FloatTypeEE", "tensorrt_llm::executor::SpeculativeDecodingConfig::SpeculativeDecodingConfig"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig25SpeculativeDecodingConfigE9VecTokensNSt8optionalI6TensorEERKNSt8optionalI9FloatTypeEE", "tensorrt_llm::executor::SpeculativeDecodingConfig::SpeculativeDecodingConfig::acceptanceThreshold"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig25SpeculativeDecodingConfigE9VecTokensNSt8optionalI6TensorEERKNSt8optionalI9FloatTypeEE", "tensorrt_llm::executor::SpeculativeDecodingConfig::SpeculativeDecodingConfig::logits"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig25SpeculativeDecodingConfigE9VecTokensNSt8optionalI6TensorEERKNSt8optionalI9FloatTypeEE", "tensorrt_llm::executor::SpeculativeDecodingConfig::SpeculativeDecodingConfig::tokens"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor25SpeculativeDecodingConfig22getAcceptanceThresholdEv", "tensorrt_llm::executor::SpeculativeDecodingConfig::getAcceptanceThreshold"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor25SpeculativeDecodingConfig9getLogitsEv", "tensorrt_llm::executor::SpeculativeDecodingConfig::getLogits"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor25SpeculativeDecodingConfig9getTokensEv", "tensorrt_llm::executor::SpeculativeDecodingConfig::getTokens"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig20mAcceptanceThresholdE", "tensorrt_llm::executor::SpeculativeDecodingConfig::mAcceptanceThreshold"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig7mLogitsE", "tensorrt_llm::executor::SpeculativeDecodingConfig::mLogits"], [0, 7, 1, 
"_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig7mTokensE", "tensorrt_llm::executor::SpeculativeDecodingConfig::mTokens"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor19StaticBatchingStatsE", "tensorrt_llm::executor::StaticBatchingStats"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor19StaticBatchingStats13emptyGenSlotsE", "tensorrt_llm::executor::StaticBatchingStats::emptyGenSlots"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor19StaticBatchingStats18numContextRequestsE", "tensorrt_llm::executor::StaticBatchingStats::numContextRequests"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor19StaticBatchingStats12numCtxTokensE", "tensorrt_llm::executor::StaticBatchingStats::numCtxTokens"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor19StaticBatchingStats12numGenTokensE", "tensorrt_llm::executor::StaticBatchingStats::numGenTokens"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor19StaticBatchingStats20numScheduledRequestsE", "tensorrt_llm::executor::StaticBatchingStats::numScheduledRequests"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executor9StreamPtrE", "tensorrt_llm::executor::StreamPtr"], [0, 4, 1, "_CPPv4N12tensorrt_llm8executor6TensorE", "tensorrt_llm::executor::Tensor"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executor6Tensor13CudaStreamPtrE", "tensorrt_llm::executor::Tensor::CudaStreamPtr"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executor6Tensor4ImplE", "tensorrt_llm::executor::Tensor::Impl"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6Tensor6TensorENSt10shared_ptrIN7runtime7ITensorEEE", "tensorrt_llm::executor::Tensor::Tensor"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6Tensor6TensorERK6Tensor", "tensorrt_llm::executor::Tensor::Tensor"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6Tensor6TensorERR6Tensor", "tensorrt_llm::executor::Tensor::Tensor"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6Tensor6TensorEv", "tensorrt_llm::executor::Tensor::Tensor"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor6TensorERK6Tensor", "tensorrt_llm::executor::Tensor::Tensor::other"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor6TensorERR6Tensor", "tensorrt_llm::executor::Tensor::Tensor::other"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor6TensorENSt10shared_ptrIN7runtime7ITensorEEE", "tensorrt_llm::executor::Tensor::Tensor::tensor"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor6Tensor6copyToENSt10shared_ptrI4ImplEE13CudaStreamPtr", "tensorrt_llm::executor::Tensor::copyTo"], [0, 6, 1, "_CPPv4NK12tensorrt_llm8executor6Tensor6copyToENSt10shared_ptrI4ImplEE13CudaStreamPtr", "tensorrt_llm::executor::Tensor::copyTo::stream"], [0, 6, 1, "_CPPv4NK12tensorrt_llm8executor6Tensor6copyToENSt10shared_ptrI4ImplEE13CudaStreamPtr", "tensorrt_llm::executor::Tensor::copyTo::tensor"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor6Tensor9copyToCpuEN6Tensor13CudaStreamPtrE", "tensorrt_llm::executor::Tensor::copyToCpu"], [0, 6, 1, "_CPPv4NK12tensorrt_llm8executor6Tensor9copyToCpuEN6Tensor13CudaStreamPtrE", "tensorrt_llm::executor::Tensor::copyToCpu::stream"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor6Tensor9copyToGpuEN6Tensor13CudaStreamPtrE", "tensorrt_llm::executor::Tensor::copyToGpu"], [0, 6, 1, "_CPPv4NK12tensorrt_llm8executor6Tensor9copyToGpuEN6Tensor13CudaStreamPtrE", "tensorrt_llm::executor::Tensor::copyToGpu::stream"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor6Tensor13copyToManagedEN6Tensor13CudaStreamPtrE", "tensorrt_llm::executor::Tensor::copyToManaged"], [0, 6, 1, "_CPPv4NK12tensorrt_llm8executor6Tensor13copyToManagedEN6Tensor13CudaStreamPtrE", "tensorrt_llm::executor::Tensor::copyToManaged::stream"], [0, 5, 1, 
"_CPPv4NK12tensorrt_llm8executor6Tensor12copyToPinnedEN6Tensor13CudaStreamPtrE", "tensorrt_llm::executor::Tensor::copyToPinned"], [0, 6, 1, "_CPPv4NK12tensorrt_llm8executor6Tensor12copyToPinnedEN6Tensor13CudaStreamPtrE", "tensorrt_llm::executor::Tensor::copyToPinned::stream"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor6Tensor18copyToPooledPinnedEN6Tensor13CudaStreamPtrE", "tensorrt_llm::executor::Tensor::copyToPooledPinned"], [0, 6, 1, "_CPPv4NK12tensorrt_llm8executor6Tensor18copyToPooledPinnedEN6Tensor13CudaStreamPtrE", "tensorrt_llm::executor::Tensor::copyToPooledPinned::stream"], [0, 5, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor3cpuE6Tensor5Shape", "tensorrt_llm::executor::Tensor::cpu"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6Tensor3cpuE8DataType5Shape", "tensorrt_llm::executor::Tensor::cpu"], [0, 8, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor3cpuE6Tensor5Shape", "tensorrt_llm::executor::Tensor::cpu::T"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor3cpuE8DataType5Shape", "tensorrt_llm::executor::Tensor::cpu::dataType"], [0, 6, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor3cpuE6Tensor5Shape", "tensorrt_llm::executor::Tensor::cpu::shape"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor3cpuE8DataType5Shape", "tensorrt_llm::executor::Tensor::cpu::shape"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6Tensor6detail9ofITensorENSt10shared_ptrIN7runtime7ITensorEEE", "tensorrt_llm::executor::Tensor::detail::ofITensor"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor6detail9ofITensorENSt10shared_ptrIN7runtime7ITensorEEE", "tensorrt_llm::executor::Tensor::detail::ofITensor::tensor"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6Tensor6detail9toITensorERK6Tensor", "tensorrt_llm::executor::Tensor::detail::toITensor"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor6detail9toITensorERK6Tensor", "tensorrt_llm::executor::Tensor::detail::toITensor::tensor"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6Tensor7getDataEv", "tensorrt_llm::executor::Tensor::getData"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor6Tensor7getDataEv", "tensorrt_llm::executor::Tensor::getData"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor6Tensor11getDataTypeEv", "tensorrt_llm::executor::Tensor::getDataType"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor6Tensor13getMemoryTypeEv", "tensorrt_llm::executor::Tensor::getMemoryType"], [0, 5, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor14getRuntimeTypeE8DataTypev", "tensorrt_llm::executor::Tensor::getRuntimeType"], [0, 8, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor14getRuntimeTypeE8DataTypev", "tensorrt_llm::executor::Tensor::getRuntimeType::T"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor6Tensor8getShapeEv", "tensorrt_llm::executor::Tensor::getShape"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor6Tensor7getSizeEv", "tensorrt_llm::executor::Tensor::getSize"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor6Tensor14getSizeInBytesEv", "tensorrt_llm::executor::Tensor::getSizeInBytes"], [0, 5, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor3gpuE6Tensor13CudaStreamPtr5Shape", "tensorrt_llm::executor::Tensor::gpu"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6Tensor3gpuE8DataType13CudaStreamPtr5Shape", "tensorrt_llm::executor::Tensor::gpu"], [0, 8, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor3gpuE6Tensor13CudaStreamPtr5Shape", "tensorrt_llm::executor::Tensor::gpu::T"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor3gpuE8DataType13CudaStreamPtr5Shape", "tensorrt_llm::executor::Tensor::gpu::dataType"], [0, 6, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor3gpuE6Tensor13CudaStreamPtr5Shape", 
"tensorrt_llm::executor::Tensor::gpu::shape"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor3gpuE8DataType13CudaStreamPtr5Shape", "tensorrt_llm::executor::Tensor::gpu::shape"], [0, 6, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor3gpuE6Tensor13CudaStreamPtr5Shape", "tensorrt_llm::executor::Tensor::gpu::stream"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor3gpuE8DataType13CudaStreamPtr5Shape", "tensorrt_llm::executor::Tensor::gpu::stream"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor6Tensor7mTensorE", "tensorrt_llm::executor::Tensor::mTensor"], [0, 5, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor7managedE6Tensor5Shape", "tensorrt_llm::executor::Tensor::managed"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6Tensor7managedE8DataType5Shape", "tensorrt_llm::executor::Tensor::managed"], [0, 8, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor7managedE6Tensor5Shape", "tensorrt_llm::executor::Tensor::managed::T"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor7managedE8DataType5Shape", "tensorrt_llm::executor::Tensor::managed::dataType"], [0, 6, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor7managedE6Tensor5Shape", "tensorrt_llm::executor::Tensor::managed::shape"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor7managedE8DataType5Shape", "tensorrt_llm::executor::Tensor::managed::shape"], [0, 5, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor2ofE6TensorP1T5Shape", "tensorrt_llm::executor::Tensor::of"], [0, 5, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor2ofE6TensorR1T", "tensorrt_llm::executor::Tensor::of"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6Tensor2ofE8DataTypePv5Shape", "tensorrt_llm::executor::Tensor::of"], [0, 8, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor2ofE6TensorP1T5Shape", "tensorrt_llm::executor::Tensor::of::T"], [0, 8, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor2ofE6TensorR1T", "tensorrt_llm::executor::Tensor::of::T"], [0, 6, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor2ofE6TensorP1T5Shape", "tensorrt_llm::executor::Tensor::of::data"], [0, 6, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor2ofE6TensorR1T", "tensorrt_llm::executor::Tensor::of::data"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor2ofE8DataTypePv5Shape", "tensorrt_llm::executor::Tensor::of::data"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor2ofE8DataTypePv5Shape", "tensorrt_llm::executor::Tensor::of::dataType"], [0, 6, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor2ofE6TensorP1T5Shape", "tensorrt_llm::executor::Tensor::of::shape"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor2ofE8DataTypePv5Shape", "tensorrt_llm::executor::Tensor::of::shape"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor6TensorcvbEv", "tensorrt_llm::executor::Tensor::operator bool"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor6TensorneERK6Tensor", "tensorrt_llm::executor::Tensor::operator!="], [0, 6, 1, "_CPPv4NK12tensorrt_llm8executor6TensorneERK6Tensor", "tensorrt_llm::executor::Tensor::operator!=::rhs"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6TensoraSERK6Tensor", "tensorrt_llm::executor::Tensor::operator="], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6TensoraSERR6Tensor", "tensorrt_llm::executor::Tensor::operator="], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6TensoraSERK6Tensor", "tensorrt_llm::executor::Tensor::operator=::other"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6TensoraSERR6Tensor", "tensorrt_llm::executor::Tensor::operator=::other"], [0, 5, 1, "_CPPv4NK12tensorrt_llm8executor6TensoreqERK6Tensor", "tensorrt_llm::executor::Tensor::operator=="], [0, 6, 1, "_CPPv4NK12tensorrt_llm8executor6TensoreqERK6Tensor", 
"tensorrt_llm::executor::Tensor::operator==::rhs"], [0, 5, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor6pinnedE6Tensor5Shape", "tensorrt_llm::executor::Tensor::pinned"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6Tensor6pinnedE8DataType5Shape", "tensorrt_llm::executor::Tensor::pinned"], [0, 8, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor6pinnedE6Tensor5Shape", "tensorrt_llm::executor::Tensor::pinned::T"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor6pinnedE8DataType5Shape", "tensorrt_llm::executor::Tensor::pinned::dataType"], [0, 6, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor6pinnedE6Tensor5Shape", "tensorrt_llm::executor::Tensor::pinned::shape"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor6pinnedE8DataType5Shape", "tensorrt_llm::executor::Tensor::pinned::shape"], [0, 5, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor12pooledPinnedE6Tensor5Shape", "tensorrt_llm::executor::Tensor::pooledPinned"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6Tensor12pooledPinnedE8DataType5Shape", "tensorrt_llm::executor::Tensor::pooledPinned"], [0, 8, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor12pooledPinnedE6Tensor5Shape", "tensorrt_llm::executor::Tensor::pooledPinned::T"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor12pooledPinnedE8DataType5Shape", "tensorrt_llm::executor::Tensor::pooledPinned::dataType"], [0, 6, 1, "_CPPv4I0EN12tensorrt_llm8executor6Tensor12pooledPinnedE6Tensor5Shape", "tensorrt_llm::executor::Tensor::pooledPinned::shape"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor12pooledPinnedE8DataType5Shape", "tensorrt_llm::executor::Tensor::pooledPinned::shape"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6Tensor7setFromERK6Tensor13CudaStreamPtr", "tensorrt_llm::executor::Tensor::setFrom"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor7setFromERK6Tensor13CudaStreamPtr", "tensorrt_llm::executor::Tensor::setFrom::other"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor7setFromERK6Tensor13CudaStreamPtr", "tensorrt_llm::executor::Tensor::setFrom::stream"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6Tensor7setZeroE13CudaStreamPtr", "tensorrt_llm::executor::Tensor::setZero"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6Tensor7setZeroE13CudaStreamPtr", "tensorrt_llm::executor::Tensor::setZero::stream"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6TensorD0Ev", "tensorrt_llm::executor::Tensor::~Tensor"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executor9TensorPtrE", "tensorrt_llm::executor::TensorPtr"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executor11TokenIdTypeE", "tensorrt_llm::executor::TokenIdType"], [0, 4, 1, "_CPPv4I0_bEN12tensorrt_llm8executor10TypeTraitsE", "tensorrt_llm::executor::TypeTraits"], [0, 8, 1, "_CPPv4I0_bEN12tensorrt_llm8executor10TypeTraitsE", "tensorrt_llm::executor::TypeTraits::T"], [0, 4, 1, "_CPPv4I0EN12tensorrt_llm8executor10TypeTraitsIP1TEE", "tensorrt_llm::executor::TypeTraits&lt;T*&gt;"], [0, 8, 1, "_CPPv4I0EN12tensorrt_llm8executor10TypeTraitsIP1TEE", "tensorrt_llm::executor::TypeTraits&lt;T*&gt;::T"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor10TypeTraitsIP1TE5valueE", "tensorrt_llm::executor::TypeTraits&lt;T*&gt;::value"], [0, 4, 1, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsIbEE", "tensorrt_llm::executor::TypeTraits&lt;bool&gt;"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor10TypeTraitsIbE5valueE", "tensorrt_llm::executor::TypeTraits&lt;bool&gt;::value"], [0, 4, 1, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsIfEE", "tensorrt_llm::executor::TypeTraits&lt;float&gt;"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor10TypeTraitsIfE5valueE", 
"tensorrt_llm::executor::TypeTraits&lt;float&gt;::value"], [0, 4, 1, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsI4halfEE", "tensorrt_llm::executor::TypeTraits&lt;half&gt;"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor10TypeTraitsI4halfE5valueE", "tensorrt_llm::executor::TypeTraits&lt;half&gt;::value"], [0, 4, 1, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt7int32_tEEE", "tensorrt_llm::executor::TypeTraits&lt;std::int32_t&gt;"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt7int32_tEE5valueE", "tensorrt_llm::executor::TypeTraits&lt;std::int32_t&gt;::value"], [0, 4, 1, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt7int64_tEEE", "tensorrt_llm::executor::TypeTraits&lt;std::int64_t&gt;"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt7int64_tEE5valueE", "tensorrt_llm::executor::TypeTraits&lt;std::int64_t&gt;::value"], [0, 4, 1, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt6int8_tEEE", "tensorrt_llm::executor::TypeTraits&lt;std::int8_t&gt;"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt6int8_tEE5valueE", "tensorrt_llm::executor::TypeTraits&lt;std::int8_t&gt;::value"], [0, 4, 1, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt7uint8_tEEE", "tensorrt_llm::executor::TypeTraits&lt;std::uint8_t&gt;"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt7uint8_tEE5valueE", "tensorrt_llm::executor::TypeTraits&lt;std::uint8_t&gt;::value"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executor11VecLogProbsE", "tensorrt_llm::executor::VecLogProbs"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executor9VecTokensE", "tensorrt_llm::executor::VecTokens"], [0, 1, 1, "_CPPv4N12tensorrt_llm8executor6detailE", "tensorrt_llm::executor::detail"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6detail9ofITensorENSt10shared_ptrIN7runtime7ITensorEEE", "tensorrt_llm::executor::detail::ofITensor"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6detail9ofITensorENSt10shared_ptrIN7runtime7ITensorEEE", "tensorrt_llm::executor::detail::ofITensor::tensor"], [0, 5, 1, "_CPPv4N12tensorrt_llm8executor6detail9toITensorERK6Tensor", "tensorrt_llm::executor::detail::toITensor"], [0, 6, 1, "_CPPv4N12tensorrt_llm8executor6detail9toITensorERK6Tensor", "tensorrt_llm::executor::detail::toITensor::tensor"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor30kDefaultIterStatsMaxIterationsE", "tensorrt_llm::executor::kDefaultIterStatsMaxIterations"], [0, 7, 1, "_CPPv4N12tensorrt_llm8executor33kDefaultRequestStatsMaxIterationsE", "tensorrt_llm::executor::kDefaultRequestStatsMaxIterations"], [1, 1, 1, "_CPPv4N12tensorrt_llm6layersE", "tensorrt_llm::layers"], [0, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [0, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, 
"_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtimeE", "tensorrt_llm::runtime"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataTypeE", "tensorrt_llm::runtime::BufferDataType"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb", "tensorrt_llm::runtime::BufferDataType::BufferDataType"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb", "tensorrt_llm::runtime::BufferDataType::BufferDataType::_unsigned"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb", "tensorrt_llm::runtime::BufferDataType::BufferDataType::dataType"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb", "tensorrt_llm::runtime::BufferDataType::BufferDataType::pointer"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType11getDataTypeEv", "tensorrt_llm::runtime::BufferDataType::getDataType"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType7getSizeEv", "tensorrt_llm::runtime::BufferDataType::getSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType9isPointerEv", "tensorrt_llm::runtime::BufferDataType::isPointer"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType10isUnsignedEv", "tensorrt_llm::runtime::BufferDataType::isUnsigned"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType15kTrtPointerTypeE", "tensorrt_llm::runtime::BufferDataType::kTrtPointerType"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType9mDataTypeE", "tensorrt_llm::runtime::BufferDataType::mDataType"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType8mPointerE", "tensorrt_llm::runtime::BufferDataType::mPointer"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType9mUnsignedE", "tensorrt_llm::runtime::BufferDataType::mUnsigned"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataTypecvN8nvinfer18DataTypeEEv", "tensorrt_llm::runtime::BufferDataType::operator nvinfer1::DataType"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManagerE", "tensorrt_llm::runtime::BufferManager"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager13BufferManagerE13CudaStreamPtrb", "tensorrt_llm::runtime::BufferManager::BufferManager"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager13BufferManagerE13CudaStreamPtrb", "tensorrt_llm::runtime::BufferManager::BufferManager::stream"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager13BufferManagerE13CudaStreamPtrb", "tensorrt_llm::runtime::BufferManager::BufferManager::trimPool"], [1, 1, 1, 
"_CPPv4N12tensorrt_llm7runtime13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::BufferManager::CudaStreamPtr"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10IBufferPtrE", "tensorrt_llm::runtime::BufferManager::IBufferPtr"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10ITensorPtrE", "tensorrt_llm::runtime::BufferManager::ITensorPtr"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::dims"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::memoryType"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::memoryType"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::size"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::type"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::allocate::type"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copy"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv", "tensorrt_llm::runtime::BufferManager::copy"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType", "tensorrt_llm::runtime::BufferManager::copy"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy::dst"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::dst"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv", "tensorrt_llm::runtime::BufferManager::copy::dst"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::dst"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy::dst"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::dstType"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy::src"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::src"], [1, 6, 1, 
"_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv", "tensorrt_llm::runtime::BufferManager::copy::src"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::src"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer", "tensorrt_llm::runtime::BufferManager::copy::src"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copy::srcType"], [1, 5, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [1, 5, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [1, 5, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom"], [1, 8, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::T"], [1, 8, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::T"], [1, 8, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::T"], [1, 6, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::dims"], [1, 6, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::dims"], [1, 6, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [1, 6, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [1, 6, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::memoryType"], [1, 6, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::src"], [1, 6, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::src"], [1, 6, 1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", 
"tensorrt_llm::runtime::BufferManager::copyFrom::src"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::src"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType", "tensorrt_llm::runtime::BufferManager::copyFrom::src"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu::dims"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu::size"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu::type"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::cpu::type"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyBuffer"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyBuffer::memoryType"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyBuffer::type"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyTensor"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyTensor::memoryType"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::emptyTensor::type"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager9getStreamEv", "tensorrt_llm::runtime::BufferManager::getStream"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu::dims"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu::size"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu::type"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpu::type"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7gpuSyncEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpuSync"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7gpuSyncENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpuSync"], [1, 6, 1, 
"_CPPv4N12tensorrt_llm7runtime13BufferManager7gpuSyncEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpuSync::dims"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7gpuSyncENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpuSync::size"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7gpuSyncEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpuSync::type"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7gpuSyncENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::gpuSync::type"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14initMemoryPoolEi", "tensorrt_llm::runtime::BufferManager::initMemoryPool"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14initMemoryPoolEi", "tensorrt_llm::runtime::BufferManager::initMemoryPool::device"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10kBYTE_TYPEE", "tensorrt_llm::runtime::BufferManager::kBYTE_TYPE"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7mStreamE", "tensorrt_llm::runtime::BufferManager::mStream"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager9mTrimPoolE", "tensorrt_llm::runtime::BufferManager::mTrimPool"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7managedEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::managed"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7managedENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::managed"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7managedEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::managed::dims"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7managedENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::managed::size"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7managedEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::managed::type"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7managedENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::managed::type"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEi", "tensorrt_llm::runtime::BufferManager::memoryPoolFree"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEv", "tensorrt_llm::runtime::BufferManager::memoryPoolFree"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEi", "tensorrt_llm::runtime::BufferManager::memoryPoolFree::device"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEi", "tensorrt_llm::runtime::BufferManager::memoryPoolReserved"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEv", "tensorrt_llm::runtime::BufferManager::memoryPoolReserved"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEi", "tensorrt_llm::runtime::BufferManager::memoryPoolReserved::device"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToENSt6size_tE", "tensorrt_llm::runtime::BufferManager::memoryPoolTrimTo"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToEiNSt6size_tE", "tensorrt_llm::runtime::BufferManager::memoryPoolTrimTo"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToEiNSt6size_tE", "tensorrt_llm::runtime::BufferManager::memoryPoolTrimTo::device"], [1, 6, 1, 
"_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToENSt6size_tE", "tensorrt_llm::runtime::BufferManager::memoryPoolTrimTo::size"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToEiNSt6size_tE", "tensorrt_llm::runtime::BufferManager::memoryPoolTrimTo::size"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEi", "tensorrt_llm::runtime::BufferManager::memoryPoolUsed"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEv", "tensorrt_llm::runtime::BufferManager::memoryPoolUsed"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEi", "tensorrt_llm::runtime::BufferManager::memoryPoolUsed::device"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned::dims"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned::size"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned::type"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinned::type"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinnedPool"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinnedPool"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinnedPool::dims"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinnedPool::size"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolEN8nvinfer14DimsEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinnedPool::type"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolENSt6size_tEN8nvinfer18DataTypeE", "tensorrt_llm::runtime::BufferManager::pinnedPool::type"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager6setMemER7IBuffer7int32_t", "tensorrt_llm::runtime::BufferManager::setMem"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager6setMemER7IBuffer7int32_t", "tensorrt_llm::runtime::BufferManager::setMem::buffer"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager6setMemER7IBuffer7int32_t", "tensorrt_llm::runtime::BufferManager::setMem::value"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7setZeroER7IBuffer", "tensorrt_llm::runtime::BufferManager::setZero"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7setZeroER7IBuffer", "tensorrt_llm::runtime::BufferManager::setZero::buffer"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13BufferManagerD0Ev", "tensorrt_llm::runtime::BufferManager::~BufferManager"], [1, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime11BufferRangeE", "tensorrt_llm::runtime::BufferRange"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange4BaseE", 
"tensorrt_llm::runtime::BufferRange::Base"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeEP1T9size_type", "tensorrt_llm::runtime::BufferRange::BufferRange"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeER7IBuffer", "tensorrt_llm::runtime::BufferRange::BufferRange"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeER7IBuffer", "tensorrt_llm::runtime::BufferRange::BufferRange::buffer"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeEP1T9size_type", "tensorrt_llm::runtime::BufferRange::BufferRange::data"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeEP1T9size_type", "tensorrt_llm::runtime::BufferRange::BufferRange::size"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime11BufferRangeE", "tensorrt_llm::runtime::BufferRange::T"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEventE", "tensorrt_llm::runtime::CudaEvent"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb", "tensorrt_llm::runtime::CudaEvent::CudaEvent"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventEj", "tensorrt_llm::runtime::CudaEvent::CudaEvent"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb", "tensorrt_llm::runtime::CudaEvent::CudaEvent::event"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventEj", "tensorrt_llm::runtime::CudaEvent::CudaEvent::flags"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb", "tensorrt_llm::runtime::CudaEvent::CudaEvent::ownsEvent"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7DeleterE", "tensorrt_llm::runtime::CudaEvent::Deleter"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEb", "tensorrt_llm::runtime::CudaEvent::Deleter::Deleter"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEv", "tensorrt_llm::runtime::CudaEvent::Deleter::Deleter"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEb", "tensorrt_llm::runtime::CudaEvent::Deleter::Deleter::ownsEvent"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter10mOwnsEventE", "tensorrt_llm::runtime::CudaEvent::Deleter::mOwnsEvent"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent7DeleterclE7pointer", "tensorrt_llm::runtime::CudaEvent::Deleter::operator()"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent7DeleterclE7pointer", "tensorrt_llm::runtime::CudaEvent::Deleter::operator()::event"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent8EventPtrE", "tensorrt_llm::runtime::CudaEvent::EventPtr"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent12element_typeE", "tensorrt_llm::runtime::CudaEvent::element_type"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent3getEv", "tensorrt_llm::runtime::CudaEvent::get"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent6mEventE", "tensorrt_llm::runtime::CudaEvent::mEvent"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaEvent::pointer"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent11synchronizeEv", "tensorrt_llm::runtime::CudaEvent::synchronize"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStreamE", "tensorrt_llm::runtime::CudaStream"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_t", "tensorrt_llm::runtime::CudaStream::CudaStream"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib", "tensorrt_llm::runtime::CudaStream::CudaStream"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji", 
"tensorrt_llm::runtime::CudaStream::CudaStream"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib", "tensorrt_llm::runtime::CudaStream::CudaStream::device"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji", "tensorrt_llm::runtime::CudaStream::CudaStream::flags"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib", "tensorrt_llm::runtime::CudaStream::CudaStream::ownsStream"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji", "tensorrt_llm::runtime::CudaStream::CudaStream::priority"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_t", "tensorrt_llm::runtime::CudaStream::CudaStream::stream"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib", "tensorrt_llm::runtime::CudaStream::CudaStream::stream"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7DeleterE", "tensorrt_llm::runtime::CudaStream::Deleter"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEb", "tensorrt_llm::runtime::CudaStream::Deleter::Deleter"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEv", "tensorrt_llm::runtime::CudaStream::Deleter::Deleter"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEb", "tensorrt_llm::runtime::CudaStream::Deleter::Deleter::ownsStream"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter11mOwnsStreamE", "tensorrt_llm::runtime::CudaStream::Deleter::mOwnsStream"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream7DeleterclE12cudaStream_t", "tensorrt_llm::runtime::CudaStream::Deleter::operator()"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream7DeleterclE12cudaStream_t", "tensorrt_llm::runtime::CudaStream::Deleter::operator()::stream"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream9StreamPtrE", "tensorrt_llm::runtime::CudaStream::StreamPtr"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream3getEv", "tensorrt_llm::runtime::CudaStream::get"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream9getDeviceEv", "tensorrt_llm::runtime::CudaStream::getDevice"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7mDeviceE", "tensorrt_llm::runtime::CudaStream::mDevice"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7mStreamE", "tensorrt_llm::runtime::CudaStream::mStream"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordEN9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaStream::record"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordERK9CudaEvent", "tensorrt_llm::runtime::CudaStream::record"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordEN9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaStream::record::event"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordERK9CudaEvent", "tensorrt_llm::runtime::CudaStream::record::event"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream11synchronizeEv", "tensorrt_llm::runtime::CudaStream::synchronize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitEN9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaStream::wait"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitERK9CudaEvent", "tensorrt_llm::runtime::CudaStream::wait"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitEN9CudaEvent7pointerE", "tensorrt_llm::runtime::CudaStream::wait::event"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitERK9CudaEvent", "tensorrt_llm::runtime::CudaStream::wait::event"], [1, 4, 1, 
"_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime14DataTypeTraitsE", "tensorrt_llm::runtime::DataTypeTraits"], [1, 8, 1, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime14DataTypeTraitsE", "tensorrt_llm::runtime::DataTypeTraits::kDataType"], [1, 8, 1, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime14DataTypeTraitsE", "tensorrt_llm::runtime::DataTypeTraits::kIsPointer"], [1, 8, 1, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime14DataTypeTraitsE", "tensorrt_llm::runtime::DataTypeTraits::kIsUnsigned"], [1, 4, 1, "_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEEE", "tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt;"], [1, 8, 1, "_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEEE", "tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt;::kDataType"], [1, 8, 1, "_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEEE", "tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt;::kUnsigned"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt;::name"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt;::size"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt;::type"], [1, 4, 1, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kBOOL, kUnsigned&gt;"], [1, 8, 1, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kBOOL, kUnsigned&gt;::kUnsigned"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kBOOL, kUnsigned&gt;::name"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kBOOL, kUnsigned&gt;::size"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kBOOL, kUnsigned&gt;::type"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kFLOAT&gt;"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kFLOAT&gt;::name"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kFLOAT&gt;::size"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kFLOAT&gt;::type"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kHALF&gt;"], [1, 7, 1, 
"_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kHALF&gt;::name"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kHALF&gt;::size"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kHALF&gt;::type"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32, true&gt;"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32, true&gt;::name"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32, true&gt;::size"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32, true&gt;::type"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32&gt;"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32&gt;::name"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32&gt;::size"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32&gt;::type"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64, true&gt;"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64, true&gt;::name"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64, true&gt;::size"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64, true&gt;::type"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64&gt;"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64&gt;::name"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64&gt;::size"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64&gt;::type"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT8&gt;"], [1, 7, 1, 
"_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT8&gt;::name"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT8&gt;::size"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT8&gt;::type"], [1, 4, 1, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kUINT8, kUnsigned&gt;"], [1, 8, 1, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedEE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kUINT8, kUnsigned&gt;::kUnsigned"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4nameE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kUINT8, kUnsigned&gt;::name"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4sizeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kUINT8, kUnsigned&gt;::size"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4typeE", "tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kUINT8, kUnsigned&gt;::type"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInputE", "tensorrt_llm::runtime::DecodingInput"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::endIds"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::logits"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::maxAttentionWindow"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::maxBatchSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::maxLength"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType8SizeType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::DecodingInput::DecodingInput::sinkTokenLength"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputsE", "tensorrt_llm::runtime::DecodingInput::MedusaInputs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs22medusaCurTokensPerStepE", "tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaCurTokensPerStep"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs12medusaLogitsE", "tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaLogits"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs11medusaPathsE", 
"tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaPaths"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs25medusaTargetTokensPerStepE", "tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaTargetTokensPerStep"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs13medusaTreeIdsE", "tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaTreeIds"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9TensorPtrE", "tensorrt_llm::runtime::DecodingInput::TensorPtr"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsLensE", "tensorrt_llm::runtime::DecodingInput::badWordsLens"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsListE", "tensorrt_llm::runtime::DecodingInput::badWordsList"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsPtrsE", "tensorrt_llm::runtime::DecodingInput::badWordsPtrs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput10batchSlotsE", "tensorrt_llm::runtime::DecodingInput::batchSlots"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput16cacheIndirectionE", "tensorrt_llm::runtime::DecodingInput::cacheIndirection"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13embeddingBiasE", "tensorrt_llm::runtime::DecodingInput::embeddingBias"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput6endIdsE", "tensorrt_llm::runtime::DecodingInput::endIds"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput8finishedE", "tensorrt_llm::runtime::DecodingInput::finished"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput7lengthsE", "tensorrt_llm::runtime::DecodingInput::lengths"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput6logitsE", "tensorrt_llm::runtime::DecodingInput::logits"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9logitsVecE", "tensorrt_llm::runtime::DecodingInput::logitsVec"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput18maxAttentionWindowE", "tensorrt_llm::runtime::DecodingInput::maxAttentionWindow"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput14maxBadWordsLenE", "tensorrt_llm::runtime::DecodingInput::maxBadWordsLen"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12maxBatchSizeE", "tensorrt_llm::runtime::DecodingInput::maxBatchSize"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9maxLengthE", "tensorrt_llm::runtime::DecodingInput::maxLength"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput15maxStopWordsLenE", "tensorrt_llm::runtime::DecodingInput::maxStopWordsLen"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12medusaInputsE", "tensorrt_llm::runtime::DecodingInput::medusaInputs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput17noRepeatNgramSizeE", "tensorrt_llm::runtime::DecodingInput::noRepeatNgramSize"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput19sequenceLimitLengthE", "tensorrt_llm::runtime::DecodingInput::sequenceLimitLength"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput15sinkTokenLengthE", "tensorrt_llm::runtime::DecodingInput::sinkTokenLength"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput4stepE", "tensorrt_llm::runtime::DecodingInput::step"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsLensE", "tensorrt_llm::runtime::DecodingInput::stopWordsLens"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsListE", "tensorrt_llm::runtime::DecodingInput::stopWordsList"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsPtrsE", 
"tensorrt_llm::runtime::DecodingInput::stopWordsPtrs"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingModeE", "tensorrt_llm::runtime::DecodingMode"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode10BeamSearchEv", "tensorrt_llm::runtime::DecodingMode::BeamSearch"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode12DecodingModeE14UnderlyingType", "tensorrt_llm::runtime::DecodingMode::DecodingMode"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode12DecodingModeE14UnderlyingType", "tensorrt_llm::runtime::DecodingMode::DecodingMode::state"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode6MedusaEv", "tensorrt_llm::runtime::DecodingMode::Medusa"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode4NoneEv", "tensorrt_llm::runtime::DecodingMode::None"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode4TopKEv", "tensorrt_llm::runtime::DecodingMode::TopK"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode8TopKTopPEv", "tensorrt_llm::runtime::DecodingMode::TopKTopP"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode4TopPEv", "tensorrt_llm::runtime::DecodingMode::TopP"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode14UnderlyingTypeE", "tensorrt_llm::runtime::DecodingMode::UnderlyingType"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime12DecodingMode9allBitSetE14UnderlyingType", "tensorrt_llm::runtime::DecodingMode::allBitSet"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime12DecodingMode9allBitSetE14UnderlyingType", "tensorrt_llm::runtime::DecodingMode::allBitSet::bits"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime12DecodingMode9anyBitSetE14UnderlyingType", "tensorrt_llm::runtime::DecodingMode::anyBitSet"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime12DecodingMode9anyBitSetE14UnderlyingType", "tensorrt_llm::runtime::DecodingMode::anyBitSet::bits"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode12isBeamSearchEv", "tensorrt_llm::runtime::DecodingMode::isBeamSearch"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode8isMedusaEv", "tensorrt_llm::runtime::DecodingMode::isMedusa"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode6isNoneEv", "tensorrt_llm::runtime::DecodingMode::isNone"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode6isTopKEv", "tensorrt_llm::runtime::DecodingMode::isTopK"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode13isTopKandTopPEv", "tensorrt_llm::runtime::DecodingMode::isTopKandTopP"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode12isTopKorTopPEv", "tensorrt_llm::runtime::DecodingMode::isTopKorTopP"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode6isTopPEv", "tensorrt_llm::runtime::DecodingMode::isTopP"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode11kBeamSearchE", "tensorrt_llm::runtime::DecodingMode::kBeamSearch"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode7kMedusaE", "tensorrt_llm::runtime::DecodingMode::kMedusa"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode5kNoneE", "tensorrt_llm::runtime::DecodingMode::kNone"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode5kTopKE", "tensorrt_llm::runtime::DecodingMode::kTopK"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode9kTopKTopPE", "tensorrt_llm::runtime::DecodingMode::kTopKTopP"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode5kTopPE", "tensorrt_llm::runtime::DecodingMode::kTopP"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode6mStateE", "tensorrt_llm::runtime::DecodingMode::mState"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime12DecodingModeeqERK12DecodingMode", 
"tensorrt_llm::runtime::DecodingMode::operator=="], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime12DecodingModeeqERK12DecodingMode", "tensorrt_llm::runtime::DecodingMode::operator==::other"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutputE", "tensorrt_llm::runtime::DecodingOutput"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypothesesE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses11cumLogProbsE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::cumLogProbs"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5emptyER13BufferManager", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::empty"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5emptyER13BufferManager", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::empty::manager"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initER13BufferManager11TokenIdType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::init"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initER13BufferManager11TokenIdType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::init::endId"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initER13BufferManager11TokenIdType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::init::manager"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses6isDoneE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::isDone"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses8logProbsE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::logProbs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses15minNormedScoresE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::minNormedScores"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12normedScoresE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::normedScores"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses8numBeamsE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::numBeams"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12outputIdsTgtE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::outputIdsTgt"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7releaseEv", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::release"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape::batchSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape::beamWidth"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape::maxSequenceLength"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses18sequenceLengthsTgtE", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::sequenceLengthsTgt"], [1, 5, 1, 
"_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::slice"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::slice::batchIndex"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE8SizeType8SizeType", "tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::slice::size"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14DecodingOutputE9TensorPtr", "tensorrt_llm::runtime::DecodingOutput::DecodingOutput"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14DecodingOutputE9TensorPtr", "tensorrt_llm::runtime::DecodingOutput::DecodingOutput::ids"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput13MedusaOutputsE", "tensorrt_llm::runtime::DecodingOutput::MedusaOutputs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput13MedusaOutputs27medusaAcceptedLengthsCumSumE", "tensorrt_llm::runtime::DecodingOutput::MedusaOutputs::medusaAcceptedLengthsCumSum"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput13MedusaOutputs23medusaAcceptedTokensLenE", "tensorrt_llm::runtime::DecodingOutput::MedusaOutputs::medusaAcceptedTokensLen"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput13MedusaOutputs21medusaNextDraftTokensE", "tensorrt_llm::runtime::DecodingOutput::MedusaOutputs::medusaNextDraftTokens"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput13MedusaOutputs18medusaPathsOffsetsE", "tensorrt_llm::runtime::DecodingOutput::MedusaOutputs::medusaPathsOffsets"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9TensorPtrE", "tensorrt_llm::runtime::DecodingOutput::TensorPtr"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14beamHypothesesE", "tensorrt_llm::runtime::DecodingOutput::beamHypotheses"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput16cacheIndirectionE", "tensorrt_llm::runtime::DecodingOutput::cacheIndirection"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput11cumLogProbsE", "tensorrt_llm::runtime::DecodingOutput::cumLogProbs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput8finishedE", "tensorrt_llm::runtime::DecodingOutput::finished"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput11finishedSumE", "tensorrt_llm::runtime::DecodingOutput::finishedSum"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput3idsE", "tensorrt_llm::runtime::DecodingOutput::ids"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput17kNegativeInfinityE", "tensorrt_llm::runtime::DecodingOutput::kNegativeInfinity"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput7lengthsE", "tensorrt_llm::runtime::DecodingOutput::lengths"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput8logProbsE", "tensorrt_llm::runtime::DecodingOutput::logProbs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput13medusaOutputsE", "tensorrt_llm::runtime::DecodingOutput::medusaOutputs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9newTokensE", "tensorrt_llm::runtime::DecodingOutput::newTokens"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14newTokensStepsE", "tensorrt_llm::runtime::DecodingOutput::newTokensSteps"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput12newTokensVecE", "tensorrt_llm::runtime::DecodingOutput::newTokensVec"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9parentIdsE", 
"tensorrt_llm::runtime::DecodingOutput::parentIds"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInputE", "tensorrt_llm::runtime::GenerationInput"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput4BaseE", "tensorrt_llm::runtime::GenerationInput::Base"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::endId"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::ids"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::lengths"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::packed"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenerationInput::GenerationInput::padId"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput9TensorPtrE", "tensorrt_llm::runtime::GenerationInput::TensorPtr"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutputE", "tensorrt_llm::runtime::GenerationOutput"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput4BaseE", "tensorrt_llm::runtime::GenerationOutput::Base"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput16GenerationOutputE9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenerationOutput::GenerationOutput"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput16GenerationOutputE9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenerationOutput::GenerationOutput::ids"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput16GenerationOutputE9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenerationOutput::GenerationOutput::lengths"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput9TensorPtrE", "tensorrt_llm::runtime::GenerationOutput::TensorPtr"], [1, 4, 1, "_CPPv4I00EN12tensorrt_llm7runtime22GenericGenerationInputE", "tensorrt_llm::runtime::GenericGenerationInput"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput::endId"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput::ids"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput::lengths"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput::packed"], [1, 6, 1, 
"_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput::padId"], [1, 8, 1, "_CPPv4I00EN12tensorrt_llm7runtime22GenericGenerationInputE", "tensorrt_llm::runtime::GenericGenerationInput::PromptTuningParams"], [1, 8, 1, "_CPPv4I00EN12tensorrt_llm7runtime22GenericGenerationInputE", "tensorrt_llm::runtime::GenericGenerationInput::TTensor"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput9TensorPtrE", "tensorrt_llm::runtime::GenericGenerationInput::TensorPtr"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput12badWordsListE", "tensorrt_llm::runtime::GenericGenerationInput::badWordsList"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput13embeddingBiasE", "tensorrt_llm::runtime::GenericGenerationInput::embeddingBias"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput5endIdE", "tensorrt_llm::runtime::GenericGenerationInput::endId"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput3idsE", "tensorrt_llm::runtime::GenericGenerationInput::ids"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput7lengthsE", "tensorrt_llm::runtime::GenericGenerationInput::lengths"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput12maxNewTokensE", "tensorrt_llm::runtime::GenericGenerationInput::maxNewTokens"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput6packedE", "tensorrt_llm::runtime::GenericGenerationInput::packed"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput5padIdE", "tensorrt_llm::runtime::GenericGenerationInput::padId"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput18promptTuningParamsE", "tensorrt_llm::runtime::GenericGenerationInput::promptTuningParams"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput13stopWordsListE", "tensorrt_llm::runtime::GenericGenerationInput::stopWordsList"], [1, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime23GenericGenerationOutputE", "tensorrt_llm::runtime::GenericGenerationOutput"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput8CallbackE", "tensorrt_llm::runtime::GenericGenerationOutput::Callback"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput23GenericGenerationOutputE9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericGenerationOutput::GenericGenerationOutput"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput23GenericGenerationOutputE9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericGenerationOutput::GenericGenerationOutput::ids"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput23GenericGenerationOutputE9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericGenerationOutput::GenericGenerationOutput::lengths"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime23GenericGenerationOutputE", "tensorrt_llm::runtime::GenericGenerationOutput::TTensor"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput9TensorPtrE", "tensorrt_llm::runtime::GenericGenerationOutput::TensorPtr"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput13contextLogitsE", "tensorrt_llm::runtime::GenericGenerationOutput::contextLogits"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput11cumLogProbsE", "tensorrt_llm::runtime::GenericGenerationOutput::cumLogProbs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput16generationLogitsE", 
"tensorrt_llm::runtime::GenericGenerationOutput::generationLogits"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput3idsE", "tensorrt_llm::runtime::GenericGenerationOutput::ids"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput7lengthsE", "tensorrt_llm::runtime::GenericGenerationOutput::lengths"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput8logProbsE", "tensorrt_llm::runtime::GenericGenerationOutput::logProbs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput16onTokenGeneratedE", "tensorrt_llm::runtime::GenericGenerationOutput::onTokenGenerated"], [1, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime25GenericPromptTuningParamsE", "tensorrt_llm::runtime::GenericPromptTuningParams"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams25GenericPromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericPromptTuningParams::GenericPromptTuningParams"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams25GenericPromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericPromptTuningParams::GenericPromptTuningParams::embeddingTable"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams25GenericPromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericPromptTuningParams::GenericPromptTuningParams::tasks"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams25GenericPromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::GenericPromptTuningParams::GenericPromptTuningParams::vocabSize"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams8SizeTypeE", "tensorrt_llm::runtime::GenericPromptTuningParams::SizeType"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime25GenericPromptTuningParamsE", "tensorrt_llm::runtime::GenericPromptTuningParams::TTensor"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9TensorPtrE", "tensorrt_llm::runtime::GenericPromptTuningParams::TensorPtr"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams14embeddingTableE", "tensorrt_llm::runtime::GenericPromptTuningParams::embeddingTable"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams19promptTuningEnabledE", "tensorrt_llm::runtime::GenericPromptTuningParams::promptTuningEnabled"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams5tasksE", "tensorrt_llm::runtime::GenericPromptTuningParams::tasks"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9vocabSizeE", "tensorrt_llm::runtime::GenericPromptTuningParams::vocabSize"], [1, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime10GptDecoderE", "tensorrt_llm::runtime::GptDecoder"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder13CudaStreamPtrE", "tensorrt_llm::runtime::GptDecoder::CudaStreamPtr"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderERK12DecodingMode6size_t6size_t6size_t6size_t6size_tRK13CudaStreamPtrNSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::GptDecoder::GptDecoder"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderERK12DecodingMode6size_t6size_t6size_t6size_t6size_tRK13CudaStreamPtrNSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::GptDecoder::GptDecoder::maxBatchSize"], [1, 6, 1, 
"_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderERK12DecodingMode6size_t6size_t6size_t6size_t6size_tRK13CudaStreamPtrNSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::GptDecoder::GptDecoder::maxBeamWidth"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderERK12DecodingMode6size_t6size_t6size_t6size_t6size_tRK13CudaStreamPtrNSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::GptDecoder::GptDecoder::maxNumMedusaHeads"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderERK12DecodingMode6size_t6size_t6size_t6size_t6size_tRK13CudaStreamPtrNSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::GptDecoder::GptDecoder::maxSequenceLength"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderERK12DecodingMode6size_t6size_t6size_t6size_t6size_tRK13CudaStreamPtrNSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::GptDecoder::GptDecoder::maxTokensPerStep"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderERK12DecodingMode6size_t6size_t6size_t6size_t6size_tRK13CudaStreamPtrNSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::GptDecoder::GptDecoder::mode"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderERK12DecodingMode6size_t6size_t6size_t6size_t6size_tRK13CudaStreamPtrNSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::GptDecoder::GptDecoder::stream"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderERK12DecodingMode6size_t6size_t6size_t6size_t6size_tRK13CudaStreamPtrNSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::GptDecoder::GptDecoder::vocabSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderERK12DecodingMode6size_t6size_t6size_t6size_t6size_tRK13CudaStreamPtrNSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::GptDecoder::GptDecoder::vocabSizePadded"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime10GptDecoderE", "tensorrt_llm::runtime::GptDecoder::T"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder9TensorPtrE", "tensorrt_llm::runtime::GptDecoder::TensorPtr"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forward"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forward::input"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forward::output"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forwardAsync"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forwardAsync::input"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::GptDecoder::forwardAsync::output"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::GptDecoder::gatherTree"], [1, 6, 1, 
"_CPPv4N12tensorrt_llm7runtime10GptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::GptDecoder::gatherTree::decodingInput"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::GptDecoder::gatherTree::decodingOutput"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::GptDecoder::gatherTree::finalOutputIds"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::GptDecoder::gatherTree::manager"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder17getSamplingConfigEv", "tensorrt_llm::runtime::GptDecoder::getSamplingConfig"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder19mDynamicDecodeLayerE", "tensorrt_llm::runtime::GptDecoder::mDynamicDecodeLayer"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder14mLogProbsTiledE", "tensorrt_llm::runtime::GptDecoder::mLogProbsTiled"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder8mManagerE", "tensorrt_llm::runtime::GptDecoder::mManager"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder13mMaxBatchSizeE", "tensorrt_llm::runtime::GptDecoder::mMaxBatchSize"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5mPropE", "tensorrt_llm::runtime::GptDecoder::mProp"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder15mSamplingConfigE", "tensorrt_llm::runtime::GptDecoder::mSamplingConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t8SizeTypeRKNSt8optionalI9TensorPtrEE", "tensorrt_llm::runtime::GptDecoder::setup"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t8SizeTypeRKNSt8optionalI9TensorPtrEE", "tensorrt_llm::runtime::GptDecoder::setup::batchSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t8SizeTypeRKNSt8optionalI9TensorPtrEE", "tensorrt_llm::runtime::GptDecoder::setup::batchSlots"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t8SizeTypeRKNSt8optionalI9TensorPtrEE", "tensorrt_llm::runtime::GptDecoder::setup::maxSequenceLength"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t8SizeTypeRKNSt8optionalI9TensorPtrEE", "tensorrt_llm::runtime::GptDecoder::setup::samplingConfig"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatchE", "tensorrt_llm::runtime::GptDecoderBatch"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13CudaStreamPtrE", "tensorrt_llm::runtime::GptDecoderBatch::CudaStreamPtr"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16DecodingInputPtrE", "tensorrt_llm::runtime::GptDecoderBatch::DecodingInputPtr"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch17DecodingOutputPtrE", "tensorrt_llm::runtime::GptDecoderBatch::DecodingOutputPtr"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoderBatch::GptDecoderBatch"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoderBatch::GptDecoderBatch::stream"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr", 
"tensorrt_llm::runtime::GptDecoderBatch::GptDecoderBatch::vocabSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr", "tensorrt_llm::runtime::GptDecoderBatch::GptDecoderBatch::vocabSizePadded"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13GptDecoderPtrE", "tensorrt_llm::runtime::GptDecoderBatch::GptDecoderPtr"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14SharedConstPtrE", "tensorrt_llm::runtime::GptDecoderBatch::SharedConstPtr"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9TensorPtrE", "tensorrt_llm::runtime::GptDecoderBatch::TensorPtr"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch21allocateMedusaBuffersEv", "tensorrt_llm::runtime::GptDecoderBatch::allocateMedusaBuffers"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch8finalizeE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::finalize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch8finalizeEv", "tensorrt_llm::runtime::GptDecoderBatch::finalize"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch8finalizeE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::finalize::batchIdx"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync::input"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync::input"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync::output"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsync::output"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch24forwardAsyncFusedDecoderE8SizeTypeRN13decoder_batch6OutputERKN13decoder_batch5InputERK9CudaEvent", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsyncFusedDecoder"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch24forwardAsyncFusedDecoderE8SizeTypeRN13decoder_batch6OutputERKN13decoder_batch5InputERK9CudaEvent", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsyncFusedDecoder::eventStart"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch24forwardAsyncFusedDecoderE8SizeTypeRN13decoder_batch6OutputERKN13decoder_batch5InputERK9CudaEvent", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsyncFusedDecoder::input"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch24forwardAsyncFusedDecoderE8SizeTypeRN13decoder_batch6OutputERKN13decoder_batch5InputERK9CudaEvent", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsyncFusedDecoder::output"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch24forwardAsyncFusedDecoderE8SizeTypeRN13decoder_batch6OutputERKN13decoder_batch5InputERK9CudaEvent", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsyncFusedDecoder::step"], [1, 5, 1, 
"_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch26forwardAsyncUnfusedDecoderE8SizeTypeRN13decoder_batch6OutputERKN13decoder_batch5InputERK9CudaEvent", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsyncUnfusedDecoder"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch26forwardAsyncUnfusedDecoderE8SizeTypeRN13decoder_batch6OutputERKN13decoder_batch5InputERK9CudaEvent", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsyncUnfusedDecoder::eventStart"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch26forwardAsyncUnfusedDecoderE8SizeTypeRN13decoder_batch6OutputERKN13decoder_batch5InputERK9CudaEvent", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsyncUnfusedDecoder::input"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch26forwardAsyncUnfusedDecoderE8SizeTypeRN13decoder_batch6OutputERKN13decoder_batch5InputERK9CudaEvent", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsyncUnfusedDecoder::output"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch26forwardAsyncUnfusedDecoderE8SizeTypeRN13decoder_batch6OutputERKN13decoder_batch5InputERK9CudaEvent", "tensorrt_llm::runtime::GptDecoderBatch::forwardAsyncUnfusedDecoder::step"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE", "tensorrt_llm::runtime::GptDecoderBatch::forwardSync"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncEv", "tensorrt_llm::runtime::GptDecoderBatch::forwardSync"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE", "tensorrt_llm::runtime::GptDecoderBatch::forwardSync::token"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch15getAllNewTokensEv", "tensorrt_llm::runtime::GptDecoderBatch::getAllNewTokens"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch14getCumLogProbsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getCumLogProbs"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch14getCumLogProbsEv", "tensorrt_llm::runtime::GptDecoderBatch::getCumLogProbs"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch14getCumLogProbsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getCumLogProbs::batchIdx"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getFinishedEv", "tensorrt_llm::runtime::GptDecoderBatch::getFinished"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getLogProbsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getLogProbs"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getLogProbsEv", "tensorrt_llm::runtime::GptDecoderBatch::getLogProbs"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getLogProbsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getLogProbs::batchIdx"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch30getMedusaAcceptedLengthsCumSumEv", "tensorrt_llm::runtime::GptDecoderBatch::getMedusaAcceptedLengthsCumSum"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch28getMedusaAcceptedPackedPathsEv", "tensorrt_llm::runtime::GptDecoderBatch::getMedusaAcceptedPackedPaths"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch13getNbFinishedEv", "tensorrt_llm::runtime::GptDecoderBatch::getNbFinished"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch10getNbStepsEv", "tensorrt_llm::runtime::GptDecoderBatch::getNbSteps"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getNewTokensE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getNewTokens"], [1, 6, 1, 
"_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getNewTokensE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getNewTokens::iter"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch18getNextDraftTokensEv", "tensorrt_llm::runtime::GptDecoderBatch::getNextDraftTokens"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getOutputIds"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsEv", "tensorrt_llm::runtime::GptDecoderBatch::getOutputIds"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::getOutputIds::batchIdx"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getParentIdsEv", "tensorrt_llm::runtime::GptDecoderBatch::getParentIds"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mAcceptByLogitsE", "tensorrt_llm::runtime::GptDecoderBatch::mAcceptByLogits"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mActualBatchSizeE", "tensorrt_llm::runtime::GptDecoderBatch::mActualBatchSize"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch23mBatchSlotsAcceptLogitsE", "tensorrt_llm::runtime::GptDecoderBatch::mBatchSlotsAcceptLogits"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch23mBatchSlotsAcceptTokensE", "tensorrt_llm::runtime::GptDecoderBatch::mBatchSlotsAcceptTokens"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch18mBatchSlotsDecoderE", "tensorrt_llm::runtime::GptDecoderBatch::mBatchSlotsDecoder"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mBatchSlotsSetupE", "tensorrt_llm::runtime::GptDecoderBatch::mBatchSlotsSetup"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11mBeamWidthsE", "tensorrt_llm::runtime::GptDecoderBatch::mBeamWidths"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mBufferManagerE", "tensorrt_llm::runtime::GptDecoderBatch::mBufferManager"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mCurandStatesE", "tensorrt_llm::runtime::GptDecoderBatch::mCurandStates"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9mDecodersE", "tensorrt_llm::runtime::GptDecoderBatch::mDecoders"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mDecodingInputsE", "tensorrt_llm::runtime::GptDecoderBatch::mDecodingInputs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mDecodingOutputsE", "tensorrt_llm::runtime::GptDecoderBatch::mDecodingOutputs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12mDraftLogitsE", "tensorrt_llm::runtime::GptDecoderBatch::mDraftLogits"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11mDraftProbsE", "tensorrt_llm::runtime::GptDecoderBatch::mDraftProbs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mDraftTokenIdsE", "tensorrt_llm::runtime::GptDecoderBatch::mDraftTokenIds"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9mFinishedE", "tensorrt_llm::runtime::GptDecoderBatch::mFinished"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mFinishedStepsE", "tensorrt_llm::runtime::GptDecoderBatch::mFinishedSteps"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12mFinishedSumE", "tensorrt_llm::runtime::GptDecoderBatch::mFinishedSum"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mForwardEventE", "tensorrt_llm::runtime::GptDecoderBatch::mForwardEvent"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mForwardTokenE", 
"tensorrt_llm::runtime::GptDecoderBatch::mForwardToken"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mFusedDecoderE", "tensorrt_llm::runtime::GptDecoderBatch::mFusedDecoder"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch29mGeneratedTokensPerEngineStepE", "tensorrt_llm::runtime::GptDecoderBatch::mGeneratedTokensPerEngineStep"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch19mJointDecodingInputE", "tensorrt_llm::runtime::GptDecoderBatch::mJointDecodingInput"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch20mJointDecodingOutputE", "tensorrt_llm::runtime::GptDecoderBatch::mJointDecodingOutput"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch19mMaxAttentionWindowE", "tensorrt_llm::runtime::GptDecoderBatch::mMaxAttentionWindow"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mMaxBadWordsLenE", "tensorrt_llm::runtime::GptDecoderBatch::mMaxBadWordsLen"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mMaxNewTokensE", "tensorrt_llm::runtime::GptDecoderBatch::mMaxNewTokens"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch18mMaxSequenceLengthE", "tensorrt_llm::runtime::GptDecoderBatch::mMaxSequenceLength"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mMaxStopWordsLenE", "tensorrt_llm::runtime::GptDecoderBatch::mMaxStopWordsLen"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch24mMaxTokensPerDecoderStepE", "tensorrt_llm::runtime::GptDecoderBatch::mMaxTokensPerDecoderStep"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch23mMaxTokensPerEngineStepE", "tensorrt_llm::runtime::GptDecoderBatch::mMaxTokensPerEngineStep"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8mNbStepsE", "tensorrt_llm::runtime::GptDecoderBatch::mNbSteps"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mNumDraftTokensE", "tensorrt_llm::runtime::GptDecoderBatch::mNumDraftTokens"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mSinkTokenLengthE", "tensorrt_llm::runtime::GptDecoderBatch::mSinkTokenLength"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch7mStreamE", "tensorrt_llm::runtime::GptDecoderBatch::mStream"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8mStreamsE", "tensorrt_llm::runtime::GptDecoderBatch::mStreams"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch17mTargetLogitsPtrsE", "tensorrt_llm::runtime::GptDecoderBatch::mTargetLogitsPtrs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12mTargetProbsE", "tensorrt_llm::runtime::GptDecoderBatch::mTargetProbs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10mUseMedusaE", "tensorrt_llm::runtime::GptDecoderBatch::mUseMedusa"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10mVocabSizeE", "tensorrt_llm::runtime::GptDecoderBatch::mVocabSize"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mVocabSizePaddedE", "tensorrt_llm::runtime::GptDecoderBatch::mVocabSizePadded"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newBatch"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newBatch::inputs"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newBatch::outputs"], [1, 6, 1, 
"_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newBatch::samplingConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequest"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequest::batchIdx"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequest::request"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequest::samplingConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16newRequestMedusaE8SizeTypeRKN13decoder_batch7RequestE", "tensorrt_llm::runtime::GptDecoderBatch::newRequestMedusa"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16newRequestMedusaE8SizeTypeRKN13decoder_batch7RequestE", "tensorrt_llm::runtime::GptDecoderBatch::newRequestMedusa::batchIdx"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16newRequestMedusaE8SizeTypeRKN13decoder_batch7RequestE", "tensorrt_llm::runtime::GptDecoderBatch::newRequestMedusa::request"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch29newRequestSpeculativeDecodingE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequestSpeculativeDecoding"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch29newRequestSpeculativeDecodingE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequestSpeculativeDecoding::batchIdx"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch29newRequestSpeculativeDecodingE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequestSpeculativeDecoding::request"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch29newRequestSpeculativeDecodingE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig", "tensorrt_llm::runtime::GptDecoderBatch::newRequestSpeculativeDecoding::samplingConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11newRequestsERKNSt6vectorI8SizeTypeEERKNSt6vectorIN13decoder_batch7RequestEEERKNSt6vectorI14SamplingConfigEE", "tensorrt_llm::runtime::GptDecoderBatch::newRequests"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11newRequestsERKNSt6vectorI8SizeTypeEERKNSt6vectorIN13decoder_batch7RequestEEERKNSt6vectorI14SamplingConfigEE", "tensorrt_llm::runtime::GptDecoderBatch::newRequests::requests"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11newRequestsERKNSt6vectorI8SizeTypeEERKNSt6vectorIN13decoder_batch7RequestEEERKNSt6vectorI14SamplingConfigEE", "tensorrt_llm::runtime::GptDecoderBatch::newRequests::samplingConfigs"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11newRequestsERKNSt6vectorI8SizeTypeEERKNSt6vectorIN13decoder_batch7RequestEEERKNSt6vectorI14SamplingConfigEE", "tensorrt_llm::runtime::GptDecoderBatch::newRequests::seqSlots"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch18postProcessRequestE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::postProcessRequest"], [1, 6, 1, 
"_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch18postProcessRequestE8SizeType", "tensorrt_llm::runtime::GptDecoderBatch::postProcessRequest::batchIdx"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::GptDecoderBatch::setup"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::GptDecoderBatch::setup::dtype"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::GptDecoderBatch::setup::fusedDecoder"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::GptDecoderBatch::setup::maxAttentionWindow"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::GptDecoderBatch::setup::maxBatchSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::GptDecoderBatch::setup::maxBeamWidth"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::GptDecoderBatch::setup::maxSequenceLength"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::GptDecoderBatch::setup::maxTokensPerStep"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::GptDecoderBatch::setup::mode"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::GptDecoderBatch::setup::modelConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::GptDecoderBatch::setup::sinkTokenLength"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11setupMedusaERK14GptModelConfig", "tensorrt_llm::runtime::GptDecoderBatch::setupMedusa"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11setupMedusaERK14GptModelConfig", "tensorrt_llm::runtime::GptDecoderBatch::setupMedusa::modelConfig"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfigE", "tensorrt_llm::runtime::GptJsonConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", 
"tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::modelConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::name"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::pipelineParallelism"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::precision"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::tensorParallelism"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig", "tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig::version"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfig", "tensorrt_llm::runtime::GptJsonConfig::engineFilename"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::engineFilename"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::engineFilename::model"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfig", "tensorrt_llm::runtime::GptJsonConfig::engineFilename::worldConfig"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::engineFilename::worldConfig"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14getModelConfigEv", "tensorrt_llm::runtime::GptJsonConfig::getModelConfig"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig7getNameEv", "tensorrt_llm::runtime::GptJsonConfig::getName"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig22getPipelineParallelismEv", "tensorrt_llm::runtime::GptJsonConfig::getPipelineParallelism"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getPrecisionEv", "tensorrt_llm::runtime::GptJsonConfig::getPrecision"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig20getTensorParallelismEv", "tensorrt_llm::runtime::GptJsonConfig::getTensorParallelism"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig10getVersionEv", "tensorrt_llm::runtime::GptJsonConfig::getVersion"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getWorldSizeEv", "tensorrt_llm::runtime::GptJsonConfig::getWorldSize"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig15mGptModelConfigE", "tensorrt_llm::runtime::GptJsonConfig::mGptModelConfig"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5mNameE", "tensorrt_llm::runtime::GptJsonConfig::mName"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig20mPipelineParallelismE", "tensorrt_llm::runtime::GptJsonConfig::mPipelineParallelism"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig10mPrecisionE", "tensorrt_llm::runtime::GptJsonConfig::mPrecision"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig18mTensorParallelismE", "tensorrt_llm::runtime::GptJsonConfig::mTensorParallelism"], [1, 7, 1, 
"_CPPv4N12tensorrt_llm7runtime13GptJsonConfig8mVersionE", "tensorrt_llm::runtime::GptJsonConfig::mVersion"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt10filesystem4pathE", "tensorrt_llm::runtime::GptJsonConfig::parse"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::parse"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERNSt7istreamE", "tensorrt_llm::runtime::GptJsonConfig::parse"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt6stringE", "tensorrt_llm::runtime::GptJsonConfig::parse::json"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERNSt7istreamE", "tensorrt_llm::runtime::GptJsonConfig::parse::json"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt10filesystem4pathE", "tensorrt_llm::runtime::GptJsonConfig::parse::path"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfigE", "tensorrt_llm::runtime::GptModelConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::dtype"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::hiddenSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::nbHeads"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::nbLayers"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::GptModelConfig::GptModelConfig::vocabSize"], [1, 2, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariantE", "tensorrt_llm::runtime::GptModelConfig::ModelVariant"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant4kGlmE", "tensorrt_llm::runtime::GptModelConfig::ModelVariant::kGlm"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant4kGptE", "tensorrt_llm::runtime::GptModelConfig::ModelVariant::kGpt"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant6kMambaE", "tensorrt_llm::runtime::GptModelConfig::ModelVariant::kMamba"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEb", "tensorrt_llm::runtime::GptModelConfig::computeContextLogits"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEv", "tensorrt_llm::runtime::GptModelConfig::computeContextLogits"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEb", "tensorrt_llm::runtime::GptModelConfig::computeContextLogits::computeContextLogits"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig23computeGenerationLogitsEb", "tensorrt_llm::runtime::GptModelConfig::computeGenerationLogits"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig23computeGenerationLogitsEv", "tensorrt_llm::runtime::GptModelConfig::computeGenerationLogits"], [1, 6, 1, 
"_CPPv4N12tensorrt_llm7runtime14GptModelConfig23computeGenerationLogitsEb", "tensorrt_llm::runtime::GptModelConfig::computeGenerationLogits::computeGenerationLogits"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig27getContextFMHAForGenerationEv", "tensorrt_llm::runtime::GptModelConfig::getContextFMHAForGeneration"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getDataTypeEv", "tensorrt_llm::runtime::GptModelConfig::getDataType"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13getHiddenSizeEv", "tensorrt_llm::runtime::GptModelConfig::getHiddenSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13getKvDataTypeEv", "tensorrt_llm::runtime::GptModelConfig::getKvDataType"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getLoraModulesEv", "tensorrt_llm::runtime::GptModelConfig::getLoraModules"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMambaConfigEv", "tensorrt_llm::runtime::GptModelConfig::getMambaConfig"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxBatchSizeEv", "tensorrt_llm::runtime::GptModelConfig::getMaxBatchSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxBeamWidthEv", "tensorrt_llm::runtime::GptModelConfig::getMaxBeamWidth"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMaxDraftLenEv", "tensorrt_llm::runtime::GptModelConfig::getMaxDraftLen"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMaxInputLenEv", "tensorrt_llm::runtime::GptModelConfig::getMaxInputLen"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMaxLoraRankEv", "tensorrt_llm::runtime::GptModelConfig::getMaxLoraRank"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxNumTokensEv", "tensorrt_llm::runtime::GptModelConfig::getMaxNumTokens"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig30getMaxPromptEmbeddingTableSizeEv", "tensorrt_llm::runtime::GptModelConfig::getMaxPromptEmbeddingTableSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig17getMaxSequenceLenEv", "tensorrt_llm::runtime::GptModelConfig::getMaxSequenceLen"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig19getMaxTokensPerStepEv", "tensorrt_llm::runtime::GptModelConfig::getMaxTokensPerStep"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMedusaModuleEv", "tensorrt_llm::runtime::GptModelConfig::getMedusaModule"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig16getMlpHiddenSizeEv", "tensorrt_llm::runtime::GptModelConfig::getMlpHiddenSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getModelVariantEv", "tensorrt_llm::runtime::GptModelConfig::getModelVariant"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig10getNbHeadsEv", "tensorrt_llm::runtime::GptModelConfig::getNbHeads"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getNbKvHeadsEv", "tensorrt_llm::runtime::GptModelConfig::getNbKvHeads"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getNbLayersE8SizeType", "tensorrt_llm::runtime::GptModelConfig::getNbLayers"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getNbLayersE8SizeType", "tensorrt_llm::runtime::GptModelConfig::getNbLayers::pipelineParallelism"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig19getPagedContextFMHAEv", "tensorrt_llm::runtime::GptModelConfig::getPagedContextFMHA"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getQuantModeEv", "tensorrt_llm::runtime::GptModelConfig::getQuantMode"], [1, 5, 1, 
"_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getSizePerHeadEv", "tensorrt_llm::runtime::GptModelConfig::getSizePerHead"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig17getTokensPerBlockEv", "tensorrt_llm::runtime::GptModelConfig::getTokensPerBlock"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getVocabSizeEv", "tensorrt_llm::runtime::GptModelConfig::getVocabSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18getVocabSizePaddedE8SizeType", "tensorrt_llm::runtime::GptModelConfig::getVocabSizePadded"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18getVocabSizePaddedE8SizeType", "tensorrt_llm::runtime::GptModelConfig::getVocabSizePadded::worldSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14hasMambaConfigEv", "tensorrt_llm::runtime::GptModelConfig::hasMambaConfig"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig10isSsmBasedEv", "tensorrt_llm::runtime::GptModelConfig::isSsmBased"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18isTransformerBasedEv", "tensorrt_llm::runtime::GptModelConfig::isTransformerBased"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21mComputeContextLogitsE", "tensorrt_llm::runtime::GptModelConfig::mComputeContextLogits"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig24mComputeGenerationLogitsE", "tensorrt_llm::runtime::GptModelConfig::mComputeGenerationLogits"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig9mDataTypeE", "tensorrt_llm::runtime::GptModelConfig::mDataType"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig11mHiddenSizeE", "tensorrt_llm::runtime::GptModelConfig::mHiddenSize"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mInputPackedE", "tensorrt_llm::runtime::GptModelConfig::mInputPacked"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mLoraModulesE", "tensorrt_llm::runtime::GptModelConfig::mLoraModules"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMambaConfigE", "tensorrt_llm::runtime::GptModelConfig::mMambaConfig"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxBatchSizeE", "tensorrt_llm::runtime::GptModelConfig::mMaxBatchSize"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxBeamWidthE", "tensorrt_llm::runtime::GptModelConfig::mMaxBeamWidth"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxDraftLenE", "tensorrt_llm::runtime::GptModelConfig::mMaxDraftLen"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxInputLenE", "tensorrt_llm::runtime::GptModelConfig::mMaxInputLen"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxLoraRankE", "tensorrt_llm::runtime::GptModelConfig::mMaxLoraRank"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxNumTokensE", "tensorrt_llm::runtime::GptModelConfig::mMaxNumTokens"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig28mMaxPromptEmbeddingTableSizeE", "tensorrt_llm::runtime::GptModelConfig::mMaxPromptEmbeddingTableSize"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15mMaxSequenceLenE", "tensorrt_llm::runtime::GptModelConfig::mMaxSequenceLen"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMedusaModuleE", "tensorrt_llm::runtime::GptModelConfig::mMedusaModule"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14mMlpHiddenSizeE", "tensorrt_llm::runtime::GptModelConfig::mMlpHiddenSize"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mModelVariantE", "tensorrt_llm::runtime::GptModelConfig::mModelVariant"], [1, 7, 
1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig8mNbHeadsE", "tensorrt_llm::runtime::GptModelConfig::mNbHeads"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mNbKvHeadsE", "tensorrt_llm::runtime::GptModelConfig::mNbKvHeads"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig9mNbLayersE", "tensorrt_llm::runtime::GptModelConfig::mNbLayers"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17mPagedContextFMHAE", "tensorrt_llm::runtime::GptModelConfig::mPagedContextFMHA"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mPagedKvCacheE", "tensorrt_llm::runtime::GptModelConfig::mPagedKvCache"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig11mPagedStateE", "tensorrt_llm::runtime::GptModelConfig::mPagedState"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mQuantModeE", "tensorrt_llm::runtime::GptModelConfig::mQuantMode"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mSizePerHeadE", "tensorrt_llm::runtime::GptModelConfig::mSizePerHead"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15mTokensPerBlockE", "tensorrt_llm::runtime::GptModelConfig::mTokensPerBlock"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig28mUseContextFMHAForGenerationE", "tensorrt_llm::runtime::GptModelConfig::mUseContextFMHAForGeneration"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig19mUseCustomAllReduceE", "tensorrt_llm::runtime::GptModelConfig::mUseCustomAllReduce"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig22mUseGptAttentionPluginE", "tensorrt_llm::runtime::GptModelConfig::mUseGptAttentionPlugin"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14mUseLoraPluginE", "tensorrt_llm::runtime::GptModelConfig::mUseLoraPlugin"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21mUseMambaConv1dPluginE", "tensorrt_llm::runtime::GptModelConfig::mUseMambaConv1dPlugin"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mVocabSizeE", "tensorrt_llm::runtime::GptModelConfig::mVocabSize"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setLoraModulesERKNSt6vectorI10LoraModuleEE", "tensorrt_llm::runtime::GptModelConfig::setLoraModules"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setLoraModulesERKNSt6vectorI10LoraModuleEE", "tensorrt_llm::runtime::GptModelConfig::setLoraModules::loraModules"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMambaConfigERK11MambaConfig", "tensorrt_llm::runtime::GptModelConfig::setMambaConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMambaConfigERK11MambaConfig", "tensorrt_llm::runtime::GptModelConfig::setMambaConfig::mambaConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBatchSizeE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxBatchSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBatchSizeE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxBatchSize::maxBatchSize"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBeamWidthE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxBeamWidth"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBeamWidthE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxBeamWidth::maxBeamWidth"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxDraftLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxDraftLen"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxDraftLenE8SizeType", 
"tensorrt_llm::runtime::GptModelConfig::setMaxDraftLen::maxDraftLen"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxInputLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxInputLen"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxInputLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxInputLen::maxInputLen"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxLoraRankE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxLoraRank"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxLoraRankE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxLoraRank::maxLoraRank"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxNumTokensENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptModelConfig::setMaxNumTokens"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxNumTokensENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptModelConfig::setMaxNumTokens::maxNumTokens"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig30setMaxPromptEmbeddingTableSizeE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxPromptEmbeddingTableSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig30setMaxPromptEmbeddingTableSizeE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxPromptEmbeddingTableSize::maxPromptEmbeddingTableSize"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setMaxSequenceLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxSequenceLen"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setMaxSequenceLenE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMaxSequenceLen::maxSequenceLen"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMedusaModuleERK12MedusaModule", "tensorrt_llm::runtime::GptModelConfig::setMedusaModule"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMedusaModuleERK12MedusaModule", "tensorrt_llm::runtime::GptModelConfig::setMedusaModule::medusaModule"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig16setMlpHiddenSizeE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMlpHiddenSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig16setMlpHiddenSizeE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setMlpHiddenSize::mlpHiddenSize"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setModelVariantE12ModelVariant", "tensorrt_llm::runtime::GptModelConfig::setModelVariant"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setModelVariantE12ModelVariant", "tensorrt_llm::runtime::GptModelConfig::setModelVariant::modelVariant"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setNbKvHeadsE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setNbKvHeads"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setNbKvHeadsE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setNbKvHeads::nbKvHeads"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig19setPagedContextFMHAEb", "tensorrt_llm::runtime::GptModelConfig::setPagedContextFMHA"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig19setPagedContextFMHAEb", "tensorrt_llm::runtime::GptModelConfig::setPagedContextFMHA::pagedContextFMHA"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setQuantModeEN6common9QuantModeE", "tensorrt_llm::runtime::GptModelConfig::setQuantMode"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setQuantModeEN6common9QuantModeE", "tensorrt_llm::runtime::GptModelConfig::setQuantMode::QuantMode"], [1, 5, 1, 
"_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setSizePerHeadE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setSizePerHead"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setSizePerHeadE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setSizePerHead::sizePerHead"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setTokensPerBlockE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setTokensPerBlock"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setTokensPerBlockE8SizeType", "tensorrt_llm::runtime::GptModelConfig::setTokensPerBlock::TokensPerBlock"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig30setUseContextFMHAForGenerationEb", "tensorrt_llm::runtime::GptModelConfig::setUseContextFMHAForGeneration"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig30setUseContextFMHAForGenerationEb", "tensorrt_llm::runtime::GptModelConfig::setUseContextFMHAForGeneration::useContextFMHAForGeneration"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig24supportsInflightBatchingEv", "tensorrt_llm::runtime::GptModelConfig::supportsInflightBatching"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEb", "tensorrt_llm::runtime::GptModelConfig::useCustomAllReduce"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEv", "tensorrt_llm::runtime::GptModelConfig::useCustomAllReduce"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEb", "tensorrt_llm::runtime::GptModelConfig::useCustomAllReduce::customAllReduce"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEb", "tensorrt_llm::runtime::GptModelConfig::useGptAttentionPlugin"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEv", "tensorrt_llm::runtime::GptModelConfig::useGptAttentionPlugin"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEb", "tensorrt_llm::runtime::GptModelConfig::useGptAttentionPlugin::useGptAttentionPlugin"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13useLoraPluginEb", "tensorrt_llm::runtime::GptModelConfig::useLoraPlugin"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13useLoraPluginEv", "tensorrt_llm::runtime::GptModelConfig::useLoraPlugin"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13useLoraPluginEb", "tensorrt_llm::runtime::GptModelConfig::useLoraPlugin::useLoraPlugin"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig20useMambaConv1dPluginEb", "tensorrt_llm::runtime::GptModelConfig::useMambaConv1dPlugin"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig20useMambaConv1dPluginEv", "tensorrt_llm::runtime::GptModelConfig::useMambaConv1dPlugin"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig20useMambaConv1dPluginEb", "tensorrt_llm::runtime::GptModelConfig::useMambaConv1dPlugin::useMambaConv1dPlugin"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig9useMedusaEv", "tensorrt_llm::runtime::GptModelConfig::useMedusa"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14usePackedInputEb", "tensorrt_llm::runtime::GptModelConfig::usePackedInput"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14usePackedInputEv", "tensorrt_llm::runtime::GptModelConfig::usePackedInput"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14usePackedInputEb", "tensorrt_llm::runtime::GptModelConfig::usePackedInput::inputPacked"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEb", 
"tensorrt_llm::runtime::GptModelConfig::usePagedKvCache"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEv", "tensorrt_llm::runtime::GptModelConfig::usePagedKvCache"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEb", "tensorrt_llm::runtime::GptModelConfig::usePagedKvCache::pagedKvCache"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13usePagedStateEb", "tensorrt_llm::runtime::GptModelConfig::usePagedState"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13usePagedStateEv", "tensorrt_llm::runtime::GptModelConfig::usePagedState"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13usePagedStateEb", "tensorrt_llm::runtime::GptModelConfig::usePagedState::pagedState"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15usePromptTuningEv", "tensorrt_llm::runtime::GptModelConfig::usePromptTuning"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSessionE", "tensorrt_llm::runtime::GptSession"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6ConfigE", "tensorrt_llm::runtime::GptSession::Config"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config6ConfigE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::Config::Config"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config6ConfigE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::Config::Config::maxBatchSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config6ConfigE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::Config::Config::maxBeamWidth"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config6ConfigE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::Config::Config::maxSequenceLength"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17ctxMicroBatchSizeE", "tensorrt_llm::runtime::GptSession::Config::ctxMicroBatchSize"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config13cudaGraphModeE", "tensorrt_llm::runtime::GptSession::Config::cudaGraphMode"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17decoderPerRequestE", "tensorrt_llm::runtime::GptSession::Config::decoderPerRequest"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config12decodingModeE", "tensorrt_llm::runtime::GptSession::Config::decodingMode"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17genMicroBatchSizeE", "tensorrt_llm::runtime::GptSession::Config::genMicroBatchSize"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config13kvCacheConfigE", "tensorrt_llm::runtime::GptSession::Config::kvCacheConfig"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config12maxBatchSizeE", "tensorrt_llm::runtime::GptSession::Config::maxBatchSize"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config12maxBeamWidthE", "tensorrt_llm::runtime::GptSession::Config::maxBeamWidth"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17maxSequenceLengthE", "tensorrt_llm::runtime::GptSession::Config::maxSequenceLength"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17normalizeLogProbsE", "tensorrt_llm::runtime::GptSession::Config::normalizeLogProbs"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorE", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor17CudaGraphExecutorEv", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::CudaGraphExecutor"], [1, 5, 1, 
"_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor5clearEv", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::clear"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6createERK11cudaGraph_t", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::create"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6createERK11cudaGraph_t", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::create::graph"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor11hasInstanceEv", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::hasInstance"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6launchERK10CudaStream", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::launch"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6launchERK10CudaStream", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::launch::stream"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor9mInstanceE", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::mInstance"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16prepareNextGraphERK11TllmRuntime8SizeType", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::prepareNextGraph"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16prepareNextGraphERK11TllmRuntime8SizeType", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::prepareNextGraph::nextContextId"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16prepareNextGraphERK11TllmRuntime8SizeType", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::prepareNextGraph::runtime"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6updateERK11cudaGraph_t", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::update"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6updateERK11cudaGraph_t", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::update::graph"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor14uploadToStreamERK10CudaStream", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::uploadToStream"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor14uploadToStreamERK10CudaStream", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::uploadToStream::stream"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorD0Ev", "tensorrt_llm::runtime::GptSession::CudaGraphExecutor::~CudaGraphExecutor"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfilerE", "tensorrt_llm::runtime::GptSession::GenerationProfiler"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler18GenerationProfilerEv", "tensorrt_llm::runtime::GptSession::GenerationProfiler::GenerationProfiler"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler3endE", "tensorrt_llm::runtime::GptSession::GenerationProfiler::end"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler5flagsE", "tensorrt_llm::runtime::GptSession::GenerationProfiler::flags"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler16getElapsedTimeMsEv", "tensorrt_llm::runtime::GptSession::GenerationProfiler::getElapsedTimeMs"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession18GenerationProfiler6getEndEv", "tensorrt_llm::runtime::GptSession::GenerationProfiler::getEnd"], [1, 5, 1, 
"_CPPv4NK12tensorrt_llm7runtime10GptSession18GenerationProfiler8getStartEv", "tensorrt_llm::runtime::GptSession::GenerationProfiler::getStart"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler5startE", "tensorrt_llm::runtime::GptSession::GenerationProfiler::start"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::engineBuffer"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::engineBuffer"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::engineFile"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::engineSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::logger"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::logger"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::logger"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::modelConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::modelConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::modelConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::sessionConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::sessionConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::sessionConfig"], [1, 6, 1, 
"_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::worldConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::worldConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr", "tensorrt_llm::runtime::GptSession::GptSession::worldConfig"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13KvCacheConfigE", "tensorrt_llm::runtime::GptSession::KvCacheConfig"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14KvCacheManagerE", "tensorrt_llm::runtime::GptSession::KvCacheManager"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession9LoggerPtrE", "tensorrt_llm::runtime::GptSession::LoggerPtr"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfigE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE8SizeType8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigEv", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE8SizeType8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig::ctxMicroBatchSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE8SizeType8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig::genMicroBatchSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE8SizeType8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig::maxBatchSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE8SizeType8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig::pipelineParallelism"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig12ctxBatchSizeE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::ctxBatchSize"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig12genBatchSizeE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::genBatchSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig13getGenGraphIdE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::getGenGraphId"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig13getGenGraphIdE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::getGenGraphId::flipFlopId"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig13getGenGraphIdE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::getGenGraphId::generationBatchId"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig13numCtxBatchesE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::numCtxBatches"], [1, 5, 1, 
"_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig12numCtxPerGenEv", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::numCtxPerGen"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig13numGenBatchesE", "tensorrt_llm::runtime::GptSession::MicroBatchConfig::numGenBatches"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession9TensorPtrE", "tensorrt_llm::runtime::GptSession::TensorPtr"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession22TokenGeneratedCallbackE", "tensorrt_llm::runtime::GptSession::TokenGeneratedCallback"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13createBuffersE8SizeType", "tensorrt_llm::runtime::GptSession::createBuffers"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13createBuffersE8SizeType", "tensorrt_llm::runtime::GptSession::createBuffers::numMicroBatches"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createContextsEv", "tensorrt_llm::runtime::GptSession::createContexts"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::createCustomAllReduceWorkspace"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::createCustomAllReduceWorkspace::batchSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::createCustomAllReduceWorkspace::beamWidth"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::createCustomAllReduceWorkspace::maxSequenceLength"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeTypeRK12DecodingMode", "tensorrt_llm::runtime::GptSession::createDecoders"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeTypeRK12DecodingMode", "tensorrt_llm::runtime::GptSession::createDecoders::batchSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeTypeRK12DecodingMode", "tensorrt_llm::runtime::GptSession::createDecoders::beamWidth"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeTypeRK12DecodingMode", "tensorrt_llm::runtime::GptSession::createDecoders::decoderPerRequest"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeTypeRK12DecodingMode", "tensorrt_llm::runtime::GptSession::createDecoders::decodingMode"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeTypeRK12DecodingMode", "tensorrt_llm::runtime::GptSession::createDecoders::logitsType"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeTypeRK12DecodingMode", "tensorrt_llm::runtime::GptSession::createDecoders::maxAttentionWindow"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeTypeRK12DecodingMode", 
"tensorrt_llm::runtime::GptSession::createDecoders::maxSequenceLength"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeTypeRK12DecodingMode", "tensorrt_llm::runtime::GptSession::createDecoders::numMicroBatches"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeTypeRK12DecodingMode", "tensorrt_llm::runtime::GptSession::createDecoders::sinkTokenLength"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager::batchSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager::beamWidth"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager::config"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager::maxAttentionWindow"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager::maxSequenceLength"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig", "tensorrt_llm::runtime::GptSession::createKvCacheManager::sinkTokenLength"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createOnTokenGeneratedCallbackER16GenerationOutput", "tensorrt_llm::runtime::GptSession::createOnTokenGeneratedCallback"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createOnTokenGeneratedCallbackER16GenerationOutput", "tensorrt_llm::runtime::GptSession::createOnTokenGeneratedCallback::outputs"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::decoderStepAsync"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::decoderStepAsync::decoderStep"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncE8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::decoderStepAsync::microBatchId"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18executeContextStepERKNSt6vectorI15GenerationInputEERKNSt6vectorI8SizeTypeEEPK14KvCacheManager", "tensorrt_llm::runtime::GptSession::executeContextStep"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18executeContextStepERKNSt6vectorI15GenerationInputEERKNSt6vectorI8SizeTypeEEPK14KvCacheManager", "tensorrt_llm::runtime::GptSession::executeContextStep::generationBatchesInputs"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18executeContextStepERKNSt6vectorI15GenerationInputEERKNSt6vectorI8SizeTypeEEPK14KvCacheManager", "tensorrt_llm::runtime::GptSession::executeContextStep::generationBatchesOffsets"], [1, 6, 1, 
"_CPPv4N12tensorrt_llm7runtime10GptSession18executeContextStepERKNSt6vectorI15GenerationInputEERKNSt6vectorI8SizeTypeEEPK14KvCacheManager", "tensorrt_llm::runtime::GptSession::executeContextStep::kvCacheManager"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep::kvCacheManager"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep::microBatchOffsets"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep::microBatchesFinished"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep::microBatchesInputs"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep::microBatchesOutputs"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE", "tensorrt_llm::runtime::GptSession::executeGenerationStep::step"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8finalizeE8SizeType", "tensorrt_llm::runtime::GptSession::finalize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8finalizeE8SizeType", "tensorrt_llm::runtime::GptSession::finalize::microBatchId"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfigKNSt10shared_ptrI18GenerationProfilerEE", "tensorrt_llm::runtime::GptSession::generate"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfigKNSt10shared_ptrI18GenerationProfilerEE", "tensorrt_llm::runtime::GptSession::generate::generationProfiler"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfigKNSt10shared_ptrI18GenerationProfilerEE", "tensorrt_llm::runtime::GptSession::generate::inputs"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfigKNSt10shared_ptrI18GenerationProfilerEE", "tensorrt_llm::runtime::GptSession::generate::outputs"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfigKNSt10shared_ptrI18GenerationProfilerEE", "tensorrt_llm::runtime::GptSession::generate::samplingConfig"], 
[1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallbackKNSt10shared_ptrI18GenerationProfilerEE", "tensorrt_llm::runtime::GptSession::generateBatched"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallbackKNSt10shared_ptrI18GenerationProfilerEE", "tensorrt_llm::runtime::GptSession::generateBatched::generationProfiler"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallbackKNSt10shared_ptrI18GenerationProfilerEE", "tensorrt_llm::runtime::GptSession::generateBatched::microBatchesInputs"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallbackKNSt10shared_ptrI18GenerationProfilerEE", "tensorrt_llm::runtime::GptSession::generateBatched::microBatchesOutputs"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallbackKNSt10shared_ptrI18GenerationProfilerEE", "tensorrt_llm::runtime::GptSession::generateBatched::onTokenGenerated"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallbackKNSt10shared_ptrI18GenerationProfilerEE", "tensorrt_llm::runtime::GptSession::generateBatched::samplingConfig"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16getBufferManagerEv", "tensorrt_llm::runtime::GptSession::getBufferManager"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession9getDeviceEv", "tensorrt_llm::runtime::GptSession::getDevice"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession9getLoggerEv", "tensorrt_llm::runtime::GptSession::getLogger"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16getLogitDataTypeEv", "tensorrt_llm::runtime::GptSession::getLogitDataType"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession14getModelConfigEv", "tensorrt_llm::runtime::GptSession::getModelConfig"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession20getNormalizeLogProbsEv", "tensorrt_llm::runtime::GptSession::getNormalizeLogProbs"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession14getWorldConfigEv", "tensorrt_llm::runtime::GptSession::getWorldConfig"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initDecoder"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initDecoder::inputs"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initDecoder::microBatchId"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initDecoder::outputIds"], [1, 6, 1, 
"_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initDecoder::outputs"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType", "tensorrt_llm::runtime::GptSession::initDecoder::samplingConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::kvCacheAddSequences"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::kvCacheAddSequences::beamWidth"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::kvCacheAddSequences::firstBatchIdx"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::kvCacheAddSequences::microBatchId"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession8mBuffersE", "tensorrt_llm::runtime::GptSession::mBuffers"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession10mCommEventE", "tensorrt_llm::runtime::GptSession::mCommEvent"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession9mCommPtrsE", "tensorrt_llm::runtime::GptSession::mCommPtrs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession11mCommStreamE", "tensorrt_llm::runtime::GptSession::mCommStream"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession19mCudaGraphInstancesE", "tensorrt_llm::runtime::GptSession::mCudaGraphInstances"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14mCudaGraphModeE", "tensorrt_llm::runtime::GptSession::mCudaGraphMode"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession26mDecoderMaxAttentionWindowE", "tensorrt_llm::runtime::GptSession::mDecoderMaxAttentionWindow"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession25mDecoderMaxSequenceLengthE", "tensorrt_llm::runtime::GptSession::mDecoderMaxSequenceLength"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession23mDecoderSinkTokenLengthE", "tensorrt_llm::runtime::GptSession::mDecoderSinkTokenLength"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession9mDecodersE", "tensorrt_llm::runtime::GptSession::mDecoders"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession7mDeviceE", "tensorrt_llm::runtime::GptSession::mDevice"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17mIpcMemoryHandlesE", "tensorrt_llm::runtime::GptSession::mIpcMemoryHandles"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15mKvCacheManagerE", "tensorrt_llm::runtime::GptSession::mKvCacheManager"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession7mLoggerE", "tensorrt_llm::runtime::GptSession::mLogger"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession17mMicroBatchConfigE", "tensorrt_llm::runtime::GptSession::mMicroBatchConfig"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession12mModelConfigE", "tensorrt_llm::runtime::GptSession::mModelConfig"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession18mNormalizeLogProbsE", "tensorrt_llm::runtime::GptSession::mNormalizeLogProbs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13mPipelineCommE", "tensorrt_llm::runtime::GptSession::mPipelineComm"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession15mReceivedEventsE", "tensorrt_llm::runtime::GptSession::mReceivedEvents"], [1, 7, 1, 
"_CPPv4N12tensorrt_llm7runtime10GptSession8mRuntimeE", "tensorrt_llm::runtime::GptSession::mRuntime"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession12mWorldConfigE", "tensorrt_llm::runtime::GptSession::mWorldConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession5setupERK6Config", "tensorrt_llm::runtime::GptSession::setup"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession5setupERK6Config", "tensorrt_llm::runtime::GptSession::setup::sessionConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::shouldStopSync"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::shouldStopSync::batchSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::shouldStopSync::beamWidth"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::GptSession::shouldStopSync::microBatchId"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10GptSession13useCudaGraphsEv", "tensorrt_llm::runtime::GptSession::useCudaGraphs"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime7IBufferE", "tensorrt_llm::runtime::IBuffer"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer8DataTypeE", "tensorrt_llm::runtime::IBuffer::DataType"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferERK7IBuffer", "tensorrt_llm::runtime::IBuffer::IBuffer"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferEv", "tensorrt_llm::runtime::IBuffer::IBuffer"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer14SharedConstPtrE", "tensorrt_llm::runtime::IBuffer::SharedConstPtr"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer9SharedPtrE", "tensorrt_llm::runtime::IBuffer::SharedPtr"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer14UniqueConstPtrE", "tensorrt_llm::runtime::IBuffer::UniqueConstPtr"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer9UniquePtrE", "tensorrt_llm::runtime::IBuffer::UniquePtr"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE", "tensorrt_llm::runtime::IBuffer::data"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataEv", "tensorrt_llm::runtime::IBuffer::data"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE", "tensorrt_llm::runtime::IBuffer::data"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataEv", "tensorrt_llm::runtime::IBuffer::data"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE", "tensorrt_llm::runtime::IBuffer::data::index"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE", "tensorrt_llm::runtime::IBuffer::data::index"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer11getCapacityEv", "tensorrt_llm::runtime::IBuffer::getCapacity"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer11getDataTypeEv", "tensorrt_llm::runtime::IBuffer::getDataType"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer15getDataTypeNameEv", "tensorrt_llm::runtime::IBuffer::getDataTypeName"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer13getMemoryTypeEv", "tensorrt_llm::runtime::IBuffer::getMemoryType"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer17getMemoryTypeNameEv", "tensorrt_llm::runtime::IBuffer::getMemoryTypeName"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7getSizeEv", "tensorrt_llm::runtime::IBuffer::getSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer14getSizeInBytesEv", 
"tensorrt_llm::runtime::IBuffer::getSizeInBytes"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer10memoryTypeEPKv", "tensorrt_llm::runtime::IBuffer::memoryType"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer10memoryTypeEPKv", "tensorrt_llm::runtime::IBuffer::memoryType::data"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7IBufferaSERK7IBuffer", "tensorrt_llm::runtime::IBuffer::operator="], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer7releaseEv", "tensorrt_llm::runtime::IBuffer::release"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer6resizeENSt6size_tE", "tensorrt_llm::runtime::IBuffer::resize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer6resizeENSt6size_tE", "tensorrt_llm::runtime::IBuffer::resize::newSize"], [1, 5, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice"], [1, 5, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice"], [1, 8, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::TConstPtr"], [1, 8, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::TConstPtr"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::buffer"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::buffer"], [1, 6, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::offset"], [1, 6, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::offset"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::offset"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::offset"], [1, 6, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::size"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::size"], [1, 6, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::tensor"], [1, 6, 1, 
"_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::slice::tensor"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7toBytesENSt6size_tE", "tensorrt_llm::runtime::IBuffer::toBytes"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7toBytesENSt6size_tE", "tensorrt_llm::runtime::IBuffer::toBytes::size"], [1, 5, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtr", "tensorrt_llm::runtime::IBuffer::view"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view"], [1, 8, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::TConstPtr"], [1, 6, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::size"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::size"], [1, 6, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::tensor"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtr", "tensorrt_llm::runtime::IBuffer::view::tensor"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::IBuffer::view::tensor"], [1, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap"], [1, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap"], [1, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE", "tensorrt_llm::runtime::IBuffer::wrap"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::T"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::T"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE", "tensorrt_llm::runtime::IBuffer::wrap::T"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::capacity"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::capacity"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::data"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::data"], [1, 6, 1, 
"_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::data"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::data"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::size"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::size"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::size"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::size"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::type"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::IBuffer::wrap::type"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE", "tensorrt_llm::runtime::IBuffer::wrap::v"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7IBufferD0Ev", "tensorrt_llm::runtime::IBuffer::~IBuffer"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoderE", "tensorrt_llm::runtime::IGptDecoder"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder9TensorPtrE", "tensorrt_llm::runtime::IGptDecoder::TensorPtr"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds::batchSlots"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds::contextLengths"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds::draftTokenIds"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds::finishedFinal"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds::finishedSum"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds::finishedVec"], [1, 6, 1, 
"_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds::numDraftTokens"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds::sequenceLengths"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds::stream"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds::targetTokenIds"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensorRK7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensorRK7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::batchSlots"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensorRK7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::curandState"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensorRK7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::draftLogits"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensorRK7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::draftProbs"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensorRK7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::finished"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensorRK7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::numDraftTokens"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensorRK7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::randomAcceptThreshold"], [1, 6, 1, 
"_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensorRK7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::stream"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensorRK7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::targetLogits"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensorRK7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::targetProbs"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensorRK7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::useRandomAcceptThreshold"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensorRK7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::vocabSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensorRK7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits::vocabSizePadded"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createERK12DecodingModeN8nvinfer18DataTypeE6size_t6size_t6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrENSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::IGptDecoder::create"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createERK12DecodingModeN8nvinfer18DataTypeE6size_t6size_t6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrENSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::IGptDecoder::create::dtype"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createERK12DecodingModeN8nvinfer18DataTypeE6size_t6size_t6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrENSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::IGptDecoder::create::maxBatchSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createERK12DecodingModeN8nvinfer18DataTypeE6size_t6size_t6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrENSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::IGptDecoder::create::maxBeamWidth"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createERK12DecodingModeN8nvinfer18DataTypeE6size_t6size_t6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrENSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::IGptDecoder::create::maxNumMedusaHeads"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createERK12DecodingModeN8nvinfer18DataTypeE6size_t6size_t6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrENSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", 
"tensorrt_llm::runtime::IGptDecoder::create::maxSequenceLength"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createERK12DecodingModeN8nvinfer18DataTypeE6size_t6size_t6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrENSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::IGptDecoder::create::maxTokensPerStep"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createERK12DecodingModeN8nvinfer18DataTypeE6size_t6size_t6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrENSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::IGptDecoder::create::mode"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createERK12DecodingModeN8nvinfer18DataTypeE6size_t6size_t6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrENSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::IGptDecoder::create::stream"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createERK12DecodingModeN8nvinfer18DataTypeE6size_t6size_t6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrENSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::IGptDecoder::create::vocabSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createERK12DecodingModeN8nvinfer18DataTypeE6size_t6size_t6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrENSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE", "tensorrt_llm::runtime::IGptDecoder::create::vocabSizePadded"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forward"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forward::input"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder7forwardER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forward::output"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forwardAsync"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forwardAsync::input"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", "tensorrt_llm::runtime::IGptDecoder::forwardAsync::output"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree::decodingInput"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree::decodingOutput"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree::finalOutputIds"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager", "tensorrt_llm::runtime::IGptDecoder::gatherTree::manager"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder17getSamplingConfigEv", 
"tensorrt_llm::runtime::IGptDecoder::getSamplingConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t8SizeTypeRKNSt8optionalI9TensorPtrEE", "tensorrt_llm::runtime::IGptDecoder::setup"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t8SizeTypeRKNSt8optionalI9TensorPtrEE", "tensorrt_llm::runtime::IGptDecoder::setup::batchSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t8SizeTypeRKNSt8optionalI9TensorPtrEE", "tensorrt_llm::runtime::IGptDecoder::setup::batchSlots"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t8SizeTypeRKNSt8optionalI9TensorPtrEE", "tensorrt_llm::runtime::IGptDecoder::setup::maxSequenceLength"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t8SizeTypeRKNSt8optionalI9TensorPtrEE", "tensorrt_llm::runtime::IGptDecoder::setup::samplingConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder34updateKVCacheBasedOnAcceptedTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorRK14GptModelConfigRK11WorldConfigN13BufferManager13CudaStreamPtrE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IGptDecoder::updateKVCacheBasedOnAcceptedTokens"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder34updateKVCacheBasedOnAcceptedTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorRK14GptModelConfigRK11WorldConfigN13BufferManager13CudaStreamPtrE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IGptDecoder::updateKVCacheBasedOnAcceptedTokens::acceptedOffsets"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder34updateKVCacheBasedOnAcceptedTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorRK14GptModelConfigRK11WorldConfigN13BufferManager13CudaStreamPtrE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IGptDecoder::updateKVCacheBasedOnAcceptedTokens::dtype"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder34updateKVCacheBasedOnAcceptedTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorRK14GptModelConfigRK11WorldConfigN13BufferManager13CudaStreamPtrE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IGptDecoder::updateKVCacheBasedOnAcceptedTokens::maxAttentionWindow"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder34updateKVCacheBasedOnAcceptedTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorRK14GptModelConfigRK11WorldConfigN13BufferManager13CudaStreamPtrE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IGptDecoder::updateKVCacheBasedOnAcceptedTokens::maxBlocksPerSeq"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder34updateKVCacheBasedOnAcceptedTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorRK14GptModelConfigRK11WorldConfigN13BufferManager13CudaStreamPtrE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IGptDecoder::updateKVCacheBasedOnAcceptedTokens::modelConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder34updateKVCacheBasedOnAcceptedTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorRK14GptModelConfigRK11WorldConfigN13BufferManager13CudaStreamPtrE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IGptDecoder::updateKVCacheBasedOnAcceptedTokens::packedAcceptedIds"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder34updateKVCacheBasedOnAcceptedTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorRK14GptModelConfigRK11WorldConfigN13BufferManager13CudaStreamPtrE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", 
"tensorrt_llm::runtime::IGptDecoder::updateKVCacheBasedOnAcceptedTokens::pastKeyValueLengths"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder34updateKVCacheBasedOnAcceptedTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorRK14GptModelConfigRK11WorldConfigN13BufferManager13CudaStreamPtrE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IGptDecoder::updateKVCacheBasedOnAcceptedTokens::pointerArray"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder34updateKVCacheBasedOnAcceptedTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorRK14GptModelConfigRK11WorldConfigN13BufferManager13CudaStreamPtrE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IGptDecoder::updateKVCacheBasedOnAcceptedTokens::rewindDraftTokenCount"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder34updateKVCacheBasedOnAcceptedTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorRK14GptModelConfigRK11WorldConfigN13BufferManager13CudaStreamPtrE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IGptDecoder::updateKVCacheBasedOnAcceptedTokens::stream"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder34updateKVCacheBasedOnAcceptedTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorRK14GptModelConfigRK11WorldConfigN13BufferManager13CudaStreamPtrE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE", "tensorrt_llm::runtime::IGptDecoder::updateKVCacheBasedOnAcceptedTokens::worldConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoderD0Ev", "tensorrt_llm::runtime::IGptDecoder::~IGptDecoder"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatchE", "tensorrt_llm::runtime::IGptDecoderBatch"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch13CudaStreamPtrE", "tensorrt_llm::runtime::IGptDecoderBatch::CudaStreamPtr"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch16IGptDecoderBatchEv", "tensorrt_llm::runtime::IGptDecoderBatch::IGptDecoderBatch"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch9TensorPtrE", "tensorrt_llm::runtime::IGptDecoderBatch::TensorPtr"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch8TokenPtrE", "tensorrt_llm::runtime::IGptDecoderBatch::TokenPtr"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch8finalizeE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::finalize"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch8finalizeE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::finalize::batchIdx"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch7forwardERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forward"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch7forwardERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forward::input"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch7forwardERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forward::output"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forwardAsync"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", "tensorrt_llm::runtime::IGptDecoderBatch::forwardAsync::input"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE", 
"tensorrt_llm::runtime::IGptDecoderBatch::forwardAsync::output"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE", "tensorrt_llm::runtime::IGptDecoderBatch::forwardSync"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE", "tensorrt_llm::runtime::IGptDecoderBatch::forwardSync::token"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch14getCumLogProbsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getCumLogProbs"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch14getCumLogProbsEv", "tensorrt_llm::runtime::IGptDecoderBatch::getCumLogProbs"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch14getCumLogProbsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getCumLogProbs::batchIdx"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getFinishedEv", "tensorrt_llm::runtime::IGptDecoderBatch::getFinished"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getLogProbsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getLogProbs"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getLogProbsEv", "tensorrt_llm::runtime::IGptDecoderBatch::getLogProbs"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getLogProbsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getLogProbs::batchIdx"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch30getMedusaAcceptedLengthsCumSumEv", "tensorrt_llm::runtime::IGptDecoderBatch::getMedusaAcceptedLengthsCumSum"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch28getMedusaAcceptedPackedPathsEv", "tensorrt_llm::runtime::IGptDecoderBatch::getMedusaAcceptedPackedPaths"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch10getNbStepsEv", "tensorrt_llm::runtime::IGptDecoderBatch::getNbSteps"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch18getNextDraftTokensEv", "tensorrt_llm::runtime::IGptDecoderBatch::getNextDraftTokens"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getOutputIdsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getOutputIds"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getOutputIdsE8SizeType", "tensorrt_llm::runtime::IGptDecoderBatch::getOutputIds::batchIdx"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getParentIdsEv", "tensorrt_llm::runtime::IGptDecoderBatch::getParentIds"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch11newRequestsERKNSt6vectorI8SizeTypeEERKNSt6vectorIN13decoder_batch7RequestEEERKNSt6vectorI14SamplingConfigEE", "tensorrt_llm::runtime::IGptDecoderBatch::newRequests"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch11newRequestsERKNSt6vectorI8SizeTypeEERKNSt6vectorIN13decoder_batch7RequestEEERKNSt6vectorI14SamplingConfigEE", "tensorrt_llm::runtime::IGptDecoderBatch::newRequests::requests"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch11newRequestsERKNSt6vectorI8SizeTypeEERKNSt6vectorIN13decoder_batch7RequestEEERKNSt6vectorI14SamplingConfigEE", "tensorrt_llm::runtime::IGptDecoderBatch::newRequests::samplingConfigs"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch11newRequestsERKNSt6vectorI8SizeTypeEERKNSt6vectorIN13decoder_batch7RequestEEERKNSt6vectorI14SamplingConfigEE", "tensorrt_llm::runtime::IGptDecoderBatch::newRequests::seqSlots"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoderE", "tensorrt_llm::runtime::IStatefulGptDecoder"], [1, 1, 1, 
"_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder13CudaStreamPtrE", "tensorrt_llm::runtime::IStatefulGptDecoder::CudaStreamPtr"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder19IStatefulGptDecoderEv", "tensorrt_llm::runtime::IStatefulGptDecoder::IStatefulGptDecoder"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder9TensorPtrE", "tensorrt_llm::runtime::IStatefulGptDecoder::TensorPtr"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder8finalizeEv", "tensorrt_llm::runtime::IStatefulGptDecoder::finalize"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder7forwardERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forward"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder7forwardERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forward::input"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder7forwardERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forward::output"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forwardAsync"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forwardAsync::input"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder12forwardAsyncERN7decoder6OutputERKN7decoder5InputE", "tensorrt_llm::runtime::IStatefulGptDecoder::forwardAsync::output"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder11forwardSyncEv", "tensorrt_llm::runtime::IStatefulGptDecoder::forwardSync"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder15getAllNewTokensEv", "tensorrt_llm::runtime::IStatefulGptDecoder::getAllNewTokens"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder14getCumLogProbsEv", "tensorrt_llm::runtime::IStatefulGptDecoder::getCumLogProbs"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder11getLogProbsEv", "tensorrt_llm::runtime::IStatefulGptDecoder::getLogProbs"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder13getNbFinishedEv", "tensorrt_llm::runtime::IStatefulGptDecoder::getNbFinished"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getNewTokensE8SizeType", "tensorrt_llm::runtime::IStatefulGptDecoder::getNewTokens"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getNewTokensE8SizeType", "tensorrt_llm::runtime::IStatefulGptDecoder::getNewTokens::iter"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getOutputIdsEv", "tensorrt_llm::runtime::IStatefulGptDecoder::getOutputIds"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::newBatch"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::newBatch::inputs"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::newBatch::outputs"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig", 
"tensorrt_llm::runtime::IStatefulGptDecoder::newBatch::samplingConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::setup"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::dtype"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::fusedDecoder"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::maxAttentionWindow"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::maxBatchSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::maxBeamWidth"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::maxSequenceLength"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::maxTokensPerStep"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::mode"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::modelConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig", "tensorrt_llm::runtime::IStatefulGptDecoder::setup::sinkTokenLength"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoderD0Ev", "tensorrt_llm::runtime::IStatefulGptDecoder::~IStatefulGptDecoder"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime7ITensorE", "tensorrt_llm::runtime::ITensor"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7DimTypeE", "tensorrt_llm::runtime::ITensor::DimType"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorERK7ITensor", "tensorrt_llm::runtime::ITensor::ITensor"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorEv", "tensorrt_llm::runtime::ITensor::ITensor"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5ShapeE", "tensorrt_llm::runtime::ITensor::Shape"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor14SharedConstPtrE", "tensorrt_llm::runtime::ITensor::SharedConstPtr"], 
[1, 1, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9SharedPtrE", "tensorrt_llm::runtime::ITensor::SharedPtr"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor14UniqueConstPtrE", "tensorrt_llm::runtime::ITensor::UniqueConstPtr"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9UniquePtrE", "tensorrt_llm::runtime::ITensor::UniquePtr"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor8castSizeE6size_t", "tensorrt_llm::runtime::ITensor::castSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor8castSizeE6size_t", "tensorrt_llm::runtime::ITensor::castSize::newSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime7ITensor8getShapeEv", "tensorrt_llm::runtime::ITensor::getShape"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9makeShapeERKNSt16initializer_listI8SizeTypeEE", "tensorrt_llm::runtime::ITensor::makeShape"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9makeShapeERKNSt16initializer_listI8SizeTypeEE", "tensorrt_llm::runtime::ITensor::makeShape::dims"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensoraSERK7ITensor", "tensorrt_llm::runtime::ITensor::operator="], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7reshapeERK5Shape", "tensorrt_llm::runtime::ITensor::reshape"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7reshapeERK5Shape", "tensorrt_llm::runtime::ITensor::reshape::dims"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor6resizeENSt6size_tE", "tensorrt_llm::runtime::ITensor::resize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor6resizeENSt6size_tE", "tensorrt_llm::runtime::ITensor::resize::newSize"], [1, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor11shapeEqualsEbRK5ShapePK1T8SizeType", "tensorrt_llm::runtime::ITensor::shapeEquals"], [1, 5, 1, "_CPPv4I0ENK12tensorrt_llm7runtime7ITensor11shapeEqualsEbPK1T8SizeType", "tensorrt_llm::runtime::ITensor::shapeEquals"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor11shapeEqualsERK5ShapeRK5Shape", "tensorrt_llm::runtime::ITensor::shapeEquals"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERK5Shape", "tensorrt_llm::runtime::ITensor::shapeEquals"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERKNSt16initializer_listI8SizeTypeEE", "tensorrt_llm::runtime::ITensor::shapeEquals"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor11shapeEqualsEbRK5ShapePK1T8SizeType", "tensorrt_llm::runtime::ITensor::shapeEquals::T"], [1, 8, 1, "_CPPv4I0ENK12tensorrt_llm7runtime7ITensor11shapeEqualsEbPK1T8SizeType", "tensorrt_llm::runtime::ITensor::shapeEquals::T"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor11shapeEqualsEbRK5ShapePK1T8SizeType", "tensorrt_llm::runtime::ITensor::shapeEquals::count"], [1, 6, 1, "_CPPv4I0ENK12tensorrt_llm7runtime7ITensor11shapeEqualsEbPK1T8SizeType", "tensorrt_llm::runtime::ITensor::shapeEquals::count"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor11shapeEqualsEbRK5ShapePK1T8SizeType", "tensorrt_llm::runtime::ITensor::shapeEquals::dims"], [1, 6, 1, "_CPPv4I0ENK12tensorrt_llm7runtime7ITensor11shapeEqualsEbPK1T8SizeType", "tensorrt_llm::runtime::ITensor::shapeEquals::dims"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor11shapeEqualsEbRK5ShapePK1T8SizeType", "tensorrt_llm::runtime::ITensor::shapeEquals::lhs"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor11shapeEqualsERK5ShapeRK5Shape", "tensorrt_llm::runtime::ITensor::shapeEquals::lhs"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERK5Shape", "tensorrt_llm::runtime::ITensor::shapeEquals::other"], [1, 6, 1, 
"_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERKNSt16initializer_listI8SizeTypeEE", "tensorrt_llm::runtime::ITensor::shapeEquals::other"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor11shapeEqualsERK5ShapeRK5Shape", "tensorrt_llm::runtime::ITensor::shapeEquals::rhs"], [1, 5, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice"], [1, 5, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice"], [1, 8, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::TConstPtr"], [1, 8, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::TConstPtr"], [1, 6, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::offset"], [1, 6, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::offset"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::offset"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::offset"], [1, 6, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::size"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::size"], [1, 6, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::tensor"], [1, 6, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::tensor"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::tensor"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE", "tensorrt_llm::runtime::ITensor::slice::tensor"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeE8SizeType", "tensorrt_llm::runtime::ITensor::squeeze"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::squeeze"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeE8SizeType", 
"tensorrt_llm::runtime::ITensor::squeeze::dim"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::squeeze::dim"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::squeeze::shape"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor8toStringERK5Shape", "tensorrt_llm::runtime::ITensor::toString"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor8toStringERK5Shape", "tensorrt_llm::runtime::ITensor::toString::dims"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeE8SizeType", "tensorrt_llm::runtime::ITensor::unsqueeze"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::unsqueeze"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeE8SizeType", "tensorrt_llm::runtime::ITensor::unsqueeze::dim"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::unsqueeze::dim"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeERK5Shape8SizeType", "tensorrt_llm::runtime::ITensor::unsqueeze::shape"], [1, 5, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape", "tensorrt_llm::runtime::ITensor::view"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewE9SharedPtr", "tensorrt_llm::runtime::ITensor::view"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape", "tensorrt_llm::runtime::ITensor::view"], [1, 8, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape", "tensorrt_llm::runtime::ITensor::view::TConstPtr"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape", "tensorrt_llm::runtime::ITensor::view::buffer"], [1, 6, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape", "tensorrt_llm::runtime::ITensor::view::dims"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape", "tensorrt_llm::runtime::ITensor::view::dims"], [1, 6, 1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape", "tensorrt_llm::runtime::ITensor::view::tensor"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewE9SharedPtr", "tensorrt_llm::runtime::ITensor::view::tensor"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor6volumeERK5Shape", "tensorrt_llm::runtime::ITensor::volume"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor6volumeERK5Shape", "tensorrt_llm::runtime::ITensor::volume::dims"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor17volumeNonNegativeERK5Shape", "tensorrt_llm::runtime::ITensor::volumeNonNegative"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor17volumeNonNegativeERK5Shape", "tensorrt_llm::runtime::ITensor::volumeNonNegative::shape"], [1, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape", "tensorrt_llm::runtime::ITensor::wrap"], [1, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap"], [1, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape", "tensorrt_llm::runtime::ITensor::wrap"], [1, 5, 1, 
"_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape", "tensorrt_llm::runtime::ITensor::wrap"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape", "tensorrt_llm::runtime::ITensor::wrap::T"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::T"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::T"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::capacity"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::capacity"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape", "tensorrt_llm::runtime::ITensor::wrap::data"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::data"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::data"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::data"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape", "tensorrt_llm::runtime::ITensor::wrap::shape"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::shape"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::shape"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::shape"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::shape"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::type"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", "tensorrt_llm::runtime::ITensor::wrap::type"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape", "tensorrt_llm::runtime::ITensor::wrap::v"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7ITensorD0Ev", "tensorrt_llm::runtime::ITensor::~ITensor"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemoryE", "tensorrt_llm::runtime::IpcMemory"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory10FLAGS_SIZEE", "tensorrt_llm::runtime::IpcMemory::FLAGS_SIZE"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryERK11WorldConfigNSt6size_tE", "tensorrt_llm::runtime::IpcMemory::IpcMemory"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryERK11WorldConfigNSt6size_tE", "tensorrt_llm::runtime::IpcMemory::IpcMemory::bufferSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryERK11WorldConfigNSt6size_tE", "tensorrt_llm::runtime::IpcMemory::IpcMemory::worldConfig"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9TensorPtrE", "tensorrt_llm::runtime::IpcMemory::TensorPtr"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory17allocateIpcMemoryEv", 
"tensorrt_llm::runtime::IpcMemory::allocateIpcMemory"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory16destroyIpcMemoryEv", "tensorrt_llm::runtime::IpcMemory::destroyIpcMemory"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime9IpcMemory17getCommPtrsTensorEv", "tensorrt_llm::runtime::IpcMemory::getCommPtrsTensor"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory10mBufferPtrE", "tensorrt_llm::runtime::IpcMemory::mBufferPtr"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory11mBufferSizeE", "tensorrt_llm::runtime::IpcMemory::mBufferSize"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9mCommPtrsE", "tensorrt_llm::runtime::IpcMemory::mCommPtrs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory12mWorldConfigE", "tensorrt_llm::runtime::IpcMemory::mWorldConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9IpcMemoryD0Ev", "tensorrt_llm::runtime::IpcMemory::~IpcMemory"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCacheE", "tensorrt_llm::runtime::LoraCache"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9LoraCacheERK26LoraCachePageManagerConfigRK14GptModelConfigRK11WorldConfigRK13BufferManager", "tensorrt_llm::runtime::LoraCache::LoraCache"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9LoraCacheERK26LoraCachePageManagerConfigRK14GptModelConfigRK11WorldConfigRK13BufferManager", "tensorrt_llm::runtime::LoraCache::LoraCache::bufferManager"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9LoraCacheERK26LoraCachePageManagerConfigRK14GptModelConfigRK11WorldConfigRK13BufferManager", "tensorrt_llm::runtime::LoraCache::LoraCache::modelConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9LoraCacheERK26LoraCachePageManagerConfigRK14GptModelConfigRK11WorldConfigRK13BufferManager", "tensorrt_llm::runtime::LoraCache::LoraCache::pageManagerConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9LoraCacheERK26LoraCachePageManagerConfigRK14GptModelConfigRK11WorldConfigRK13BufferManager", "tensorrt_llm::runtime::LoraCache::LoraCache::worldConfig"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache10TaskIdTypeE", "tensorrt_llm::runtime::LoraCache::TaskIdType"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfigE", "tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig11adapterSizeE", "tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::adapterSize"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig6inSizeE", "tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::inSize"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig7layerIdE", "tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::layerId"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig8moduleIdE", "tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::moduleId"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig8numSlotsE", "tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::numSlots"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfigeqERKN9LoraCache21TaskLayerModuleConfigE", "tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::operator=="], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfigeqERKN9LoraCache21TaskLayerModuleConfigE", "tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::operator==::o"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig7outSizeE", 
"tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::outSize"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig6pageIdE", "tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::pageId"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig7slotIdxE", "tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::slotIdx"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig8toStringEv", "tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::toString"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig16weightsInPointerE", "tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::weightsInPointer"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig17weightsOutPointerE", "tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::weightsOutPointer"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache28TaskLayerModuleConfigListPtrE", "tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfigListPtr"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValueE", "tensorrt_llm::runtime::LoraCache::TaskValue"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERKNSt6vectorINSt6size_tEEERK28TaskLayerModuleConfigListPtrNSt4listI10TaskIdTypeE8iteratorEbbbb", "tensorrt_llm::runtime::LoraCache::TaskValue::TaskValue"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERR9TaskValue", "tensorrt_llm::runtime::LoraCache::TaskValue::TaskValue"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueEv", "tensorrt_llm::runtime::LoraCache::TaskValue::TaskValue"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERKNSt6vectorINSt6size_tEEERK28TaskLayerModuleConfigListPtrNSt4listI10TaskIdTypeE8iteratorEbbbb", "tensorrt_llm::runtime::LoraCache::TaskValue::TaskValue::configs"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERKNSt6vectorINSt6size_tEEERK28TaskLayerModuleConfigListPtrNSt4listI10TaskIdTypeE8iteratorEbbbb", "tensorrt_llm::runtime::LoraCache::TaskValue::TaskValue::done"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERKNSt6vectorINSt6size_tEEERK28TaskLayerModuleConfigListPtrNSt4listI10TaskIdTypeE8iteratorEbbbb", "tensorrt_llm::runtime::LoraCache::TaskValue::TaskValue::inProgress"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERKNSt6vectorINSt6size_tEEERK28TaskLayerModuleConfigListPtrNSt4listI10TaskIdTypeE8iteratorEbbbb", "tensorrt_llm::runtime::LoraCache::TaskValue::TaskValue::it"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERKNSt6vectorINSt6size_tEEERK28TaskLayerModuleConfigListPtrNSt4listI10TaskIdTypeE8iteratorEbbbb", "tensorrt_llm::runtime::LoraCache::TaskValue::TaskValue::loadInProgress"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERKNSt6vectorINSt6size_tEEERK28TaskLayerModuleConfigListPtrNSt4listI10TaskIdTypeE8iteratorEbbbb", "tensorrt_llm::runtime::LoraCache::TaskValue::TaskValue::loaded"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERR9TaskValue", "tensorrt_llm::runtime::LoraCache::TaskValue::TaskValue::o"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERKNSt6vectorINSt6size_tEEERK28TaskLayerModuleConfigListPtrNSt4listI10TaskIdTypeE8iteratorEbbbb", "tensorrt_llm::runtime::LoraCache::TaskValue::TaskValue::pageIds"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue7configsE", 
"tensorrt_llm::runtime::LoraCache::TaskValue::configs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue4doneE", "tensorrt_llm::runtime::LoraCache::TaskValue::done"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue10inProgressE", "tensorrt_llm::runtime::LoraCache::TaskValue::inProgress"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue2itE", "tensorrt_llm::runtime::LoraCache::TaskValue::it"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue14loadInProgressE", "tensorrt_llm::runtime::LoraCache::TaskValue::loadInProgress"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue6loadedE", "tensorrt_llm::runtime::LoraCache::TaskValue::loaded"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValueaSERR9TaskValue", "tensorrt_llm::runtime::LoraCache::TaskValue::operator="], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValueaSERR9TaskValue", "tensorrt_llm::runtime::LoraCache::TaskValue::operator=::o"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue7pageIdsE", "tensorrt_llm::runtime::LoraCache::TaskValue::pageIds"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValueD0Ev", "tensorrt_llm::runtime::LoraCache::TaskValue::~TaskValue"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache12TaskValuePtrE", "tensorrt_llm::runtime::LoraCache::TaskValuePtr"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TensorPtrE", "tensorrt_llm::runtime::LoraCache::TensorPtr"], [1, 2, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatusE", "tensorrt_llm::runtime::LoraCache::ValueStatus"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatus20kVALUE_STATUS_LOADEDE", "tensorrt_llm::runtime::LoraCache::ValueStatus::kVALUE_STATUS_LOADED"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatus21kVALUE_STATUS_MISSINGE", "tensorrt_llm::runtime::LoraCache::ValueStatus::kVALUE_STATUS_MISSING"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatus24kVALUE_STATUS_PROCESSINGE", "tensorrt_llm::runtime::LoraCache::ValueStatus::kVALUE_STATUS_PROCESSING"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache4bumpE10TaskIdType", "tensorrt_llm::runtime::LoraCache::bump"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache4bumpE10TaskIdType", "tensorrt_llm::runtime::LoraCache::bump::taskId"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache18bumpTaskInProgressE10TaskIdType", "tensorrt_llm::runtime::LoraCache::bumpTaskInProgress"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache18bumpTaskInProgressE10TaskIdType", "tensorrt_llm::runtime::LoraCache::bumpTaskInProgress::taskId"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache19claimPagesWithEvictE8SizeType", "tensorrt_llm::runtime::LoraCache::claimPagesWithEvict"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache19claimPagesWithEvictE8SizeType", "tensorrt_llm::runtime::LoraCache::claimPagesWithEvict::numPages"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache8copyTaskE10TaskIdTypeR9LoraCacheb", "tensorrt_llm::runtime::LoraCache::copyTask"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache8copyTaskE10TaskIdTypeR9LoraCacheb", "tensorrt_llm::runtime::LoraCache::copyTask::deviceCache"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache8copyTaskE10TaskIdTypeR9LoraCacheb", "tensorrt_llm::runtime::LoraCache::copyTask::markDone"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache8copyTaskE10TaskIdTypeR9LoraCacheb", "tensorrt_llm::runtime::LoraCache::copyTask::taskId"], [1, 5, 1, 
"_CPPv4N12tensorrt_llm7runtime9LoraCache16copyTaskMapPagesER9TaskValueRK9TaskValueRKNSt6vectorI6size_tEERK9LoraCache", "tensorrt_llm::runtime::LoraCache::copyTaskMapPages"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache16copyTaskMapPagesER9TaskValueRK9TaskValueRKNSt6vectorI6size_tEERK9LoraCache", "tensorrt_llm::runtime::LoraCache::copyTaskMapPages::sourceTaskValue"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache16copyTaskMapPagesER9TaskValueRK9TaskValueRKNSt6vectorI6size_tEERK9LoraCache", "tensorrt_llm::runtime::LoraCache::copyTaskMapPages::targetCache"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache16copyTaskMapPagesER9TaskValueRK9TaskValueRKNSt6vectorI6size_tEERK9LoraCache", "tensorrt_llm::runtime::LoraCache::copyTaskMapPages::targetPageIds"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache16copyTaskMapPagesER9TaskValueRK9TaskValueRKNSt6vectorI6size_tEERK9LoraCache", "tensorrt_llm::runtime::LoraCache::copyTaskMapPages::targetTaskValue"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11copyToPagesE9TensorPtr9TensorPtrRK14GptModelConfigRK11WorldConfigNSt13unordered_mapI8SizeType10LoraModuleEERK13BufferManagerRKNSt6vectorI9TensorPtrEERKNSt6vectorINSt6size_tEEE", "tensorrt_llm::runtime::LoraCache::copyToPages"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11copyToPagesE9TensorPtr9TensorPtrRK14GptModelConfigRK11WorldConfigNSt13unordered_mapI8SizeType10LoraModuleEERK13BufferManagerRKNSt6vectorI9TensorPtrEERKNSt6vectorINSt6size_tEEE", "tensorrt_llm::runtime::LoraCache::copyToPages::config"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11copyToPagesE9TensorPtr9TensorPtrRK14GptModelConfigRK11WorldConfigNSt13unordered_mapI8SizeType10LoraModuleEERK13BufferManagerRKNSt6vectorI9TensorPtrEERKNSt6vectorINSt6size_tEEE", "tensorrt_llm::runtime::LoraCache::copyToPages::manager"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11copyToPagesE9TensorPtr9TensorPtrRK14GptModelConfigRK11WorldConfigNSt13unordered_mapI8SizeType10LoraModuleEERK13BufferManagerRKNSt6vectorI9TensorPtrEERKNSt6vectorINSt6size_tEEE", "tensorrt_llm::runtime::LoraCache::copyToPages::modelConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11copyToPagesE9TensorPtr9TensorPtrRK14GptModelConfigRK11WorldConfigNSt13unordered_mapI8SizeType10LoraModuleEERK13BufferManagerRKNSt6vectorI9TensorPtrEERKNSt6vectorINSt6size_tEEE", "tensorrt_llm::runtime::LoraCache::copyToPages::moduleIdToModel"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11copyToPagesE9TensorPtr9TensorPtrRK14GptModelConfigRK11WorldConfigNSt13unordered_mapI8SizeType10LoraModuleEERK13BufferManagerRKNSt6vectorI9TensorPtrEERKNSt6vectorINSt6size_tEEE", "tensorrt_llm::runtime::LoraCache::copyToPages::pageIds"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11copyToPagesE9TensorPtr9TensorPtrRK14GptModelConfigRK11WorldConfigNSt13unordered_mapI8SizeType10LoraModuleEERK13BufferManagerRKNSt6vectorI9TensorPtrEERKNSt6vectorINSt6size_tEEE", "tensorrt_llm::runtime::LoraCache::copyToPages::pages"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11copyToPagesE9TensorPtr9TensorPtrRK14GptModelConfigRK11WorldConfigNSt13unordered_mapI8SizeType10LoraModuleEERK13BufferManagerRKNSt6vectorI9TensorPtrEERKNSt6vectorINSt6size_tEEE", "tensorrt_llm::runtime::LoraCache::copyToPages::weights"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11copyToPagesE9TensorPtr9TensorPtrRK14GptModelConfigRK11WorldConfigNSt13unordered_mapI8SizeType10LoraModuleEERK13BufferManagerRKNSt6vectorI9TensorPtrEERKNSt6vectorINSt6size_tEEE", 
"tensorrt_llm::runtime::LoraCache::copyToPages::worldConfig"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache17determineNumPagesE10TaskIdType", "tensorrt_llm::runtime::LoraCache::determineNumPages"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache17determineNumPagesE9TensorPtr", "tensorrt_llm::runtime::LoraCache::determineNumPages"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache17determineNumPagesE9TensorPtr", "tensorrt_llm::runtime::LoraCache::determineNumPages::config"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache17determineNumPagesE10TaskIdType", "tensorrt_llm::runtime::LoraCache::determineNumPages::taskId"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache4fitsE9TensorPtr", "tensorrt_llm::runtime::LoraCache::fits"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache4fitsE9TensorPtr", "tensorrt_llm::runtime::LoraCache::fits::config"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache3getE10TaskIdType", "tensorrt_llm::runtime::LoraCache::get"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache3getE10TaskIdType", "tensorrt_llm::runtime::LoraCache::get::taskId"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache11getNumPagesEv", "tensorrt_llm::runtime::LoraCache::getNumPages"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache10getPagePtrE6size_t", "tensorrt_llm::runtime::LoraCache::getPagePtr"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache10getPagePtrE6size_t", "tensorrt_llm::runtime::LoraCache::getPagePtr::pageId"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache9getStatusE10TaskIdType", "tensorrt_llm::runtime::LoraCache::getStatus"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache9getStatusE10TaskIdType", "tensorrt_llm::runtime::LoraCache::getStatus::taskId"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache3hasE10TaskIdType", "tensorrt_llm::runtime::LoraCache::has"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache3hasE10TaskIdType", "tensorrt_llm::runtime::LoraCache::has::taskId"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache6isDoneE10TaskIdType", "tensorrt_llm::runtime::LoraCache::isDone"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache6isDoneE10TaskIdType", "tensorrt_llm::runtime::LoraCache::isDone::taskId"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache8isLoadedE10TaskIdType", "tensorrt_llm::runtime::LoraCache::isLoaded"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache8isLoadedE10TaskIdType", "tensorrt_llm::runtime::LoraCache::isLoaded::taskId"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11loadWeightsE10TaskIdType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::LoraCache::loadWeights"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11loadWeightsER9TaskValue9TensorPtr9TensorPtr", "tensorrt_llm::runtime::LoraCache::loadWeights"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11loadWeightsER9TaskValue9TensorPtr9TensorPtr", "tensorrt_llm::runtime::LoraCache::loadWeights::cacheValue"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11loadWeightsE10TaskIdType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::LoraCache::loadWeights::config"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11loadWeightsER9TaskValue9TensorPtr9TensorPtr", "tensorrt_llm::runtime::LoraCache::loadWeights::config"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11loadWeightsE10TaskIdType9TensorPtr9TensorPtr", "tensorrt_llm::runtime::LoraCache::loadWeights::taskId"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11loadWeightsE10TaskIdType9TensorPtr9TensorPtr", 
"tensorrt_llm::runtime::LoraCache::loadWeights::weights"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11loadWeightsER9TaskValue9TensorPtr9TensorPtr", "tensorrt_llm::runtime::LoraCache::loadWeights::weights"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache14mBufferManagerE", "tensorrt_llm::runtime::LoraCache::mBufferManager"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9mCacheMapE", "tensorrt_llm::runtime::LoraCache::mCacheMap"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11mCacheMutexE", "tensorrt_llm::runtime::LoraCache::mCacheMutex"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache17mCachePageManagerE", "tensorrt_llm::runtime::LoraCache::mCachePageManager"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21mDeviceBufferManagersE", "tensorrt_llm::runtime::LoraCache::mDeviceBufferManagers"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache10mDoneTasksE", "tensorrt_llm::runtime::LoraCache::mDoneTasks"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache16mInProgressTasksE", "tensorrt_llm::runtime::LoraCache::mInProgressTasks"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache12mModelConfigE", "tensorrt_llm::runtime::LoraCache::mModelConfig"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache17mModuleIdToModuleE", "tensorrt_llm::runtime::LoraCache::mModuleIdToModule"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache18mPageManagerConfigE", "tensorrt_llm::runtime::LoraCache::mPageManagerConfig"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11mPagesMutexE", "tensorrt_llm::runtime::LoraCache::mPagesMutex"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache12mWorldConfigE", "tensorrt_llm::runtime::LoraCache::mWorldConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11markAllDoneEv", "tensorrt_llm::runtime::LoraCache::markAllDone"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache12markTaskDoneE10TaskIdType", "tensorrt_llm::runtime::LoraCache::markTaskDone"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache12markTaskDoneE10TaskIdType", "tensorrt_llm::runtime::LoraCache::markTaskDone::taskId"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache3putE10TaskIdType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::LoraCache::put"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache3putE10TaskIdType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::LoraCache::put::config"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache3putE10TaskIdType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::LoraCache::put::load"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache3putE10TaskIdType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::LoraCache::put::taskId"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache3putE10TaskIdType9TensorPtr9TensorPtrb", "tensorrt_llm::runtime::LoraCache::put::weights"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache17splitTransposeCpuER7ITensorRK7ITensor8SizeType8SizeType", "tensorrt_llm::runtime::LoraCache::splitTransposeCpu"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache17splitTransposeCpuER7ITensorRK7ITensor8SizeType8SizeType", "tensorrt_llm::runtime::LoraCache::splitTransposeCpu::input"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache17splitTransposeCpuER7ITensorRK7ITensor8SizeType8SizeType", "tensorrt_llm::runtime::LoraCache::splitTransposeCpu::output"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9LoraCache17splitTransposeCpuER7ITensorRK7ITensor8SizeType8SizeType", "tensorrt_llm::runtime::LoraCache::splitTransposeCpu::tpRank"], [1, 6, 1, 
"_CPPv4N12tensorrt_llm7runtime9LoraCache17splitTransposeCpuER7ITensorRK7ITensor8SizeType8SizeType", "tensorrt_llm::runtime::LoraCache::splitTransposeCpu::tpSize"], [1, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime9LoraCache22splitTransposeCpuInnerEvR7ITensorRK7ITensor8SizeType8SizeType", "tensorrt_llm::runtime::LoraCache::splitTransposeCpuInner"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime9LoraCache22splitTransposeCpuInnerEvR7ITensorRK7ITensor8SizeType8SizeType", "tensorrt_llm::runtime::LoraCache::splitTransposeCpuInner::T"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime9LoraCache22splitTransposeCpuInnerEvR7ITensorRK7ITensor8SizeType8SizeType", "tensorrt_llm::runtime::LoraCache::splitTransposeCpuInner::input"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime9LoraCache22splitTransposeCpuInnerEvR7ITensorRK7ITensor8SizeType8SizeType", "tensorrt_llm::runtime::LoraCache::splitTransposeCpuInner::output"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime9LoraCache22splitTransposeCpuInnerEvR7ITensorRK7ITensor8SizeType8SizeType", "tensorrt_llm::runtime::LoraCache::splitTransposeCpuInner::tpRank"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime9LoraCache22splitTransposeCpuInnerEvR7ITensorRK7ITensor8SizeType8SizeType", "tensorrt_llm::runtime::LoraCache::splitTransposeCpuInner::tpSize"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManagerE", "tensorrt_llm::runtime::LoraCachePageManager"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager20LoraCachePageManagerERK26LoraCachePageManagerConfigRK13BufferManager", "tensorrt_llm::runtime::LoraCachePageManager::LoraCachePageManager"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager20LoraCachePageManagerERK26LoraCachePageManagerConfigRK13BufferManager", "tensorrt_llm::runtime::LoraCachePageManager::LoraCachePageManager::bufferManager"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager20LoraCachePageManagerERK26LoraCachePageManagerConfigRK13BufferManager", "tensorrt_llm::runtime::LoraCachePageManager::LoraCachePageManager::config"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager9TensorPtrE", "tensorrt_llm::runtime::LoraCachePageManager::TensorPtr"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager8blockPtrE8SizeType", "tensorrt_llm::runtime::LoraCachePageManager::blockPtr"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager8blockPtrE8SizeType", "tensorrt_llm::runtime::LoraCachePageManager::blockPtr::blockIdx"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager10claimPagesE8SizeType", "tensorrt_llm::runtime::LoraCachePageManager::claimPages"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager10claimPagesE8SizeType", "tensorrt_llm::runtime::LoraCachePageManager::claimPages::numPages"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager10initializeERK13BufferManager", "tensorrt_llm::runtime::LoraCachePageManager::initialize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager10initializeERK13BufferManager", "tensorrt_llm::runtime::LoraCachePageManager::initialize::bufferManager"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager7mConfigE", "tensorrt_llm::runtime::LoraCachePageManager::mConfig"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager12mFreePageIdsE", "tensorrt_llm::runtime::LoraCachePageManager::mFreePageIds"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager11mIsPageFreeE", "tensorrt_llm::runtime::LoraCachePageManager::mIsPageFree"], [1, 7, 1, 
"_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager11mPageBlocksE", "tensorrt_llm::runtime::LoraCachePageManager::mPageBlocks"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager14mutablePagePtrENSt6size_tE", "tensorrt_llm::runtime::LoraCachePageManager::mutablePagePtr"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager14mutablePagePtrENSt6size_tE", "tensorrt_llm::runtime::LoraCachePageManager::mutablePagePtr::pageIdx"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager17numAvailablePagesEv", "tensorrt_llm::runtime::LoraCachePageManager::numAvailablePages"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager7pagePtrENSt6size_tE", "tensorrt_llm::runtime::LoraCachePageManager::pagePtr"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager7pagePtrENSt6size_tE", "tensorrt_llm::runtime::LoraCachePageManager::pagePtr::pageIdx"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager12releasePagesERKNSt6vectorINSt6size_tEEE", "tensorrt_llm::runtime::LoraCachePageManager::releasePages"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager12releasePagesERKNSt6vectorINSt6size_tEEE", "tensorrt_llm::runtime::LoraCachePageManager::releasePages::pages"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfigE", "tensorrt_llm::runtime::LoraCachePageManagerConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig26LoraCachePageManagerConfigEN7runtime10MemoryTypeEN8nvinfer18DataTypeE8SizeType8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::LoraCachePageManagerConfig::LoraCachePageManagerConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig26LoraCachePageManagerConfigEN7runtime10MemoryTypeEN8nvinfer18DataTypeE8SizeType8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::LoraCachePageManagerConfig::LoraCachePageManagerConfig::dType"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig26LoraCachePageManagerConfigEN7runtime10MemoryTypeEN8nvinfer18DataTypeE8SizeType8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::LoraCachePageManagerConfig::LoraCachePageManagerConfig::maxPagesPerBlock"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig26LoraCachePageManagerConfigEN7runtime10MemoryTypeEN8nvinfer18DataTypeE8SizeType8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::LoraCachePageManagerConfig::LoraCachePageManagerConfig::memType"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig26LoraCachePageManagerConfigEN7runtime10MemoryTypeEN8nvinfer18DataTypeE8SizeType8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::LoraCachePageManagerConfig::LoraCachePageManagerConfig::numCopyStreams"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig26LoraCachePageManagerConfigEN7runtime10MemoryTypeEN8nvinfer18DataTypeE8SizeType8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::LoraCachePageManagerConfig::LoraCachePageManagerConfig::pageWidth"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig26LoraCachePageManagerConfigEN7runtime10MemoryTypeEN8nvinfer18DataTypeE8SizeType8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::LoraCachePageManagerConfig::LoraCachePageManagerConfig::slotsPerPage"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig26LoraCachePageManagerConfigEN7runtime10MemoryTypeEN8nvinfer18DataTypeE8SizeType8SizeType8SizeType8SizeType8SizeType", 
"tensorrt_llm::runtime::LoraCachePageManagerConfig::LoraCachePageManagerConfig::totalNumPages"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig11getDataTypeEv", "tensorrt_llm::runtime::LoraCachePageManagerConfig::getDataType"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig13getInitToZeroEv", "tensorrt_llm::runtime::LoraCachePageManagerConfig::getInitToZero"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig19getMaxPagesPerBlockEv", "tensorrt_llm::runtime::LoraCachePageManagerConfig::getMaxPagesPerBlock"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig13getMemoryTypeEv", "tensorrt_llm::runtime::LoraCachePageManagerConfig::getMemoryType"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig17getNumCopyStreamsEv", "tensorrt_llm::runtime::LoraCachePageManagerConfig::getNumCopyStreams"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig12getPageWidthEv", "tensorrt_llm::runtime::LoraCachePageManagerConfig::getPageWidth"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig15getSlotsPerPageEv", "tensorrt_llm::runtime::LoraCachePageManagerConfig::getSlotsPerPage"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig16getTotalNumPagesEv", "tensorrt_llm::runtime::LoraCachePageManagerConfig::getTotalNumPages"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig9mDataTypeE", "tensorrt_llm::runtime::LoraCachePageManagerConfig::mDataType"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig11mInitToZeroE", "tensorrt_llm::runtime::LoraCachePageManagerConfig::mInitToZero"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig17mMaxPagesPerBlockE", "tensorrt_llm::runtime::LoraCachePageManagerConfig::mMaxPagesPerBlock"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig11mMemoryTypeE", "tensorrt_llm::runtime::LoraCachePageManagerConfig::mMemoryType"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15mNumCopyStreamsE", "tensorrt_llm::runtime::LoraCachePageManagerConfig::mNumCopyStreams"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig10mPageWidthE", "tensorrt_llm::runtime::LoraCachePageManagerConfig::mPageWidth"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13mSlotsPerPageE", "tensorrt_llm::runtime::LoraCachePageManagerConfig::mSlotsPerPage"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig14mTotalNumPagesE", "tensorrt_llm::runtime::LoraCachePageManagerConfig::mTotalNumPages"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig11setDataTypeERKN8nvinfer18DataTypeE", "tensorrt_llm::runtime::LoraCachePageManagerConfig::setDataType"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig11setDataTypeERKN8nvinfer18DataTypeE", "tensorrt_llm::runtime::LoraCachePageManagerConfig::setDataType::dtype"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13setInitToZeroEb", "tensorrt_llm::runtime::LoraCachePageManagerConfig::setInitToZero"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13setInitToZeroEb", "tensorrt_llm::runtime::LoraCachePageManagerConfig::setInitToZero::initToZero"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig19setMaxPagesPerBlockERK8SizeType", "tensorrt_llm::runtime::LoraCachePageManagerConfig::setMaxPagesPerBlock"], [1, 6, 1, 
"_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig19setMaxPagesPerBlockERK8SizeType", "tensorrt_llm::runtime::LoraCachePageManagerConfig::setMaxPagesPerBlock::maxPagesPerBlock"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13setMemoryTypeERKN7runtime10MemoryTypeE", "tensorrt_llm::runtime::LoraCachePageManagerConfig::setMemoryType"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13setMemoryTypeERKN7runtime10MemoryTypeE", "tensorrt_llm::runtime::LoraCachePageManagerConfig::setMemoryType::memoryType"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig17setNumCopyStreamsE8SizeType", "tensorrt_llm::runtime::LoraCachePageManagerConfig::setNumCopyStreams"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig17setNumCopyStreamsE8SizeType", "tensorrt_llm::runtime::LoraCachePageManagerConfig::setNumCopyStreams::numCopyStreams"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig12setPageWidthERK8SizeType", "tensorrt_llm::runtime::LoraCachePageManagerConfig::setPageWidth"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig12setPageWidthERK8SizeType", "tensorrt_llm::runtime::LoraCachePageManagerConfig::setPageWidth::pageWidth"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15setSlotsPerPageERK8SizeType", "tensorrt_llm::runtime::LoraCachePageManagerConfig::setSlotsPerPage"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15setSlotsPerPageERK8SizeType", "tensorrt_llm::runtime::LoraCachePageManagerConfig::setSlotsPerPage::slotsPerPage"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15setTotalNumPageERK8SizeType", "tensorrt_llm::runtime::LoraCachePageManagerConfig::setTotalNumPage"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15setTotalNumPageERK8SizeType", "tensorrt_llm::runtime::LoraCachePageManagerConfig::setTotalNumPage::totalNumPages"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModuleE", "tensorrt_llm::runtime::LoraModule"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10LoraModule", "tensorrt_llm::runtime::LoraModule::LoraModule"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10ModuleType8SizeType8SizeTypebb8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::LoraModule"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleEv", "tensorrt_llm::runtime::LoraModule::LoraModule"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10ModuleType8SizeType8SizeTypebb8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::LoraModule::inDim"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10ModuleType8SizeType8SizeTypebb8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::LoraModule::inDimFirst"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10ModuleType8SizeType8SizeTypebb8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::LoraModule::inTpSplitDim"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10LoraModule", "tensorrt_llm::runtime::LoraModule::LoraModule::o"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10ModuleType8SizeType8SizeTypebb8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::LoraModule::outDim"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10ModuleType8SizeType8SizeTypebb8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::LoraModule::outDimFirst"], [1, 6, 1, 
"_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10ModuleType8SizeType8SizeTypebb8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::LoraModule::outTpSplitDim"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10ModuleType8SizeType8SizeTypebb8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::LoraModule::t"], [1, 2, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleTypeE", "tensorrt_llm::runtime::LoraModule::ModuleType"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType11kATTN_DENSEE", "tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_DENSE"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType7kATTN_KE", "tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_K"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType7kATTN_QE", "tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_Q"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType9kATTN_QKVE", "tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_QKV"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType7kATTN_VE", "tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_V"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType17kCROSS_ATTN_DENSEE", "tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_DENSE"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType13kCROSS_ATTN_KE", "tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_K"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType13kCROSS_ATTN_QE", "tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_Q"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType15kCROSS_ATTN_QKVE", "tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_QKV"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType13kCROSS_ATTN_VE", "tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_V"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType8kINVALIDE", "tensorrt_llm::runtime::LoraModule::ModuleType::kINVALID"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMLP_4H_TO_HE", "tensorrt_llm::runtime::LoraModule::ModuleType::kMLP_4H_TO_H"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType9kMLP_GATEE", "tensorrt_llm::runtime::LoraModule::ModuleType::kMLP_GATE"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMLP_H_TO_4HE", "tensorrt_llm::runtime::LoraModule::ModuleType::kMLP_H_TO_4H"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule9TensorPtrE", "tensorrt_llm::runtime::LoraModule::TensorPtr"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule17createLoraModulesERKNSt6vectorINSt6stringEEE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::createLoraModules"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule17createLoraModulesERKNSt6vectorINSt6stringEEE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::createLoraModules::attentionHeadSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule17createLoraModulesERKNSt6vectorINSt6stringEEE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::createLoraModules::hiddenSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule17createLoraModulesERKNSt6vectorINSt6stringEEE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::createLoraModules::loraModuleNames"], [1, 6, 1, 
"_CPPv4N12tensorrt_llm7runtime10LoraModule17createLoraModulesERKNSt6vectorINSt6stringEEE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::createLoraModules::mlpHiddenSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule17createLoraModulesERKNSt6vectorINSt6stringEEE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::createLoraModules::numAttentionHeads"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule17createLoraModulesERKNSt6vectorINSt6stringEEE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::createLoraModules::numKvAttentionHeads"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule17createLoraModulesERKNSt6vectorINSt6stringEEE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::createLoraModules::tpSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule18flattenedInOutSizeE8SizeType", "tensorrt_llm::runtime::LoraModule::flattenedInOutSize"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule18flattenedInOutSizeE8SizeType", "tensorrt_llm::runtime::LoraModule::flattenedInOutSize::adapterSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule5inDimEv", "tensorrt_llm::runtime::LoraModule::inDim"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule10inDimFirstEv", "tensorrt_llm::runtime::LoraModule::inDimFirst"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule6inSizeE8SizeType", "tensorrt_llm::runtime::LoraModule::inSize"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule6inSizeE8SizeType", "tensorrt_llm::runtime::LoraModule::inSize::adapterSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule12inTpSplitDimEv", "tensorrt_llm::runtime::LoraModule::inTpSplitDim"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule18localInAdapterSizeE8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::localInAdapterSize"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule18localInAdapterSizeE8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::localInAdapterSize::adapterSize"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule18localInAdapterSizeE8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::localInAdapterSize::tpSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule10localInDimE8SizeType", "tensorrt_llm::runtime::LoraModule::localInDim"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule10localInDimE8SizeType", "tensorrt_llm::runtime::LoraModule::localInDim::tpSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule14localInOutSizeE8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::localInOutSize"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule14localInOutSizeE8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::localInOutSize::adapterSize"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule14localInOutSizeE8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::localInOutSize::tpSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule11localInSizeE8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::localInSize"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule11localInSizeE8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::localInSize::adapterSize"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule11localInSizeE8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::localInSize::tpSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule19localOutAdapterSizeE8SizeType8SizeType", 
"tensorrt_llm::runtime::LoraModule::localOutAdapterSize"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule19localOutAdapterSizeE8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::localOutAdapterSize::adapterSize"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule19localOutAdapterSizeE8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::localOutAdapterSize::tpSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule11localOutDimE8SizeType", "tensorrt_llm::runtime::LoraModule::localOutDim"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule11localOutDimE8SizeType", "tensorrt_llm::runtime::LoraModule::localOutDim::tpSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule12localOutSizeE8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::localOutSize"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule12localOutSizeE8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::localOutSize::adapterSize"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule12localOutSizeE8SizeType8SizeType", "tensorrt_llm::runtime::LoraModule::localOutSize::tpSize"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule6mInDimE", "tensorrt_llm::runtime::LoraModule::mInDim"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule11mInDimFirstE", "tensorrt_llm::runtime::LoraModule::mInDimFirst"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule13mInTpSplitDimE", "tensorrt_llm::runtime::LoraModule::mInTpSplitDim"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule7mOutDimE", "tensorrt_llm::runtime::LoraModule::mOutDim"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule12mOutDimFirstE", "tensorrt_llm::runtime::LoraModule::mOutDimFirst"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule14mOutTpSplitDimE", "tensorrt_llm::runtime::LoraModule::mOutTpSplitDim"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule5mTypeE", "tensorrt_llm::runtime::LoraModule::mType"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule4nameEv", "tensorrt_llm::runtime::LoraModule::name"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModuleaSERK10LoraModule", "tensorrt_llm::runtime::LoraModule::operator="], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModuleaSERK10LoraModule", "tensorrt_llm::runtime::LoraModule::operator=::o"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule6outDimEv", "tensorrt_llm::runtime::LoraModule::outDim"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule11outDimFirstEv", "tensorrt_llm::runtime::LoraModule::outDimFirst"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule7outSizeE8SizeType", "tensorrt_llm::runtime::LoraModule::outSize"], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule7outSizeE8SizeType", "tensorrt_llm::runtime::LoraModule::outSize::adapterSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule13outTpSplitDimEv", "tensorrt_llm::runtime::LoraModule::outTpSplitDim"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleNameE10ModuleType", "tensorrt_llm::runtime::LoraModule::toModuleName"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleNameE8SizeType", "tensorrt_llm::runtime::LoraModule::toModuleName"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleNameE8SizeType", "tensorrt_llm::runtime::LoraModule::toModuleName::id"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleNameE10ModuleType", "tensorrt_llm::runtime::LoraModule::toModuleName::t"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleTypeERKNSt11string_viewE", 
"tensorrt_llm::runtime::LoraModule::toModuleType"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleTypeERKNSt11string_viewE", "tensorrt_llm::runtime::LoraModule::toModuleType::name"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule5valueEv", "tensorrt_llm::runtime::LoraModule::value"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime11MambaConfigE", "tensorrt_llm::runtime::MambaConfig"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11MambaConfig5dConvE", "tensorrt_llm::runtime::MambaConfig::dConv"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11MambaConfig6dStateE", "tensorrt_llm::runtime::MambaConfig::dState"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11MambaConfig6expandE", "tensorrt_llm::runtime::MambaConfig::expand"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCountersE", "tensorrt_llm::runtime::MemoryCounters"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8DiffTypeE", "tensorrt_llm::runtime::MemoryCounters::DiffType"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters14MemoryCountersEv", "tensorrt_llm::runtime::MemoryCounters::MemoryCounters"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8SizeTypeE", "tensorrt_llm::runtime::MemoryCounters::SizeType"], [1, 5, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate"], [1, 8, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate::T"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate::memoryType"], [1, 6, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate::size"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::allocate::size"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8SizeTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString::bytes"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8SizeTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString::bytes"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString::precision"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8SizeTypei", "tensorrt_llm::runtime::MemoryCounters::bytesToString::precision"], [1, 5, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate"], [1, 8, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate::T"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType8SizeType", 
"tensorrt_llm::runtime::MemoryCounters::deallocate::memoryType"], [1, 6, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate::size"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType8SizeType", "tensorrt_llm::runtime::MemoryCounters::deallocate::size"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getCpuEv", "tensorrt_llm::runtime::MemoryCounters::getCpu"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getCpuDiffEv", "tensorrt_llm::runtime::MemoryCounters::getCpuDiff"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getGpuEv", "tensorrt_llm::runtime::MemoryCounters::getGpu"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getGpuDiffEv", "tensorrt_llm::runtime::MemoryCounters::getGpuDiff"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters11getInstanceEv", "tensorrt_llm::runtime::MemoryCounters::getInstance"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters9getPinnedEv", "tensorrt_llm::runtime::MemoryCounters::getPinned"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters13getPinnedDiffEv", "tensorrt_llm::runtime::MemoryCounters::getPinnedDiff"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getUVMEv", "tensorrt_llm::runtime::MemoryCounters::getUVM"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getUVMDiffEv", "tensorrt_llm::runtime::MemoryCounters::getUVMDiff"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mCpuE", "tensorrt_llm::runtime::MemoryCounters::mCpu"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mCpuDiffE", "tensorrt_llm::runtime::MemoryCounters::mCpuDiff"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mGpuE", "tensorrt_llm::runtime::MemoryCounters::mGpu"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mGpuDiffE", "tensorrt_llm::runtime::MemoryCounters::mGpuDiff"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters7mPinnedE", "tensorrt_llm::runtime::MemoryCounters::mPinned"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters11mPinnedDiffE", "tensorrt_llm::runtime::MemoryCounters::mPinnedDiff"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mUVME", "tensorrt_llm::runtime::MemoryCounters::mUVM"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mUVMDiffE", "tensorrt_llm::runtime::MemoryCounters::mUVMDiff"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters8toStringEv", "tensorrt_llm::runtime::MemoryCounters::toString"], [1, 2, 1, "_CPPv4N12tensorrt_llm7runtime10MemoryTypeE", "tensorrt_llm::runtime::MemoryType"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kCPUE", "tensorrt_llm::runtime::MemoryType::kCPU"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kGPUE", "tensorrt_llm::runtime::MemoryType::kGPU"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime10MemoryType7kPINNEDE", "tensorrt_llm::runtime::MemoryType::kPINNED"], [1, 3, 1, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kUVME", "tensorrt_llm::runtime::MemoryType::kUVM"], [1, 4, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime16MemoryTypeStringE", "tensorrt_llm::runtime::MemoryTypeString"], [1, 8, 1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime16MemoryTypeStringE", "tensorrt_llm::runtime::MemoryTypeString::T"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEEE", "tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kCPU&gt;"], [1, 7, 1, 
"_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEE5valueE", "tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kCPU&gt;::value"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEEE", "tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kGPU&gt;"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEE5valueE", "tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kGPU&gt;::value"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEEE", "tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kPINNED&gt;"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEE5valueE", "tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kPINNED&gt;::value"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kUVMEEE", "tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kUVM&gt;"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kUVMEE5valueE", "tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kUVM&gt;::value"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE", "tensorrt_llm::runtime::PhonyNameDueToError::name"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE", "tensorrt_llm::runtime::PhonyNameDueToError::size"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", 
"tensorrt_llm::runtime::PhonyNameDueToError::type"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE", "tensorrt_llm::runtime::PhonyNameDueToError::type"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE", "tensorrt_llm::runtime::PhonyNameDueToError::value"], [1, 1, 1, "_CPPv4I0EN12tensorrt_llm7runtime18PointerElementTypeE", "tensorrt_llm::runtime::PointerElementType"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime18PointerElementTypeE", "tensorrt_llm::runtime::PointerElementType::T"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParamsE", "tensorrt_llm::runtime::PromptTuningParams"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams18PromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::PromptTuningParams::PromptTuningParams"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams18PromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::PromptTuningParams::PromptTuningParams::embeddingTable"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams18PromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::PromptTuningParams::PromptTuningParams::tasks"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams18PromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", "tensorrt_llm::runtime::PromptTuningParams::PromptTuningParams::vocabSize"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams8SizeTypeE", "tensorrt_llm::runtime::PromptTuningParams::SizeType"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams9TensorPtrE", "tensorrt_llm::runtime::PromptTuningParams::TensorPtr"], [1, 5, 1, 
"_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::batchSize"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::manager"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::numContextRequests"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::packedInput"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::reqBeamWidths"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::reqPromptLengths"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb", "tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor::tasksHost"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfigE", "tensorrt_llm::runtime::SamplingConfig"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9FloatTypeE", "tensorrt_llm::runtime::SamplingConfig::FloatType"], [1, 1, 1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig6OptVecE", "tensorrt_llm::runtime::SamplingConfig::OptVec"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig6OptVecE", "tensorrt_llm::runtime::SamplingConfig::OptVec::T"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigE8SizeType", "tensorrt_llm::runtime::SamplingConfig::SamplingConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigERKN8executor14SamplingConfigERKNSt8optionalIN8executor25SpeculativeDecodingConfigEEE", "tensorrt_llm::runtime::SamplingConfig::SamplingConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigERKNSt6vectorI14SamplingConfigEE", "tensorrt_llm::runtime::SamplingConfig::SamplingConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigE8SizeType", "tensorrt_llm::runtime::SamplingConfig::SamplingConfig::beamWidth"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigERKNSt6vectorI14SamplingConfigEE", "tensorrt_llm::runtime::SamplingConfig::SamplingConfig::configs"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigERKN8executor14SamplingConfigERKNSt8optionalIN8executor25SpeculativeDecodingConfigEEE", 
"tensorrt_llm::runtime::SamplingConfig::SamplingConfig::samplingConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigERKN8executor14SamplingConfigERKNSt8optionalIN8executor25SpeculativeDecodingConfigEEE", "tensorrt_llm::runtime::SamplingConfig::SamplingConfig::specDecodingConfig"], [1, 1, 1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig3VecE", "tensorrt_llm::runtime::SamplingConfig::Vec"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig3VecE", "tensorrt_llm::runtime::SamplingConfig::Vec::T"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig23beamSearchDiversityRateE", "tensorrt_llm::runtime::SamplingConfig::beamSearchDiversityRate"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9beamWidthE", "tensorrt_llm::runtime::SamplingConfig::beamWidth"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig24draftAcceptanceThresholdE", "tensorrt_llm::runtime::SamplingConfig::draftAcceptanceThreshold"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig13earlyStoppingE", "tensorrt_llm::runtime::SamplingConfig::earlyStopping"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig16frequencyPenaltyE", "tensorrt_llm::runtime::SamplingConfig::frequencyPenalty"], [1, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig10fuseValuesE6OptVecI1TERKNSt6vectorI14SamplingConfigEENSt8functionIF6OptVecI1TE8SizeTypeEEE", "tensorrt_llm::runtime::SamplingConfig::fuseValues"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig10fuseValuesE6OptVecI1TERKNSt6vectorI14SamplingConfigEENSt8functionIF6OptVecI1TE8SizeTypeEEE", "tensorrt_llm::runtime::SamplingConfig::fuseValues::T"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig10fuseValuesE6OptVecI1TERKNSt6vectorI14SamplingConfigEENSt8functionIF6OptVecI1TE8SizeTypeEEE", "tensorrt_llm::runtime::SamplingConfig::fuseValues::accessor"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig10fuseValuesE6OptVecI1TERKNSt6vectorI14SamplingConfigEENSt8functionIF6OptVecI1TE8SizeTypeEEE", "tensorrt_llm::runtime::SamplingConfig::fuseValues::configs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig13lengthPenaltyE", "tensorrt_llm::runtime::SamplingConfig::lengthPenalty"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9minLengthE", "tensorrt_llm::runtime::SamplingConfig::minLength"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig17normalizeLogProbsE", "tensorrt_llm::runtime::SamplingConfig::normalizeLogProbs"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime14SamplingConfigeqERK14SamplingConfig", "tensorrt_llm::runtime::SamplingConfig::operator=="], [1, 6, 1, "_CPPv4NK12tensorrt_llm7runtime14SamplingConfigeqERK14SamplingConfig", "tensorrt_llm::runtime::SamplingConfig::operator==::other"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig15presencePenaltyE", "tensorrt_llm::runtime::SamplingConfig::presencePenalty"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig10randomSeedE", "tensorrt_llm::runtime::SamplingConfig::randomSeed"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig17repetitionPenaltyE", "tensorrt_llm::runtime::SamplingConfig::repetitionPenalty"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig11temperatureE", "tensorrt_llm::runtime::SamplingConfig::temperature"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topKE", "tensorrt_llm::runtime::SamplingConfig::topK"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig15topKMedusaHeadsE", "tensorrt_llm::runtime::SamplingConfig::topKMedusaHeads"], [1, 7, 
1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topPE", "tensorrt_llm::runtime::SamplingConfig::topP"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9topPDecayE", "tensorrt_llm::runtime::SamplingConfig::topPDecay"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig7topPMinE", "tensorrt_llm::runtime::SamplingConfig::topPMin"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig12topPResetIdsE", "tensorrt_llm::runtime::SamplingConfig::topPResetIds"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime8SizeTypeE", "tensorrt_llm::runtime::SizeType"], [1, 1, 1, "_CPPv4I0EN12tensorrt_llm7runtime12StringPtrMapE", "tensorrt_llm::runtime::StringPtrMap"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime12StringPtrMapE", "tensorrt_llm::runtime::StringPtrMap::T"], [1, 4, 1, "_CPPv4I0_bEN12tensorrt_llm7runtime11TRTDataTypeE", "tensorrt_llm::runtime::TRTDataType"], [1, 8, 1, "_CPPv4I0_bEN12tensorrt_llm7runtime11TRTDataTypeE", "tensorrt_llm::runtime::TRTDataType::T"], [1, 4, 1, "_CPPv4I0EN12tensorrt_llm7runtime11TRTDataTypeIP1TEE", "tensorrt_llm::runtime::TRTDataType&lt;T*&gt;"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime11TRTDataTypeIP1TEE", "tensorrt_llm::runtime::TRTDataType&lt;T*&gt;::T"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE15kUnderlyingTypeE", "tensorrt_llm::runtime::TRTDataType&lt;T*&gt;::kUnderlyingType"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;T*&gt;::value"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIbEE", "tensorrt_llm::runtime::TRTDataType&lt;bool&gt;"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIbE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;bool&gt;::value"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIfEE", "tensorrt_llm::runtime::TRTDataType&lt;float&gt;"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIfE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;float&gt;::value"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeI4halfEE", "tensorrt_llm::runtime::TRTDataType&lt;half&gt;"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeI4halfE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;half&gt;::value"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEEE", "tensorrt_llm::runtime::TRTDataType&lt;std::int32_t&gt;"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;std::int32_t&gt;::value"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEEE", "tensorrt_llm::runtime::TRTDataType&lt;std::int64_t&gt;"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;std::int64_t&gt;::value"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEEE", "tensorrt_llm::runtime::TRTDataType&lt;std::int8_t&gt;"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;std::int8_t&gt;::value"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEEE", "tensorrt_llm::runtime::TRTDataType&lt;std::uint32_t&gt;"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;std::uint32_t&gt;::value"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEEE", "tensorrt_llm::runtime::TRTDataType&lt;std::uint64_t&gt;"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEE5valueE", 
"tensorrt_llm::runtime::TRTDataType&lt;std::uint64_t&gt;::value"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEEE", "tensorrt_llm::runtime::TRTDataType&lt;std::uint8_t&gt;"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;std::uint8_t&gt;::value"], [1, 4, 1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIPvEE", "tensorrt_llm::runtime::TRTDataType&lt;void*&gt;"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIPvE5valueE", "tensorrt_llm::runtime::TRTDataType&lt;void*&gt;::value"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLoggerE", "tensorrt_llm::runtime::TllmLogger"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8getLevelEv", "tensorrt_llm::runtime::TllmLogger::getLevel"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE", "tensorrt_llm::runtime::TllmLogger::log"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE", "tensorrt_llm::runtime::TllmLogger::log::msg"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE", "tensorrt_llm::runtime::TllmLogger::log::severity"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8setLevelE8Severity", "tensorrt_llm::runtime::TllmLogger::setLevel"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8setLevelE8Severity", "tensorrt_llm::runtime::TllmLogger::setLevel::level"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime11TokenIdTypeE", "tensorrt_llm::runtime::TokenIdType"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfigE", "tensorrt_llm::runtime::WorldConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::WorldConfig"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::WorldConfig::deviceIds"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::WorldConfig::gpusPerNode"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::WorldConfig::pipelineParallelism"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::WorldConfig::rank"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::WorldConfig::tensorParallelism"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig9getDeviceEv", "tensorrt_llm::runtime::WorldConfig::getDevice"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig15getGpusPerGroupEv", "tensorrt_llm::runtime::WorldConfig::getGpusPerGroup"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig14getGpusPerNodeEv", "tensorrt_llm::runtime::WorldConfig::getGpusPerNode"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig11getLastRankEv", "tensorrt_llm::runtime::WorldConfig::getLastRank"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig24getPipelineParallelGroupEv", 
"tensorrt_llm::runtime::WorldConfig::getPipelineParallelGroup"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig23getPipelineParallelRankEv", "tensorrt_llm::runtime::WorldConfig::getPipelineParallelRank"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig22getPipelineParallelismEv", "tensorrt_llm::runtime::WorldConfig::getPipelineParallelism"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getRankEv", "tensorrt_llm::runtime::WorldConfig::getRank"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getSizeEv", "tensorrt_llm::runtime::WorldConfig::getSize"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig21getTensorParallelRankEv", "tensorrt_llm::runtime::WorldConfig::getTensorParallelRank"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig20getTensorParallelismEv", "tensorrt_llm::runtime::WorldConfig::getTensorParallelism"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig27isFirstPipelineParallelRankEv", "tensorrt_llm::runtime::WorldConfig::isFirstPipelineParallelRank"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig26isLastPipelineParallelRankEv", "tensorrt_llm::runtime::WorldConfig::isLastPipelineParallelRank"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig18isPipelineParallelEv", "tensorrt_llm::runtime::WorldConfig::isPipelineParallel"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig16isTensorParallelEv", "tensorrt_llm::runtime::WorldConfig::isTensorParallel"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig19kDefaultGpusPerNodeE", "tensorrt_llm::runtime::WorldConfig::kDefaultGpusPerNode"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig10mDeviceIdsE", "tensorrt_llm::runtime::WorldConfig::mDeviceIds"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig12mGpusPerNodeE", "tensorrt_llm::runtime::WorldConfig::mGpusPerNode"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig20mPipelineParallelismE", "tensorrt_llm::runtime::WorldConfig::mPipelineParallelism"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig5mRankE", "tensorrt_llm::runtime::WorldConfig::mRank"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig18mTensorParallelismE", "tensorrt_llm::runtime::WorldConfig::mTensorParallelism"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEERKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::mpi"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEERKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::mpi::deviceIds"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEERKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::mpi::gpusPerNode"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEERKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::mpi::pipelineParallelism"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEERKNSt8optionalINSt6vectorI8SizeTypeEEEE", "tensorrt_llm::runtime::WorldConfig::mpi::tensorParallelism"], [1, 5, 1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig14validMpiConfigEv", "tensorrt_llm::runtime::WorldConfig::validMpiConfig"], [1, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer", "tensorrt_llm::runtime::bufferCast"], [1, 5, 1, 
"_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer", "tensorrt_llm::runtime::bufferCast"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer", "tensorrt_llm::runtime::bufferCast::T"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer", "tensorrt_llm::runtime::bufferCast::T"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer", "tensorrt_llm::runtime::bufferCast::buffer"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer", "tensorrt_llm::runtime::bufferCast::buffer"], [1, 5, 1, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE", "tensorrt_llm::runtime::constPointerCast"], [1, 5, 1, "_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE", "tensorrt_llm::runtime::constPointerCast"], [1, 8, 1, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE", "tensorrt_llm::runtime::constPointerCast::D"], [1, 8, 1, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE", "tensorrt_llm::runtime::constPointerCast::T"], [1, 8, 1, "_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE", "tensorrt_llm::runtime::constPointerCast::T"], [1, 6, 1, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE", "tensorrt_llm::runtime::constPointerCast::ptr"], [1, 6, 1, "_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE", "tensorrt_llm::runtime::constPointerCast::ptr"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime7decoderE", "tensorrt_llm::runtime::decoder"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5InputE", "tensorrt_llm::runtime::decoder::Input"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input5InputE9TensorPtr", "tensorrt_llm::runtime::decoder::Input::Input"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input5InputE9TensorPtr", "tensorrt_llm::runtime::decoder::Input::Input::logits"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input9TensorPtrE", "tensorrt_llm::runtime::decoder::Input::TensorPtr"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input16cacheIndirectionE", "tensorrt_llm::runtime::decoder::Input::cacheIndirection"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input6logitsE", "tensorrt_llm::runtime::decoder::Input::logits"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6OutputE", "tensorrt_llm::runtime::decoder::Output"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output6OutputEv", "tensorrt_llm::runtime::decoder::Output::Output"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output9TensorPtrE", "tensorrt_llm::runtime::decoder::Output::TensorPtr"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output16cacheIndirectionE", "tensorrt_llm::runtime::decoder::Output::cacheIndirection"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output15sequenceLengthsE", "tensorrt_llm::runtime::decoder::Output::sequenceLengths"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batchE", "tensorrt_llm::runtime::decoder_batch"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5InputE", "tensorrt_llm::runtime::decoder_batch::Input"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEE", 
"tensorrt_llm::runtime::decoder_batch::Input::Input"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEERKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEE", "tensorrt_llm::runtime::decoder_batch::Input::Input"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEERKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEERKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::active"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEERKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::active"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::logits"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEERKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::logits"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::logits"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEERKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Input::Input::logits"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input14TensorConstPtrE", "tensorrt_llm::runtime::decoder_batch::Input::TensorConstPtr"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input9TensorPtrE", "tensorrt_llm::runtime::decoder_batch::Input::TensorPtr"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input6activeE", "tensorrt_llm::runtime::decoder_batch::Input::active"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input16cacheIndirectionE", "tensorrt_llm::runtime::decoder_batch::Input::cacheIndirection"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input6logitsE", "tensorrt_llm::runtime::decoder_batch::Input::logits"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input12medusaLogitsE", "tensorrt_llm::runtime::decoder_batch::Input::medusaLogits"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch6OutputE", "tensorrt_llm::runtime::decoder_batch::Output"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7RequestE", "tensorrt_llm::runtime::decoder_batch::Request"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request9BufferPtrE", "tensorrt_llm::runtime::decoder_batch::Request::BufferPtr"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request14ConstTensorPtrE", "tensorrt_llm::runtime::decoder_batch::Request::ConstTensorPtr"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14ConstTensorPtr8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::decoder_batch::Request::Request"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14ConstTensorPtr8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::decoder_batch::Request::Request::endId"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14ConstTensorPtr8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", 
"tensorrt_llm::runtime::decoder_batch::Request::Request::ids"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14ConstTensorPtr8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::decoder_batch::Request::Request::inputLen"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14ConstTensorPtr8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE", "tensorrt_llm::runtime::decoder_batch::Request::Request::maxNewTokens"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request9TensorPtrE", "tensorrt_llm::runtime::decoder_batch::Request::TensorPtr"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12badWordsListE", "tensorrt_llm::runtime::decoder_batch::Request::badWordsList"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request18computeCumLogProbsE", "tensorrt_llm::runtime::decoder_batch::Request::computeCumLogProbs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request15computeLogProbsE", "tensorrt_llm::runtime::decoder_batch::Request::computeLogProbs"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request11draftLogitsE", "tensorrt_llm::runtime::decoder_batch::Request::draftLogits"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request11draftTokensE", "tensorrt_llm::runtime::decoder_batch::Request::draftTokens"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13embeddingBiasE", "tensorrt_llm::runtime::decoder_batch::Request::embeddingBias"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request5endIdE", "tensorrt_llm::runtime::decoder_batch::Request::endId"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request28generatedTokensPerEngineStepE", "tensorrt_llm::runtime::decoder_batch::Request::generatedTokensPerEngineStep"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request3idsE", "tensorrt_llm::runtime::decoder_batch::Request::ids"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request8inputLenE", "tensorrt_llm::runtime::decoder_batch::Request::inputLen"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12maxNewTokensE", "tensorrt_llm::runtime::decoder_batch::Request::maxNewTokens"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request11medusaPathsE", "tensorrt_llm::runtime::decoder_batch::Request::medusaPaths"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13medusaTreeIdsE", "tensorrt_llm::runtime::decoder_batch::Request::medusaTreeIds"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13stopWordsListE", "tensorrt_llm::runtime::decoder_batch::Request::stopWordsList"], [1, 4, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5TokenE", "tensorrt_llm::runtime::decoder_batch::Token"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5TokenERR9CudaEventRKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Token::Token"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5TokenERR9CudaEventRKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Token::Token::active"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5TokenERR9CudaEventRKNSt6vectorIbEE", "tensorrt_llm::runtime::decoder_batch::Token::Token::event"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token6activeE", "tensorrt_llm::runtime::decoder_batch::Token::active"], [1, 7, 1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5eventE", "tensorrt_llm::runtime::decoder_batch::Token::event"], [1, 5, 1, 
"_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK10LoraModule", "tensorrt_llm::runtime::operator&lt;&lt;"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK26LoraCachePageManagerConfig", "tensorrt_llm::runtime::operator&lt;&lt;"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer", "tensorrt_llm::runtime::operator&lt;&lt;"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor", "tensorrt_llm::runtime::operator&lt;&lt;"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE", "tensorrt_llm::runtime::operator&lt;&lt;"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN9LoraCache21TaskLayerModuleConfigE", "tensorrt_llm::runtime::operator&lt;&lt;"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer", "tensorrt_llm::runtime::operator&lt;&lt;::buffer"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK26LoraCachePageManagerConfig", "tensorrt_llm::runtime::operator&lt;&lt;::c"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE", "tensorrt_llm::runtime::operator&lt;&lt;::dims"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK10LoraModule", "tensorrt_llm::runtime::operator&lt;&lt;::module"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK26LoraCachePageManagerConfig", "tensorrt_llm::runtime::operator&lt;&lt;::os"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN9LoraCache21TaskLayerModuleConfigE", "tensorrt_llm::runtime::operator&lt;&lt;::os"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK10LoraModule", "tensorrt_llm::runtime::operator&lt;&lt;::output"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer", "tensorrt_llm::runtime::operator&lt;&lt;::output"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor", "tensorrt_llm::runtime::operator&lt;&lt;::output"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE", "tensorrt_llm::runtime::operator&lt;&lt;::output"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor", "tensorrt_llm::runtime::operator&lt;&lt;::tensor"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN9LoraCache21TaskLayerModuleConfigE", "tensorrt_llm::runtime::operator&lt;&lt;::v"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime13setPeerAccessERK11WorldConfigb", "tensorrt_llm::runtime::setPeerAccess"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13setPeerAccessERK11WorldConfigb", "tensorrt_llm::runtime::setPeerAccess::enable"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime13setPeerAccessERK11WorldConfigb", "tensorrt_llm::runtime::setPeerAccess::worldConfig"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9to_stringERK26LoraCachePageManagerConfig", "tensorrt_llm::runtime::to_string"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime9to_stringERKN9LoraCache21TaskLayerModuleConfigE", "tensorrt_llm::runtime::to_string"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9to_stringERK26LoraCachePageManagerConfig", "tensorrt_llm::runtime::to_string::c"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime9to_stringERKN9LoraCache21TaskLayerModuleConfigE", "tensorrt_llm::runtime::to_string::v"], [1, 1, 1, "_CPPv4N12tensorrt_llm7runtime5utilsE", "tensorrt_llm::runtime::utils"], [1, 5, 1, "_CPPv4N12tensorrt_llm7runtime5utils10loadEngineERKNSt6stringE", "tensorrt_llm::runtime::utils::loadEngine"], [1, 6, 1, "_CPPv4N12tensorrt_llm7runtime5utils10loadEngineERKNSt6stringE", "tensorrt_llm::runtime::utils::loadEngine::enginePath"], [34, 9, 0, "-", "tensorrt_llm"]], "tensorrt_llm": [[29, 9, 0, "-", 
"functional"], [31, 9, 0, "-", "models"], [32, 9, 0, "-", "plugin"], [33, 9, 0, "-", "quantization"], [34, 9, 0, "-", "runtime"]], "tensorrt_llm.functional": [[29, 10, 1, "", "AllReduceConfig"], [29, 10, 1, "", "AllReduceStrategy"], [29, 10, 1, "", "AttentionMaskType"], [29, 10, 1, "", "DimRange"], [29, 10, 1, "", "LayerNormPositionType"], [29, 10, 1, "", "LayerNormType"], [29, 10, 1, "", "MLPType"], [29, 10, 1, "", "PositionEmbeddingType"], [29, 10, 1, "", "RotaryScalingType"], [29, 10, 1, "", "Tensor"], [29, 14, 1, "", "abs"], [29, 14, 1, "", "activation"], [29, 14, 1, "", "add"], [29, 14, 1, "", "allgather"], [29, 14, 1, "", "allreduce"], [29, 14, 1, "", "arange"], [29, 14, 1, "", "argmax"], [29, 14, 1, "", "assertion"], [29, 14, 1, "", "avg_pool2d"], [29, 14, 1, "", "bert_attention"], [29, 14, 1, "", "broadcast_helper"], [29, 14, 1, "", "cast"], [29, 14, 1, "", "chunk"], [29, 14, 1, "", "clip"], [29, 14, 1, "", "concat"], [29, 14, 1, "", "conditional"], [29, 14, 1, "", "constant"], [29, 14, 1, "", "constant_to_tensor_"], [29, 14, 1, "", "conv1d"], [29, 14, 1, "", "conv2d"], [29, 14, 1, "", "conv_transpose2d"], [29, 14, 1, "", "cos"], [29, 14, 1, "", "cumsum"], [29, 14, 1, "", "div"], [29, 14, 1, "", "einsum"], [29, 14, 1, "", "elementwise_binary"], [29, 14, 1, "", "embedding"], [29, 14, 1, "", "eq"], [29, 14, 1, "", "exp"], [29, 14, 1, "", "expand"], [29, 14, 1, "", "expand_dims"], [29, 14, 1, "", "expand_dims_like"], [29, 14, 1, "", "expand_mask"], [29, 14, 1, "", "flip"], [29, 14, 1, "", "gather"], [29, 14, 1, "", "gather_last_token_logits"], [29, 14, 1, "", "geglu"], [29, 14, 1, "", "gelu"], [29, 14, 1, "", "generate_alibi_biases"], [29, 14, 1, "", "generate_alibi_slopes"], [29, 14, 1, "", "gpt_attention"], [29, 14, 1, "", "group_norm"], [29, 14, 1, "", "gt"], [29, 14, 1, "", "identity"], [29, 14, 1, "", "index_select"], [29, 14, 1, "", "interpolate"], [29, 14, 1, "", "is_gated_activation"], [29, 14, 1, "", "layer_norm"], [29, 14, 1, "", "log"], [29, 14, 1, "", "lora_plugin"], [29, 14, 1, "", "lt"], [29, 14, 1, "", "mamba_conv1d"], [29, 14, 1, "", "masked_scatter"], [29, 14, 1, "", "masked_select"], [29, 14, 1, "", "matmul"], [29, 14, 1, "", "max"], [29, 14, 1, "", "maximum"], [29, 14, 1, "", "mean"], [29, 14, 1, "", "minimum"], [29, 14, 1, "", "mul"], [29, 14, 1, "", "non_gated_version"], [29, 14, 1, "", "op_and"], [29, 14, 1, "", "op_or"], [29, 14, 1, "", "outer"], [29, 14, 1, "", "permute"], [29, 14, 1, "", "pow"], [29, 14, 1, "", "recv"], [29, 14, 1, "", "relu"], [29, 14, 1, "", "repeat_interleave"], [29, 14, 1, "", "rms_norm"], [29, 14, 1, "", "round"], [29, 14, 1, "", "select"], [29, 14, 1, "", "selective_scan"], [29, 14, 1, "", "send"], [29, 14, 1, "", "shape"], [29, 14, 1, "", "sigmoid"], [29, 14, 1, "", "silu"], [29, 14, 1, "", "sin"], [29, 14, 1, "", "slice"], [29, 14, 1, "", "softmax"], [29, 14, 1, "", "softplus"], [29, 14, 1, "", "split"], [29, 14, 1, "", "sqrt"], [29, 14, 1, "", "squared_relu"], [29, 14, 1, "", "stack"], [29, 14, 1, "", "sub"], [29, 14, 1, "", "sum"], [29, 14, 1, "", "swiglu"], [29, 14, 1, "", "tanh"], [29, 14, 1, "", "topk"], [29, 14, 1, "", "transpose"], [29, 14, 1, "", "unary"], [29, 14, 1, "", "unsqueeze"], [29, 14, 1, "", "view"], [29, 14, 1, "", "where"]], "tensorrt_llm.functional.AllReduceConfig": [[29, 11, 1, "", "PUSH_MODE"], [29, 11, 1, "", "USE_MEMCPY"]], "tensorrt_llm.functional.AllReduceStrategy": [[29, 11, 1, "", "AUTO"], [29, 11, 1, "", "NCCL"], [29, 11, 1, "", "ONESHOT"], [29, 11, 1, "", "TWOSHOT"]], 
"tensorrt_llm.functional.AttentionMaskType": [[29, 11, 1, "", "bidirectional"], [29, 11, 1, "", "bidirectionalglm"], [29, 11, 1, "", "causal"], [29, 11, 1, "", "padding"]], "tensorrt_llm.functional.LayerNormPositionType": [[29, 11, 1, "", "post_layernorm"], [29, 11, 1, "", "pre_layernorm"]], "tensorrt_llm.functional.LayerNormType": [[29, 11, 1, "", "GroupNorm"], [29, 11, 1, "", "LayerNorm"], [29, 11, 1, "", "RmsNorm"]], "tensorrt_llm.functional.MLPType": [[29, 11, 1, "", "FusedGatedMLP"], [29, 11, 1, "", "GatedMLP"], [29, 11, 1, "", "MLP"]], "tensorrt_llm.functional.PositionEmbeddingType": [[29, 11, 1, "", "alibi"], [29, 11, 1, "", "alibi_with_scale"], [29, 11, 1, "", "chatglm"], [29, 12, 1, "", "choices"], [29, 12, 1, "", "from_string"], [29, 12, 1, "", "is_alibi"], [29, 12, 1, "", "is_rope"], [29, 11, 1, "", "learned_absolute"], [29, 11, 1, "", "relative"], [29, 11, 1, "", "rope_gpt_neox"], [29, 11, 1, "", "rope_gptj"]], "tensorrt_llm.functional.RotaryScalingType": [[29, 11, 1, "", "dynamic"], [29, 11, 1, "", "linear"], [29, 11, 1, "", "none"]], "tensorrt_llm.functional.Tensor": [[29, 12, 1, "", "abs"], [29, 12, 1, "", "cast"], [29, 13, 1, "", "dtype"], [29, 12, 1, "", "get_parent"], [29, 12, 1, "", "get_users"], [29, 12, 1, "", "is_dynamic"], [29, 12, 1, "", "is_trt_wrapper"], [29, 13, 1, "", "location"], [29, 12, 1, "", "log"], [29, 12, 1, "", "mark_output"], [29, 12, 1, "", "max"], [29, 12, 1, "", "mean"], [29, 13, 1, "", "name"], [29, 12, 1, "", "ndim"], [29, 13, 1, "", "network"], [29, 12, 1, "", "permute"], [29, 12, 1, "", "rank"], [29, 12, 1, "", "replace_all_uses_with"], [29, 13, 1, "", "shape"], [29, 12, 1, "", "size"], [29, 12, 1, "", "split"], [29, 12, 1, "", "sqrt"], [29, 12, 1, "", "transpose"], [29, 12, 1, "", "view"]], "tensorrt_llm.layers": [[30, 9, 0, "-", "activation"], [30, 9, 0, "-", "attention"], [30, 9, 0, "-", "cast"], [30, 9, 0, "-", "conv"], [30, 9, 0, "-", "embedding"], [30, 9, 0, "-", "linear"], [30, 9, 0, "-", "mlp"], [30, 9, 0, "-", "normalization"], [30, 9, 0, "-", "pooling"]], "tensorrt_llm.layers.activation": [[30, 10, 1, "", "Mish"]], "tensorrt_llm.layers.activation.Mish": [[30, 12, 1, "", "forward"]], "tensorrt_llm.layers.attention": [[30, 10, 1, "", "Attention"], [30, 10, 1, "", "AttentionParams"], [30, 10, 1, "", "BertAttention"], [30, 10, 1, "", "KeyValueCacheParams"], [30, 10, 1, "", "RopeEmbeddingUtils"], [30, 14, 1, "", "compute_relative_bias"], [30, 14, 1, "", "make_causal_mask"]], "tensorrt_llm.layers.attention.Attention": [[30, 12, 1, "", "forward"]], "tensorrt_llm.layers.attention.AttentionParams": [[30, 12, 1, "", "is_valid"], [30, 12, 1, "", "is_valid_cross_attn"]], "tensorrt_llm.layers.attention.BertAttention": [[30, 12, 1, "", "forward"]], "tensorrt_llm.layers.attention.KeyValueCacheParams": [[30, 12, 1, "", "fill_none_tensor_list"], [30, 12, 1, "", "get_first_past_key_value"], [30, 12, 1, "", "is_valid"]], "tensorrt_llm.layers.attention.RopeEmbeddingUtils": [[30, 12, 1, "", "apply_rotary_pos_emb"], [30, 12, 1, "", "apply_rotary_pos_emb_chatglm"], [30, 12, 1, "", "create_sinusoidal_positions"], [30, 12, 1, "", "rotate_every_two"], [30, 12, 1, "", "rotate_half"]], "tensorrt_llm.layers.cast": [[30, 10, 1, "", "Cast"]], "tensorrt_llm.layers.cast.Cast": [[30, 12, 1, "", "forward"]], "tensorrt_llm.layers.conv": [[30, 10, 1, "", "Conv1d"], [30, 10, 1, "", "Conv2d"], [30, 10, 1, "", "ConvTranspose2d"]], "tensorrt_llm.layers.conv.Conv1d": [[30, 12, 1, "", "forward"]], "tensorrt_llm.layers.conv.Conv2d": [[30, 12, 1, "", "forward"]], 
"tensorrt_llm.layers.conv.ConvTranspose2d": [[30, 12, 1, "", "forward"]], "tensorrt_llm.layers.embedding": [[30, 10, 1, "", "Embedding"], [30, 10, 1, "", "PromptTuningEmbedding"]], "tensorrt_llm.layers.embedding.Embedding": [[30, 12, 1, "", "forward"], [30, 12, 1, "", "weight_loader"]], "tensorrt_llm.layers.embedding.PromptTuningEmbedding": [[30, 12, 1, "", "forward"]], "tensorrt_llm.layers.linear": [[30, 11, 1, "", "ColumnLinear"], [30, 10, 1, "", "Linear"], [30, 10, 1, "", "ParallelLMHead"], [30, 10, 1, "", "QKVColumnLinear"], [30, 10, 1, "", "RowLinear"]], "tensorrt_llm.layers.linear.Linear": [[30, 12, 1, "", "forward"], [30, 12, 1, "", "multiply_gather"], [30, 12, 1, "", "weight_loader"]], "tensorrt_llm.layers.linear.ParallelLMHead": [[30, 12, 1, "", "weight_loader"]], "tensorrt_llm.layers.linear.QKVColumnLinear": [[30, 12, 1, "", "weight_loader"]], "tensorrt_llm.layers.linear.RowLinear": [[30, 12, 1, "", "forward"], [30, 12, 1, "", "multiply_reduce"], [30, 12, 1, "", "weight_loader"]], "tensorrt_llm.layers.mlp": [[30, 10, 1, "", "FusedGatedMLP"], [30, 10, 1, "", "GatedMLP"], [30, 10, 1, "", "MLP"]], "tensorrt_llm.layers.mlp.FusedGatedMLP": [[30, 12, 1, "", "forward"]], "tensorrt_llm.layers.mlp.GatedMLP": [[30, 12, 1, "", "forward"]], "tensorrt_llm.layers.mlp.MLP": [[30, 12, 1, "", "forward"]], "tensorrt_llm.layers.normalization": [[30, 10, 1, "", "GroupNorm"], [30, 10, 1, "", "LayerNorm"], [30, 10, 1, "", "RmsNorm"]], "tensorrt_llm.layers.normalization.GroupNorm": [[30, 12, 1, "", "forward"]], "tensorrt_llm.layers.normalization.LayerNorm": [[30, 12, 1, "", "forward"]], "tensorrt_llm.layers.normalization.RmsNorm": [[30, 12, 1, "", "forward"]], "tensorrt_llm.layers.pooling": [[30, 10, 1, "", "AvgPool2d"]], "tensorrt_llm.layers.pooling.AvgPool2d": [[30, 12, 1, "", "forward"]], "tensorrt_llm.models": [[31, 10, 1, "", "BaichuanForCausalLM"], [31, 10, 1, "", "BertForQuestionAnswering"], [31, 10, 1, "", "BertForSequenceClassification"], [31, 10, 1, "", "BertModel"], [31, 10, 1, "", "BloomForCausalLM"], [31, 10, 1, "", "BloomModel"], [31, 10, 1, "", "ChatGLMForCausalLM"], [31, 10, 1, "", "ChatGLMModel"], [31, 10, 1, "", "DecoderModel"], [31, 10, 1, "", "EncoderModel"], [31, 10, 1, "", "FalconForCausalLM"], [31, 10, 1, "", "FalconModel"], [31, 10, 1, "", "GPTForCausalLM"], [31, 10, 1, "", "GPTJForCausalLM"], [31, 10, 1, "", "GPTJModel"], [31, 10, 1, "", "GPTModel"], [31, 10, 1, "", "GPTNeoXForCausalLM"], [31, 10, 1, "", "GPTNeoXModel"], [31, 10, 1, "", "GemmaForCausalLM"], [31, 10, 1, "", "LLaMAForCausalLM"], [31, 10, 1, "", "LLaMAModel"], [31, 10, 1, "", "MPTForCausalLM"], [31, 10, 1, "", "MPTModel"], [31, 10, 1, "", "MambaLMHeadModel"], [31, 10, 1, "", "MedusaForCausalLm"], [31, 10, 1, "", "OPTForCausalLM"], [31, 10, 1, "", "OPTModel"], [31, 10, 1, "", "PhiForCausalLM"], [31, 10, 1, "", "PhiModel"], [31, 10, 1, "", "PretrainedConfig"], [31, 10, 1, "", "PretrainedModel"], [31, 10, 1, "", "QWenForCausalLM"], [31, 10, 1, "", "WhisperEncoder"], [31, 14, 1, "", "quantize_model"]], "tensorrt_llm.models.BertForQuestionAnswering": [[31, 12, 1, "", "forward"]], "tensorrt_llm.models.BertForSequenceClassification": [[31, 12, 1, "", "forward"]], "tensorrt_llm.models.BertModel": [[31, 12, 1, "", "forward"]], "tensorrt_llm.models.BloomModel": [[31, 12, 1, "", "forward"]], "tensorrt_llm.models.ChatGLMForCausalLM": [[31, 12, 1, "", "check_config"], [31, 12, 1, "", "prepare_inputs"]], "tensorrt_llm.models.ChatGLMModel": [[31, 12, 1, "", "forward"]], "tensorrt_llm.models.DecoderModel": [[31, 12, 1, "", 
"forward"], [31, 12, 1, "", "prepare_inputs"]], "tensorrt_llm.models.EncoderModel": [[31, 12, 1, "", "forward"], [31, 12, 1, "", "prepare_inputs"]], "tensorrt_llm.models.FalconForCausalLM": [[31, 12, 1, "", "check_config"]], "tensorrt_llm.models.FalconModel": [[31, 12, 1, "", "forward"]], "tensorrt_llm.models.GPTForCausalLM": [[31, 12, 1, "", "check_config"], [31, 12, 1, "", "use_lora"]], "tensorrt_llm.models.GPTJForCausalLM": [[31, 12, 1, "", "check_config"]], "tensorrt_llm.models.GPTJModel": [[31, 12, 1, "", "forward"]], "tensorrt_llm.models.GPTModel": [[31, 12, 1, "", "forward"]], "tensorrt_llm.models.GPTNeoXModel": [[31, 12, 1, "", "forward"]], "tensorrt_llm.models.GemmaForCausalLM": [[31, 12, 1, "", "check_config"], [31, 12, 1, "", "from_hugging_face"]], "tensorrt_llm.models.LLaMAForCausalLM": [[31, 12, 1, "", "check_config"], [31, 12, 1, "", "default_plugin_config"], [31, 12, 1, "", "from_hugging_face"], [31, 12, 1, "", "from_meta_ckpt"], [31, 12, 1, "", "quantize"], [31, 12, 1, "", "use_lora"]], "tensorrt_llm.models.LLaMAModel": [[31, 12, 1, "", "forward"]], "tensorrt_llm.models.MPTForCausalLM": [[31, 12, 1, "", "check_config"]], "tensorrt_llm.models.MPTModel": [[31, 12, 1, "", "forward"]], "tensorrt_llm.models.MambaLMHeadModel": [[31, 12, 1, "", "forward"], [31, 12, 1, "", "prepare_inputs"]], "tensorrt_llm.models.MedusaForCausalLm": [[31, 12, 1, "", "forward"], [31, 12, 1, "", "prepare_inputs"]], "tensorrt_llm.models.OPTForCausalLM": [[31, 12, 1, "", "check_config"]], "tensorrt_llm.models.OPTModel": [[31, 12, 1, "", "forward"]], "tensorrt_llm.models.PhiForCausalLM": [[31, 12, 1, "", "check_config"], [31, 12, 1, "", "convert_hf_checkpoint"]], "tensorrt_llm.models.PhiModel": [[31, 12, 1, "", "forward"]], "tensorrt_llm.models.PretrainedConfig": [[31, 12, 1, "", "from_dict"], [31, 12, 1, "", "from_json_file"], [31, 13, 1, "", "quant_mode"], [31, 12, 1, "", "set_if_not_exist"], [31, 12, 1, "", "set_rank"], [31, 12, 1, "", "to_dict"]], "tensorrt_llm.models.PretrainedModel": [[31, 12, 1, "", "check_config"], [31, 12, 1, "", "from_checkpoint"], [31, 12, 1, "", "from_config"], [31, 12, 1, "", "load"], [31, 12, 1, "", "load_partial_weights"], [31, 12, 1, "", "prepare_inputs"], [31, 12, 1, "", "quantize"], [31, 12, 1, "", "release"], [31, 12, 1, "", "save_checkpoint"]], "tensorrt_llm.models.QWenForCausalLM": [[31, 12, 1, "", "check_config"]], "tensorrt_llm.models.WhisperEncoder": [[31, 12, 1, "", "forward"], [31, 12, 1, "", "prepare_inputs"]], "tensorrt_llm.plugin": [[32, 10, 1, "", "PluginConfig"]], "tensorrt_llm.plugin.PluginConfig": [[32, 12, 1, "", "to_legacy_setting"]], "tensorrt_llm.quantization": [[33, 10, 1, "", "QuantAlgo"], [33, 10, 1, "", "QuantMode"], [33, 14, 1, "", "quantize_and_export"]], "tensorrt_llm.runtime": [[34, 10, 1, "", "ChatGLMGenerationSession"], [34, 10, 1, "", "GenerationSequence"], [34, 10, 1, "", "GenerationSession"], [34, 10, 1, "", "KVCacheManager"], [34, 10, 1, "", "LogitsProcessor"], [34, 10, 1, "", "LogitsProcessorList"], [34, 10, 1, "", "MambaLMHeadModelGenerationSession"], [34, 10, 1, "", "ModelConfig"], [34, 10, 1, "", "ModelRunner"], [34, 10, 1, "", "ModelRunnerCpp"], [34, 10, 1, "", "QWenForCausalLMGenerationSession"], [34, 10, 1, "", "SamplingConfig"], [34, 10, 1, "", "Session"], [34, 10, 1, "", "StoppingCriteria"], [34, 10, 1, "", "StoppingCriteriaList"], [34, 10, 1, "", "TensorInfo"], [34, 14, 1, "", "to_word_list_format"]], "tensorrt_llm.runtime.GenerationSequence": [[34, 12, 1, "", "get_batch_idx"], [34, 12, 1, "", "get_seq_idx"]], 
"tensorrt_llm.runtime.GenerationSession": [[34, 11, 1, "", "batch_size"], [34, 11, 1, "", "buffer_allocated"], [34, 13, 1, "", "cross_attention"], [34, 11, 1, "", "cuda_graph_mode"], [34, 12, 1, "", "cuda_stream_guard"], [34, 11, 1, "", "debug_mode"], [34, 11, 1, "", "debug_tensors_to_save"], [34, 12, 1, "", "decode"], [34, 12, 1, "", "decode_batch"], [34, 12, 1, "", "decode_regular"], [34, 12, 1, "", "decode_stream"], [34, 11, 1, "", "device"], [34, 13, 1, "", "dtype"], [34, 12, 1, "", "dump_debug_buffers"], [34, 12, 1, "", "early_stop_criteria"], [34, 12, 1, "", "filter_medusa_logits"], [34, 12, 1, "", "finalize_decoder"], [34, 12, 1, "", "find_best_medusa_path"], [34, 13, 1, "", "first_layer"], [34, 13, 1, "", "gather_context_logits"], [34, 13, 1, "", "gather_generation_logits"], [34, 12, 1, "", "get_next_medusa_tokens"], [34, 12, 1, "", "handle_per_step"], [34, 13, 1, "", "has_position_embedding"], [34, 13, 1, "", "has_token_type_embedding"], [34, 13, 1, "", "head_size"], [34, 13, 1, "", "hidden_size"], [34, 13, 1, "", "is_medusa_mode"], [34, 13, 1, "", "last_layer"], [34, 11, 1, "", "mapping"], [34, 13, 1, "", "max_medusa_tokens"], [34, 13, 1, "", "max_prompt_embedding_table_size"], [34, 11, 1, "", "medusa_paths"], [34, 11, 1, "", "medusa_position_offsets"], [34, 11, 1, "", "medusa_temperature"], [34, 11, 1, "", "medusa_topks"], [34, 11, 1, "", "medusa_tree_ids"], [34, 12, 1, "", "next_medusa_input_ids"], [34, 13, 1, "", "num_heads"], [34, 13, 1, "", "num_heads_kv"], [34, 13, 1, "", "num_layers"], [34, 13, 1, "", "num_medusa_heads"], [34, 11, 1, "", "num_medusa_tokens"], [34, 13, 1, "", "paged_kv_cache"], [34, 13, 1, "", "paged_state"], [34, 12, 1, "", "pp_communicate_final_output_ids"], [34, 12, 1, "", "pp_communicate_new_tokens"], [34, 12, 1, "", "process_logits_for_medusa_mode"], [34, 13, 1, "", "quant_mode"], [34, 13, 1, "", "remove_input_padding"], [34, 11, 1, "", "runtime"], [34, 12, 1, "", "setup"], [34, 13, 1, "", "tokens_per_block"], [34, 12, 1, "", "update_kv_cache_draft_token_location"], [34, 12, 1, "", "update_output_ids_by_offset"], [34, 13, 1, "", "use_context_fmha_for_generation"], [34, 13, 1, "", "use_custom_all_reduce"], [34, 13, 1, "", "use_gpt_attention_plugin"], [34, 13, 1, "", "use_lora_plugin"], [34, 13, 1, "", "use_mamba_conv1d_plugin"], [34, 13, 1, "", "vocab_size"]], "tensorrt_llm.runtime.KVCacheManager": [[34, 12, 1, "", "add_sequence"], [34, 12, 1, "", "get_block_pointers"], [34, 12, 1, "", "step"]], "tensorrt_llm.runtime.MambaLMHeadModelGenerationSession": [[34, 13, 1, "", "mamba_d_conv"], [34, 13, 1, "", "mamba_d_state"], [34, 13, 1, "", "mamba_expand"], [34, 12, 1, "", "setup"]], "tensorrt_llm.runtime.ModelConfig": [[34, 11, 1, "", "cross_attention"], [34, 11, 1, "", "dtype"], [34, 11, 1, "", "gather_context_logits"], [34, 11, 1, "", "gather_generation_logits"], [34, 11, 1, "", "gpt_attention_plugin"], [34, 11, 1, "", "has_position_embedding"], [34, 11, 1, "", "has_token_type_embedding"], [34, 11, 1, "", "head_size"], [34, 11, 1, "", "hidden_size"], [34, 11, 1, "", "lora_plugin"], [34, 11, 1, "", "lora_target_modules"], [34, 11, 1, "", "mamba_conv1d_plugin"], [34, 11, 1, "", "mamba_d_conv"], [34, 11, 1, "", "mamba_d_state"], [34, 11, 1, "", "mamba_expand"], [34, 11, 1, "", "max_batch_size"], [34, 11, 1, "", "max_beam_width"], [34, 11, 1, "", "max_medusa_tokens"], [34, 11, 1, "", "max_prompt_embedding_table_size"], [34, 11, 1, "", "model_name"], [34, 11, 1, "", "num_heads"], [34, 11, 1, "", "num_kv_heads"], [34, 11, 1, "", "num_layers"], [34, 11, 1, "", 
"num_medusa_heads"], [34, 11, 1, "", "paged_kv_cache"], [34, 11, 1, "", "paged_state"], [34, 11, 1, "", "quant_mode"], [34, 11, 1, "", "remove_input_padding"], [34, 11, 1, "", "skip_cross_qkv"], [34, 11, 1, "", "tokens_per_block"], [34, 11, 1, "", "trtllm_modules_to_hf_modules"], [34, 11, 1, "", "use_context_fmha_for_generation"], [34, 11, 1, "", "use_custom_all_reduce"], [34, 11, 1, "", "vocab_size"]], "tensorrt_llm.runtime.ModelRunner": [[34, 13, 1, "", "dtype"], [34, 12, 1, "", "from_dir"], [34, 12, 1, "", "from_engine"], [34, 13, 1, "", "gather_context_logits"], [34, 13, 1, "", "gather_generation_logits"], [34, 12, 1, "", "generate"], [34, 13, 1, "", "hidden_size"], [34, 13, 1, "", "max_prompt_embedding_table_size"], [34, 13, 1, "", "max_sequence_length"], [34, 13, 1, "", "num_heads"], [34, 13, 1, "", "num_layers"], [34, 13, 1, "", "remove_input_padding"], [34, 12, 1, "", "serialize_engine"], [34, 13, 1, "", "use_lora_plugin"], [34, 13, 1, "", "vocab_size"], [34, 13, 1, "", "vocab_size_padded"]], "tensorrt_llm.runtime.ModelRunnerCpp": [[34, 13, 1, "", "dtype"], [34, 12, 1, "", "from_dir"], [34, 13, 1, "", "gather_context_logits"], [34, 13, 1, "", "gather_generation_logits"], [34, 12, 1, "", "generate"], [34, 13, 1, "", "hidden_size"], [34, 13, 1, "", "max_prompt_embedding_table_size"], [34, 13, 1, "", "max_sequence_length"], [34, 13, 1, "", "num_heads"], [34, 13, 1, "", "num_layers"], [34, 13, 1, "", "remove_input_padding"], [34, 13, 1, "", "vocab_size"], [34, 13, 1, "", "vocab_size_padded"]], "tensorrt_llm.runtime.QWenForCausalLMGenerationSession": [[34, 12, 1, "", "generate"]], "tensorrt_llm.runtime.SamplingConfig": [[34, 11, 1, "", "bad_words_list"], [34, 11, 1, "", "beam_search_diversity_rate"], [34, 11, 1, "", "early_stopping"], [34, 11, 1, "", "end_id"], [34, 11, 1, "", "frequency_penalty"], [34, 11, 1, "", "length_penalty"], [34, 11, 1, "", "max_attention_window_size"], [34, 11, 1, "", "max_new_tokens"], [34, 11, 1, "", "min_length"], [34, 11, 1, "", "num_beams"], [34, 11, 1, "", "output_cum_log_probs"], [34, 11, 1, "", "output_log_probs"], [34, 11, 1, "", "output_sequence_lengths"], [34, 11, 1, "", "pad_id"], [34, 11, 1, "", "presence_penalty"], [34, 11, 1, "", "random_seed"], [34, 11, 1, "", "repetition_penalty"], [34, 11, 1, "", "return_dict"], [34, 11, 1, "", "sink_token_length"], [34, 11, 1, "", "stop_words_list"], [34, 11, 1, "", "temperature"], [34, 11, 1, "", "top_k"], [34, 11, 1, "", "top_p"], [34, 11, 1, "", "top_p_decay"], [34, 11, 1, "", "top_p_min"], [34, 11, 1, "", "top_p_reset_ids"], [34, 12, 1, "", "update"], [34, 11, 1, "", "use_beam_hyps"]], "tensorrt_llm.runtime.Session": [[34, 13, 1, "", "context"], [34, 13, 1, "", "engine"], [34, 12, 1, "", "from_engine"], [34, 12, 1, "", "from_serialized_engine"], [34, 12, 1, "", "infer_shapes"], [34, 12, 1, "", "run"], [34, 13, 1, "", "runtime"], [34, 12, 1, "", "set_shapes"]], "tensorrt_llm.runtime.TensorInfo": [[34, 11, 1, "", "dtype"], [34, 11, 1, "", "name"], [34, 11, 1, "", "shape"]]}, "objtypes": {"0": "c:macro", "1": "cpp:type", "2": "cpp:enum", "3": "cpp:enumerator", "4": "cpp:class", "5": "cpp:function", "6": "cpp:functionParam", "7": "cpp:member", "8": "cpp:templateParam", "9": "py:module", "10": "py:class", "11": "py:attribute", "12": "py:method", "13": "py:property", "14": "py:function"}, "objnames": {"0": ["c", "macro", "C macro"], "1": ["cpp", "type", "C++ type"], "2": ["cpp", "enum", "C++ enum"], "3": ["cpp", "enumerator", "C++ enumerator"], "4": ["cpp", "class", "C++ class"], "5": ["cpp", "function", "C++ 
function"], "6": ["cpp", "functionParam", "C++ function parameter"], "7": ["cpp", "member", "C++ member"], "8": ["cpp", "templateParam", "C++ template parameter"], "9": ["py", "module", "Python module"], "10": ["py", "class", "Python class"], "11": ["py", "attribute", "Python attribute"], "12": ["py", "method", "Python method"], "13": ["py", "property", "Python property"], "14": ["py", "function", "Python function"]}, "titleterms": {"executor": [0, 19], "h": [0, 1], "tensor": [0, 3, 4, 6, 8, 36], "type": [0, 27], "runtim": [1, 5, 8, 11, 21, 22, 27, 34, 36], "buffermanag": 1, "common": [1, 25], "cudaev": 1, "cudastream": 1, "decodinginput": 1, "decodingoutput": 1, "generationinput": 1, "generationoutput": 1, "gptdecod": 1, "gptdecoderbatch": 1, "gptjsonconfig": 1, "gptmodelconfig": 1, "gptsession": 1, "ibuff": 1, "igptdecoderbatch": 1, "istatefulgptdecod": 1, "itensor": 1, "ipcutil": 1, "memorycount": 1, "prompttuningparam": 1, "samplingconfig": 1, "tllmlogger": 1, "worldconfig": 1, "decodingmod": 1, "loracach": [1, 8], "loracachepagemanagerconfig": 1, "loramodul": 1, "The": [2, 5, 19, 37], "batch": [2, 4, 5, 25, 27], "manag": [2, 6], "tensorrt": [2, 3, 10, 11, 12, 13, 15, 16, 18, 20, 21, 22, 25, 27, 28, 35, 36, 40, 41], "llm": [2, 3, 10, 12, 13, 15, 16, 18, 20, 21, 22, 25, 27, 28, 36, 40, 41], "api": [2, 6, 13, 19, 40], "get": [2, 20], "send": [2, 35], "callback": 2, "request": [2, 7, 19, 35], "interrupt": 2, "statist": 2, "logit": [2, 19], "post": [2, 19], "processor": [2, 19], "option": [2, 19, 21, 26, 27], "other": 2, "mandatori": 2, "gptmanag": [2, 8], "paramet": [2, 5], "respons": [2, 19], "content": 2, "design": 2, "multi": [2, 4, 11, 25, 26, 27], "gpu": [2, 11, 14, 25, 26, 27, 28, 36], "execut": [2, 26, 39], "In": [2, 4, 5, 25, 27], "flight": [2, 4, 5, 25, 27], "triton": [2, 35], "infer": [2, 7, 18, 25, 26, 35, 36], "server": [2, 35], "expert": 3, "parallel": [3, 27], "mixtur": 3, "moe": 3, "v": [3, 16], "how": [3, 27], "enabl": 3, "head": [4, 27], "queri": 4, "group": 4, "attent": [4, 10, 25, 27, 30], "import": 4, "note": [4, 40], "pad": [4, 27], "pack": 4, "context": [4, 27], "gener": 4, "phase": 4, "fp8": [4, 10, 15, 25, 28, 37], "fmha": 4, "xqa": [4, 17], "optim": [4, 27], "chunk": [4, 27], "kv": [4, 10, 27, 36], "cach": [4, 10, 27, 36], "contigu": 4, "page": [4, 25, 27], "int8": [4, 37], "slide": 4, "window": [4, 22, 24, 25, 27], "cyclic": 4, "roll": 4, "buffer": 4, "streamingllm": 4, "beam": 4, "search": 4, "input": [4, 27], "qkv": 4, "rotari": 4, "posit": 4, "embed": [4, 27, 30], "rope": 4, "alibi": 4, "scale": [4, 10], "factor": [4, 10], "": [4, 15, 18, 20], "cross": 4, "rel": 4, "bia": 4, "rab": 4, "c": [5, 19, 21, 22, 36], "gpt": [5, 8, 27, 28], "overview": [5, 10, 13, 25, 28], "model": [5, 9, 11, 12, 26, 27, 28, 31, 35, 39, 40, 41], "configur": [5, 8], "world": 5, "sampl": [5, 41], "session": 5, "intern": 5, "compon": 5, "support": [5, 11, 21, 25, 37, 38], "know": 5, "issu": [5, 28, 36, 40], "futur": 5, "chang": [5, 40], "graph": 6, "rewrit": 6, "modul": [6, 8], "when": 6, "us": [6, 8, 41], "relat": 6, "method": [6, 18], "flayerinfo": 6, "retriev": [6, 35], "high": 6, "level": 6, "inform": 6, "function": [6, 29], "pattern": [6, 11], "record_signatur": 6, "decor": 6, "requir": 6, "classic": 6, "workflow": [6, 13], "run": [8, 22, 28, 35], "2b": 8, "lora": 8, "cpp": 8, "format": 8, "detail": [8, 37], "exampl": [8, 10, 19, 26], "id": 8, "map": 8, "ad": 9, "checkpoint": 10, "prepar": 10, "config": 10, "rank": [10, 26], "weight": [10, 11, 12, 35, 36, 37], "mlp": [10, 27, 30], 
"layernorm": 10, "quantiz": [10, 13, 18, 33, 37], "awq": [10, 14, 37], "build": [10, 13, 21, 22, 27, 28], "engin": [10, 11, 28, 35], "make": 10, "evalu": 10, "definit": 11, "compil": [11, 35], "bind": [11, 19, 21], "match": 11, "fusion": [11, 27], "plugin": [11, 27, 32], "node": [11, 25], "architectur": [12, 20], "convers": 13, "cli": 13, "tool": 13, "falcon": [14, 28], "180b": [14, 28], "singl": [14, 26], "h200": [14, 16, 17, 28], "int4": [14, 37], "6": 14, "7x": 14, "faster": 14, "llama": [14, 17, 28], "70b": [14, 17, 28], "over": 14, "a100": [14, 15, 28], "up": [14, 17, 18, 27], "close": [14, 17], "h100": [15, 16, 28], "ha": 15, "4": 15, "6x": 15, "perform": [15, 18, 20, 26, 27, 41], "achiev": [15, 16], "10": 15, "000": [15, 16], "tok": 15, "100m": 15, "first": [15, 28], "token": [15, 16, 27, 28], "mlperf": 15, "what": [15, 18, 25], "i": 15, "nearli": 16, "12": 16, "sec": 16, "llama2": [16, 28], "13b": 16, "latest": [16, 25], "hbm": 16, "memori": [16, 27, 36], "new": 17, "kernel": 17, "provid": 17, "2": [17, 21], "4x": 17, "more": 17, "throughput": [17, 28], "within": 17, "same": 17, "latenc": [17, 28], "budget": 17, "increas": 17, "speed": 18, "sota": 18, "techniqu": 18, "trt": 18, "benchmark": [18, 28], "accuraci": 18, "best": [18, 27], "practic": [18, 27], "choos": 18, "right": 18, "come": 18, "next": [18, 35], "class": 19, "result": [19, 28], "python": [19, 21, 36], "welcom": 20, "document": 20, "start": [20, 35], "instal": [20, 23, 24, 39], "advanc": 20, "refer": 20, "indic": 20, "tabl": 20, "from": [21, 22], "sourc": [21, 22], "code": [21, 22], "linux": [21, 23], "prerequisit": [21, 22, 35], "docker": [21, 22, 35], "imag": [21, 22], "1": [21, 40], "One": 21, "step": [21, 35], "By": 21, "creat": 21, "contain": [21, 22, 28], "link": [21, 22], "header": 21, "file": [21, 22], "desktop": 22, "acquir": 22, "an": 22, "extract": 22, "bare": 22, "metal": 22, "about": 25, "nativ": 25, "can": 25, "you": 25, "do": 25, "With": 25, "analysi": 26, "featur": [26, 40], "descript": 26, "usag": [26, 36], "time": [26, 36], "command": 26, "line": 26, "environ": 26, "variabl": 26, "coordin": 26, "nvidia": 26, "nsight": 26, "system": 26, "launch": [26, 35], "profil": 26, "ifb": 26, "iter": 26, "tune": 27, "To": 27, "measur": 27, "fuse": [27, 28], "remov": 27, "maximum": 27, "number": 27, "sequenc": 27, "block": 27, "mode": 27, "custom": 27, "allreduc": 27, "share": 27, "look": 27, "horizont": 27, "gate": [27, 28], "gemm": 27, "bert": 27, "max": 27, "free": 27, "fraction": 27, "schedul": 27, "polici": 27, "overlap": 27, "size": [27, 36], "methodologi": 28, "peak": 28, "l40": 28, "fp16": [28, 37], "low": 28, "sup": 28, "known": [28, 36, 40], "matmul": 28, "silu": 28, "reproduc": 28, "setup": 28, "per": 28, "j": 28, "6b": 28, "7b": 28, "layer": 30, "activ": [30, 36], "cast": 30, "conv": 30, "linear": 30, "normal": 30, "pool": [30, 36], "quick": 35, "guid": 35, "deploi": 35, "understand": 36, "faq": 36, "numer": 37, "precis": 37, "fp32": 37, "bf16": 37, "dequant": 37, "q": 37, "dq": 37, "smoothquant": 37, "w8a8": 37, "onli": 37, "w4a16": 37, "w8a16": 37, "gptq": 37, "hopper": 37, "matrix": [37, 38], "technic": 37, "quantmod": 37, "flag": 37, "hardwar": 38, "softwar": 38, "troubleshoot": 39, "debug": 39, "unit": 39, "test": 39, "e2": 39, "error": 39, "tip": 39, "releas": 40, "0": 40, "9": 40, "announc": 40, "kei": 40, "enhanc": 40, "updat": 40, "limit": [40, 41], "fix": 40, "8": 40, "7": 40, "specul": 41, "improv": 41, "draft": 41, "approach": 41, "medusa": 41, "tree": 41}, "envversion": {"sphinx.domains.c": 
3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx.ext.todo": 2, "sphinx": 60}, "alltitles": {"The Batch Manager in TensorRT-LLM": [[2, "the-batch-manager-in-tensorrt-llm"]], "The Batch Manager API": [[2, "the-batch-manager-api"]], "Get and Send Callbacks": [[2, "get-and-send-callbacks"]], "Request Interruption": [[2, "request-interruption"]], "Statistics": [[2, "statistics"]], "Logits Post-Processor (optional)": [[2, "logits-post-processor-optional"], [19, "logits-post-processor-optional"]], "Other mandatory GptManager parameters": [[2, "other-mandatory-gptmanager-parameters"]], "Optional GptManager parameters": [[2, "optional-gptmanager-parameters"]], "Responses content": [[2, "responses-content"]], "GptManager Design": [[2, "gptmanager-design"]], "Multi-GPU execution": [[2, "multi-gpu-execution"]], "In-flight Batching with the Triton Inference Server": [[2, "in-flight-batching-with-the-triton-inference-server"]], "Expert Parallelism in TensorRT-LLM": [[3, "expert-parallelism-in-tensorrt-llm"]], "Mixture of Experts (MoE)": [[3, "mixture-of-experts-moe"]], "Tensor Parallel vs Expert Parallel": [[3, "tensor-parallel-vs-expert-parallel"]], "How to Enable": [[3, "how-to-enable"]], "Multi-Head, Multi-Query, and Group-Query Attention": [[4, "multi-head-multi-query-and-group-query-attention"]], "Important Note": [[4, "important-note"]], "Padded and Packed Tensors": [[4, "padded-and-packed-tensors"]], "Context and Generation Phases": [[4, "context-and-generation-phases"]], "Context Phase": [[4, "context-phase"]], "FP8 Context FMHA": [[4, "fp8-context-fmha"]], "Generation Phase": [[4, "generation-phase"]], "XQA Optimization": [[4, "xqa-optimization"]], "In-flight Batching": [[4, "in-flight-batching"]], "Chunked Context": [[4, "chunked-context"], [27, "chunked-context"]], "KV Cache": [[4, "kv-cache"]], "Contiguous KV Cache": [[4, "contiguous-kv-cache"]], "Paged KV Cache": [[4, "paged-kv-cache"], [27, "paged-kv-cache"]], "INT8/FP8 KV Caches": [[4, "int8-fp8-kv-caches"]], "Sliding Window Attention, Cyclic (Rolling Buffer) KV Cache": [[4, "sliding-window-attention-cyclic-rolling-buffer-kv-cache"]], "StreamingLLM": [[4, "streamingllm"]], "Beam-Search": [[4, "beam-search"]], "Input QKV tensor": [[4, "input-qkv-tensor"]], "Rotary Positional Embedding (RoPE)": [[4, "rotary-positional-embedding-rope"]], "ALiBi": [[4, "alibi"]], "Scaling factor(s)": [[4, "scaling-factor-s"]], "Cross Attention": [[4, "cross-attention"]], "Relative Attention Bias (RAB)": [[4, "relative-attention-bias-rab"]], "C++ GPT Runtime": [[5, "c-gpt-runtime"]], "Overview": [[5, "overview"], [10, "overview"], [13, "overview"], [25, "overview"], [28, "overview"]], "Model Configuration": [[5, "model-configuration"]], "World Configuration": [[5, "world-configuration"]], "Sampling Parameters": [[5, "sampling-parameters"]], "The Session": [[5, "the-session"]], "Internal Components": [[5, "internal-components"]], "In-flight Batching Support": [[5, "in-flight-batching-support"]], "Know Issues and Future Changes": [[5, "know-issues-and-future-changes"]], "Graph Rewriting Module": [[6, "graph-rewriting-module"]], "When to Use Graph Rewriting?": [[6, "when-to-use-graph-rewriting"]], "Graph Rewriting APIs": [[6, "graph-rewriting-apis"]], "Tensor-Related Methods": [[6, "tensor-related-methods"]], "FLayerInfo for 
Retrieving High-Level Information for a Functional": [[6, "flayerinfo-for-retrieving-high-level-information-for-a-functional"]], "Pattern and Pattern Manager": [[6, "pattern-and-pattern-manager"]], "@record_signature to Decorate Functionals Requiring FLayerInfo": [[6, "record-signature-to-decorate-functionals-requiring-flayerinfo"]], "Classical Workflow": [[6, "classical-workflow"]], "Inference Request": [[7, "inference-request"]], "Run gpt-2b + LoRA using GptManager / cpp runtime": [[8, "run-gpt-2b-lora-using-gptmanager-cpp-runtime"]], "LoRA tensor format details": [[8, "lora-tensor-format-details"]], "Example LoRA tensors": [[8, "example-lora-tensors"]], "LoRA Module id mapping": [[8, "lora-module-id-mapping"]], "LoraCache configuration": [[8, "loracache-configuration"]], "Adding a Model": [[9, "adding-a-model"]], "TensorRT-LLM Checkpoint": [[10, "tensorrt-llm-checkpoint"]], "Prepare the TensorRT-LLM Checkpoint": [[10, "prepare-the-tensorrt-llm-checkpoint"]], "Config": [[10, "config"]], "Rank Weights": [[10, "rank-weights"]], "Attention Weights": [[10, "attention-weights"]], "MLP Weights": [[10, "mlp-weights"]], "LayerNorm Weights": [[10, "layernorm-weights"]], "KV Cache Quantization Scaling Factors": [[10, "kv-cache-quantization-scaling-factors"]], "FP8 Quantization Scaling Factors": [[10, "fp8-quantization-scaling-factors"]], "AWQ Quantization Scaling Factors": [[10, "awq-quantization-scaling-factors"]], "Example": [[10, "example"]], "Build Checkpoint into TensorRT Engine": [[10, "build-checkpoint-into-tensorrt-engine"]], "Make Evaluation": [[10, "make-evaluation"]], "Runtime": [[11, "runtime"], [1, "runtime"], [34, "module-tensorrt_llm"]], "Model Definition": [[11, "model-definition"]], "Compilation": [[11, "compilation"]], "TensorRT Compiler": [[11, "tensorrt-compiler"]], "Model Engine": [[11, "model-engine"]], "Weight Bindings": [[11, "weight-bindings"]], "Pattern-Matching and Fusion": [[11, "pattern-matching-and-fusion"]], "Plugins": [[11, "plugins"]], "Multi-GPU and Multi-Node Support": [[11, "multi-gpu-and-multi-node-support"]], "TensorRT-LLM Architecture": [[12, "tensorrt-llm-architecture"]], "Model Weights": [[12, "model-weights"]], "TensorRT-LLM Build Workflow": [[13, "tensorrt-llm-build-workflow"]], "Conversion APIs": [[13, "conversion-apis"]], "Quantization APIs": [[13, "quantization-apis"]], "Build APIs": [[13, "build-apis"]], "CLI Tools": [[13, "cli-tools"]], "Falcon-180B on a single H200 GPU with INT4 AWQ, and 6.7x faster Llama-70B over A100": [[14, "falcon-180b-on-a-single-h200-gpu-with-int4-awq-and-6-7x-faster-llama-70b-over-a100"]], "Falcon-180B on a single H200 with INT4 AWQ": [[14, "falcon-180b-on-a-single-h200-with-int4-awq"]], "Llama-70B on H200 up to 6.7x A100": [[14, "llama-70b-on-h200-up-to-6-7x-a100"]], "Closing": [[14, "closing"], [17, "closing"]], "H100 has 4.6x A100 Performance in TensorRT-LLM, achieving 10,000 tok/s at 100ms to first token": [[15, "h100-has-4-6x-a100-performance-in-tensorrt-llm-achieving-10-000-tok-s-at-100ms-to-first-token"]], "MLPerf on H100 with FP8": [[15, "mlperf-on-h100-with-fp8"]], "What is H100 FP8?": [[15, "what-is-h100-fp8"]], "H200 achieves nearly 12,000 tokens/sec on Llama2-13B with TensorRT-LLM": [[16, "h200-achieves-nearly-12-000-tokens-sec-on-llama2-13b-with-tensorrt-llm"]], "H200 vs H100": [[16, "h200-vs-h100"]], "Latest HBM Memory": [[16, "latest-hbm-memory"]], "New XQA-kernel provides 2.4x more Llama-70B throughput within the same latency budget": [[17, 
"new-xqa-kernel-provides-2-4x-more-llama-70b-throughput-within-the-same-latency-budget"]], "Llama-70B on H200 up to 2.4x increased throughput with XQA within same latency budget": [[17, "llama-70b-on-h200-up-to-2-4x-increased-throughput-with-xqa-within-same-latency-budget"]], "Speed up inference with SOTA quantization techniques in TRT-LLM": [[18, "speed-up-inference-with-sota-quantization-techniques-in-trt-llm"]], "Quantization in TensorRT-LLM": [[18, "quantization-in-tensorrt-llm"]], "Benchmark": [[18, "benchmark"]], "Performance": [[18, "performance"], [20, null]], "Accuracy": [[18, "accuracy"]], "Best practices to choose the right quantization methods": [[18, "best-practices-to-choose-the-right-quantization-methods"]], "What\u2019s coming next": [[18, "whats-coming-next"]], "Executor API": [[19, "executor-api"]], "The Executor Class": [[19, "the-executor-class"]], "The Request Class": [[19, "the-request-class"]], "The Response Class": [[19, "the-response-class"]], "The Result Class": [[19, "the-result-class"]], "C++ Executor API Example": [[19, "c-executor-api-example"]], "Python Bindings for the Executor API": [[19, "python-bindings-for-the-executor-api"]], "Building from Source Code on Linux": [[21, "building-from-source-code-on-linux"]], "Prerequisites": [[21, "prerequisites"], [22, "prerequisites"], [35, "prerequisites"]], "Building a TensorRT-LLM Docker Image": [[21, "building-a-tensorrt-llm-docker-image"], [22, "building-a-tensorrt-llm-docker-image"]], "Option 1: Build TensorRT-LLM in One Step": [[21, "option-1-build-tensorrt-llm-in-one-step"]], "Option 2: Build TensorRT-LLM Step-By-Step": [[21, "option-2-build-tensorrt-llm-step-by-step"]], "Create the Container": [[21, "create-the-container"]], "Build TensorRT-LLM": [[21, "build-tensorrt-llm"]], "Building the Python Bindings for the C++ Runtime": [[21, "building-the-python-bindings-for-the-c-runtime"]], "Linking with the TensorRT-LLM C++ Runtime": [[21, "linking-with-the-tensorrt-llm-c-runtime"], [22, "linking-with-the-tensorrt-llm-c-runtime"]], "Supported C++ Header Files": [[21, "supported-c-header-files"]], "Building from Source Code on Windows": [[22, "building-from-source-code-on-windows"]], "Docker Desktop": [[22, "docker-desktop"]], "Acquire an Image": [[22, "acquire-an-image"]], "Run the Container": [[22, "run-the-container"]], "Build and Extract Files": [[22, "build-and-extract-files"]], "Building TensorRT-LLM on Bare Metal": [[22, "building-tensorrt-llm-on-bare-metal"]], "Installing on Linux": [[23, "installing-on-linux"]], "Installing on Windows": [[24, "installing-on-windows"]], "About TensorRT-LLM": [[25, "about-tensorrt-llm"]], "Common LLM Support": [[25, "common-llm-support"]], "In-Flight Batching and Paged Attention": [[25, "in-flight-batching-and-paged-attention"]], "Multi-GPU Multi-Node Inference": [[25, "multi-gpu-multi-node-inference"]], "FP8 Support": [[25, "fp8-support"]], "Latest GPU Support": [[25, "latest-gpu-support"]], "Native Windows Support": [[25, "native-windows-support"]], "What Can You Do With TensorRT-LLM?": [[25, "what-can-you-do-with-tensorrt-llm"]], "Performance Analysis": [[26, "performance-analysis"]], "Feature Descriptions": [[26, "feature-descriptions"]], "Usage": [[26, "usage"]], "Inference Time Command Line Options": [[26, "inference-time-command-line-options"]], "Inference Time Environment Variables": [[26, "inference-time-environment-variables"]], "Coordinating with NVIDIA Nsight Systems Launch": [[26, "coordinating-with-nvidia-nsight-systems-launch"]], "Examples": [[26, "examples"]], 
"Profiling a single IFB iteration executing on a single rank of a multi-GPU model": [[26, "profiling-a-single-ifb-iteration-executing-on-a-single-rank-of-a-multi-gpu-model"]], "Best Practices for Tuning the Performance of TensorRT-LLM": [[27, "best-practices-for-tuning-the-performance-of-tensorrt-llm"]], "How To Measure Performance?": [[27, "how-to-measure-performance"]], "Build Options to Optimize the Performance of TensorRT-LLM Models?": [[27, "build-options-to-optimize-the-performance-of-tensorrt-llm-models"]], "GPT Attention Plugin and Context Fused Multi-Head Attention": [[27, "gpt-attention-plugin-and-context-fused-multi-head-attention"]], "Remove Input Padding": [[27, "remove-input-padding"]], "Maximum Number of Tokens": [[27, "maximum-number-of-tokens"]], "In-flight Sequence Batching": [[27, "in-flight-sequence-batching"]], "Multi-Block Mode": [[27, "multi-block-mode"]], "Custom AllReduce Plugin": [[27, "custom-allreduce-plugin"]], "Embedding Parallelism, Embedding Sharing, and Look-Up Plugin": [[27, "embedding-parallelism-embedding-sharing-and-look-up-plugin"]], "Horizontal Fusion in Gated-MLP": [[27, "horizontal-fusion-in-gated-mlp"]], "GEMM Plugin": [[27, "gemm-plugin"]], "BERT Attention Plugin and Context Fused Multi-Head Attention": [[27, "bert-attention-plugin-and-context-fused-multi-head-attention"]], "Runtime Options to Optimize the Performance of TensorRT-LLM Models?": [[27, "runtime-options-to-optimize-the-performance-of-tensorrt-llm-models"]], "GPT Model Type": [[27, "gpt-model-type"]], "Max Tokens in Paged KV Cache and KV Cache Free GPU Memory Fraction": [[27, "max-tokens-in-paged-kv-cache-and-kv-cache-free-gpu-memory-fraction"]], "Batch Scheduler Policy": [[27, "batch-scheduler-policy"]], "TensorRT Overlap": [[27, "tensorrt-overlap"]], "Maximum Attention Window Size": [[27, "maximum-attention-window-size"]], "Methodology": [[28, "methodology"], [28, "id5"]], "Peak Throughput": [[28, "peak-throughput"]], "H200 GPUs (FP8)": [[28, "h200-gpus-fp8"], [28, "id1"]], "H100 GPUs (FP8)": [[28, "h100-gpus-fp8"], [28, "id2"]], "L40S GPUs (FP8)": [[28, "l40s-gpus-fp8"], [28, "id3"]], "A100 GPUs (FP16)": [[28, "a100-gpus-fp16"], [28, "id4"]], "Low Latency<sup>**</sup>": [[28, "low-latency"]], "Known Issues": [[28, "known-issues"], [36, "known-issues"], [40, "known-issues"]], "Fused Matmul + Gated-SiLU (LLaMA)": [[28, "fused-matmul-gated-silu-llama"]], "Reproducing Benchmarked Results": [[28, "reproducing-benchmarked-results"]], "Building the TensorRT-LLM Container": [[28, "building-the-tensorrt-llm-container"]], "Engine Building Setups": [[28, "engine-building-setups"]], "Running on A100": [[28, "running-on-a100"]], "Reproducing First Token Latency": [[28, "reproducing-first-token-latency"]], "Benchmarking per Model": [[28, "benchmarking-per-model"]], "GPT-J 6B": [[28, "gpt-j-6b"]], "Throughput Benchmark": [[28, "throughput-benchmark"], [28, "id6"], [28, "id8"]], "First Token Latency Benchmark": [[28, "first-token-latency-benchmark"], [28, "id7"], [28, "id9"]], "Llama2-7b": [[28, "llama2-7b"]], "Llama2-70b": [[28, "llama2-70b"]], "Falcon-180B": [[28, "falcon-180b"]], "Quick Start Guide": [[35, "quick-start-guide"]], "Launch the Docker": [[35, "launch-the-docker"]], "Retrieve the Model Weights": [[35, "retrieve-the-model-weights"]], "Compile the Model into a TensorRT Engine": [[35, "compile-the-model-into-a-tensorrt-engine"]], "Run the Model": [[35, "run-the-model"]], "Deploy with Triton Inference Server": [[35, "deploy-with-triton-inference-server"]], "Send Requests": [[35, 
"send-requests"]], "Next Steps": [[35, "next-steps"]], "Memory Usage of TensorRT-LLM": [[36, "memory-usage-of-tensorrt-llm"]], "Understand inference time GPU memory usage": [[36, "understand-inference-time-gpu-memory-usage"]], "Weights size": [[36, "weights-size"]], "Activation size": [[36, "activation-size"]], "KV cache tensor": [[36, "kv-cache-tensor"]], "Python runtime": [[36, "python-runtime"]], "C++ runtime": [[36, "c-runtime"]], "Memory pool": [[36, "memory-pool"]], "FAQ": [[36, "faq"]], "Numerical Precision": [[37, "numerical-precision"]], "FP32, FP16 and BF16": [[37, "fp32-fp16-and-bf16"]], "Quantization and Dequantization (Q/DQ)": [[37, "quantization-and-dequantization-q-dq"]], "INT8 SmoothQuant (W8A8)": [[37, "int8-smoothquant-w8a8"]], "INT4 and INT8 Weight-Only (W4A16 and W8A16)": [[37, "int4-and-int8-weight-only-w4a16-and-w8a16"]], "GPTQ and AWQ (W4A16)": [[37, "gptq-and-awq-w4a16"]], "FP8 (Hopper)": [[37, "fp8-hopper"]], "Support matrix": [[37, "support-matrix"]], "Technical Detail: The QuantMode Flags": [[37, "technical-detail-the-quantmode-flags"]], "Support Matrix": [[38, "support-matrix"]], "Hardware": [[38, "hardware"]], "Software": [[38, "software"]], "Troubleshooting": [[39, "troubleshooting"]], "Debug on Unit Tests": [[39, "debug-on-unit-tests"]], "Debug on E2E Models": [[39, "debug-on-e2e-models"]], "Debug Execution Errors": [[39, "debug-execution-errors"]], "Installation Errors": [[39, "installation-errors"]], "Tips": [[39, "tips"]], "Release Notes": [[40, "release-notes"]], "TensorRT-LLM Release 0.9.0": [[40, "tensorrt-llm-release-0-9-0"]], "Announcements": [[40, "announcements"]], "Key Features and Enhancements": [[40, "key-features-and-enhancements"], [40, "id2"], [40, "id4"]], "API Changes": [[40, "api-changes"]], "Model Updates": [[40, "model-updates"], [40, "id3"], [40, "id5"]], "Limitations": [[40, "limitations"], [41, "limitations"]], "Fixed Issues": [[40, "fixed-issues"], [40, "id6"]], "TensorRT-LLM Release 0.8.0": [[40, "tensorrt-llm-release-0-8-0"]], "TensorRT-LLM Release 0.7.1": [[40, "tensorrt-llm-release-0-7-1"]], "Executor": [[0, "executor"]], "executor.h": [[0, "executor-h"]], "tensor.h": [[0, "tensor-h"]], "types.h": [[0, "types-h"]], "bufferManager.h": [[1, "buffermanager-h"]], "common.h": [[1, "common-h"]], "cudaEvent.h": [[1, "cudaevent-h"]], "cudaStream.h": [[1, "cudastream-h"]], "decodingInput.h": [[1, "decodinginput-h"]], "decodingOutput.h": [[1, "decodingoutput-h"]], "generationInput.h": [[1, "generationinput-h"]], "generationOutput.h": [[1, "generationoutput-h"]], "gptDecoder.h": [[1, "gptdecoder-h"]], "gptDecoderBatch.h": [[1, "gptdecoderbatch-h"]], "gptJsonConfig.h": [[1, "gptjsonconfig-h"]], "gptModelConfig.h": [[1, "gptmodelconfig-h"]], "gptSession.h": [[1, "gptsession-h"]], "iBuffer.h": [[1, "ibuffer-h"]], "iGptDecoderBatch.h": [[1, "igptdecoderbatch-h"]], "iStatefulGptDecoder.h": [[1, "istatefulgptdecoder-h"]], "iTensor.h": [[1, "itensor-h"]], "ipcUtils.h": [[1, "ipcutils-h"]], "memoryCounters.h": [[1, "memorycounters-h"]], "promptTuningParams.h": [[1, "prompttuningparams-h"]], "samplingConfig.h": [[1, "samplingconfig-h"]], "tllmLogger.h": [[1, "tllmlogger-h"]], "worldConfig.h": [[1, "worldconfig-h"]], "decodingMode.h": [[1, "decodingmode-h"]], "loraCache.h": [[1, "loracache-h"]], "loraCachePageManagerConfig.h": [[1, "loracachepagemanagerconfig-h"]], "loraModule.h": [[1, "loramodule-h"]], "Welcome to TensorRT-LLM\u2019s Documentation!": [[20, "welcome-to-tensorrt-llm-s-documentation"]], "Getting Started": [[20, null]], "Installation": 
[[20, null]], "Architecture": [[20, null]], "Advanced": [[20, null]], "Reference": [[20, null]], "Indices and tables": [[20, "indices-and-tables"]], "Functionals": [[29, "module-tensorrt_llm"]], "Layers": [[30, "module-tensorrt_llm"]], "Activation": [[30, "module-tensorrt_llm.layers.activation"]], "Attention": [[30, "module-tensorrt_llm.layers.attention"]], "Cast": [[30, "module-tensorrt_llm.layers.cast"]], "Conv": [[30, "module-tensorrt_llm.layers.conv"]], "Embedding": [[30, "module-tensorrt_llm.layers.embedding"]], "Linear": [[30, "module-tensorrt_llm.layers.linear"]], "MLP": [[30, "module-tensorrt_llm.layers.mlp"]], "Normalization": [[30, "module-tensorrt_llm.layers.normalization"]], "Pooling": [[30, "module-tensorrt_llm.layers.pooling"]], "Models": [[31, "module-tensorrt_llm"]], "Plugin": [[32, "module-tensorrt_llm"]], "Quantization": [[33, "module-tensorrt_llm"]], "Speculative Sampling": [[41, "speculative-sampling"]], "Performance Improvements": [[41, "performance-improvements"]], "Draft Model Approach": [[41, "draft-model-approach"]], "Medusa": [[41, "medusa"]], "Medusa Tree": [[41, "medusa-tree"]], "Using Medusa with TensorRT-LLM": [[41, "using-medusa-with-tensorrt-llm"]]}, "indexentries": {"tensorrt_llm (c++ type)": [[0, "_CPPv412tensorrt_llm"], [1, "_CPPv412tensorrt_llm"]], "tensorrt_llm::executor (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executorE"]], "tensorrt_llm::executor::batchingtype (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor12BatchingTypeE"]], "tensorrt_llm::executor::batchingtype::kinflight (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor12BatchingType9kINFLIGHTE"]], "tensorrt_llm::executor::batchingtype::kstatic (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor12BatchingType7kSTATICE"]], "tensorrt_llm::executor::beamtokens (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor10BeamTokensE"]], "tensorrt_llm::executor::communicationmode (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor17CommunicationModeE"]], "tensorrt_llm::executor::communicationmode::kleader (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor17CommunicationMode7kLEADERE"]], "tensorrt_llm::executor::communicationtype (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor17CommunicationTypeE"]], "tensorrt_llm::executor::communicationtype::kmpi (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor17CommunicationType4kMPIE"]], "tensorrt_llm::executor::datatype (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor8DataTypeE"]], "tensorrt_llm::executor::datatype::kbf16 (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType5kBF16E"]], "tensorrt_llm::executor::datatype::kbool (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType5kBOOLE"]], "tensorrt_llm::executor::datatype::kfp16 (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType5kFP16E"]], "tensorrt_llm::executor::datatype::kfp32 (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType5kFP32E"]], "tensorrt_llm::executor::datatype::kfp8 (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType4kFP8E"]], "tensorrt_llm::executor::datatype::kint32 (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType6kINT32E"]], "tensorrt_llm::executor::datatype::kint64 (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType6kINT64E"]], "tensorrt_llm::executor::datatype::kint8 (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType5kINT8E"]], "tensorrt_llm::executor::datatype::kuint8 (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType6kUINT8E"]], 
"tensorrt_llm::executor::datatype::kunknown (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType8kUNKNOWNE"]], "tensorrt_llm::executor::executor (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor8ExecutorE"]], "tensorrt_llm::executor::executor::executor (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorENSt10shared_ptrI5ModelEERK14ExecutorConfig"], [0, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERKNSt10filesystem4pathE9ModelTypeRK14ExecutorConfig"], [0, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERKNSt6vectorI7uint8_tEERKNSt6stringE9ModelTypeRK14ExecutorConfig"]], "tensorrt_llm::executor::executor::awaitresponses (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8Executor14awaitResponsesERKNSt8optionalI6IdTypeEERKNSt8optionalINSt6chrono12millisecondsEEE"]], "tensorrt_llm::executor::executor::canenqueuerequests (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8Executor18canEnqueueRequestsEv"]], "tensorrt_llm::executor::executor::cancelrequest (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8Executor13cancelRequestE6IdType"]], "tensorrt_llm::executor::executor::enqueuerequest (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8Executor14enqueueRequestERK7Request"]], "tensorrt_llm::executor::executor::enqueuerequests (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8Executor15enqueueRequestsERKNSt6vectorI7RequestEE"]], "tensorrt_llm::executor::executor::getlatestiterationstats (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8Executor23getLatestIterationStatsEv"]], "tensorrt_llm::executor::executor::getlatestrequeststats (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8Executor21getLatestRequestStatsEv"]], "tensorrt_llm::executor::executor::getnumresponsesready (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8Executor20getNumResponsesReadyERKNSt8optionalI6IdTypeEE"]], "tensorrt_llm::executor::executor::mimpl (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8Executor5mImplE"]], "tensorrt_llm::executor::executor::shutdown (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8Executor8shutdownEv"]], "tensorrt_llm::executor::executor::~executor (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8ExecutorD0Ev"]], "tensorrt_llm::executor::executorconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfigE"]], "tensorrt_llm::executor::executorconfig::executorconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig14ExecutorConfigE8SizeTypeRK15SchedulerConfigRK13KvCacheConfigbb8SizeType8SizeType12BatchingTypeNSt8optionalI14ParallelConfigEERKNSt8optionalI15PeftCacheConfigEENSt8optionalI22LogitsPostProcessorMapEENSt8optionalI13MedusaChoicesEE"]], "tensorrt_llm::executor::executorconfig::getbatchingtype (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getBatchingTypeEv"]], "tensorrt_llm::executor::executorconfig::getenablechunkedcontext (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig23getEnableChunkedContextEv"]], "tensorrt_llm::executor::executorconfig::getiterstatsmaxiterations (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig25getIterStatsMaxIterationsEv"]], "tensorrt_llm::executor::executorconfig::getkvcacheconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig16getKvCacheConfigEv"]], "tensorrt_llm::executor::executorconfig::getlogitspostprocessormap (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig25getLogitsPostProcessorMapEv"]], 
"tensorrt_llm::executor::executorconfig::getmaxbeamwidth (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getMaxBeamWidthEv"]], "tensorrt_llm::executor::executorconfig::getmedusachoices (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig16getMedusaChoicesEv"]], "tensorrt_llm::executor::executorconfig::getnormalizelogprobs (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig20getNormalizeLogProbsEv"]], "tensorrt_llm::executor::executorconfig::getparallelconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig17getParallelConfigEv"]], "tensorrt_llm::executor::executorconfig::getpeftcacheconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig18getPeftCacheConfigEv"]], "tensorrt_llm::executor::executorconfig::getrequeststatsmaxiterations (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig28getRequestStatsMaxIterationsEv"]], "tensorrt_llm::executor::executorconfig::getschedulerconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig18getSchedulerConfigEv"]], "tensorrt_llm::executor::executorconfig::mbatchingtype (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mBatchingTypeE"]], "tensorrt_llm::executor::executorconfig::menablechunkedcontext (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig21mEnableChunkedContextE"]], "tensorrt_llm::executor::executorconfig::miterstatsmaxiterations (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig23mIterStatsMaxIterationsE"]], "tensorrt_llm::executor::executorconfig::mkvcacheconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig14mKvCacheConfigE"]], "tensorrt_llm::executor::executorconfig::mlogitspostprocessormap (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig23mLogitsPostProcessorMapE"]], "tensorrt_llm::executor::executorconfig::mmaxbeamwidth (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mMaxBeamWidthE"]], "tensorrt_llm::executor::executorconfig::mmedusachoices (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig14mMedusaChoicesE"]], "tensorrt_llm::executor::executorconfig::mnormalizelogprobs (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig18mNormalizeLogProbsE"]], "tensorrt_llm::executor::executorconfig::mparallelconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig15mParallelConfigE"]], "tensorrt_llm::executor::executorconfig::mpeftcacheconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig16mPeftCacheConfigE"]], "tensorrt_llm::executor::executorconfig::mrequeststatsmaxiterations (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig26mRequestStatsMaxIterationsE"]], "tensorrt_llm::executor::executorconfig::mschedulerconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig16mSchedulerConfigE"]], "tensorrt_llm::executor::executorconfig::setbatchingtype (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setBatchingTypeE12BatchingType"]], "tensorrt_llm::executor::executorconfig::setenablechunkedcontext (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig23setEnableChunkedContextEb"]], "tensorrt_llm::executor::executorconfig::setiterstatsmaxiterations (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setIterStatsMaxIterationsE8SizeType"]], "tensorrt_llm::executor::executorconfig::setkvcacheconfig (c++ function)": [[0, 
"_CPPv4N12tensorrt_llm8executor14ExecutorConfig16setKvCacheConfigERK13KvCacheConfig"]], "tensorrt_llm::executor::executorconfig::setlogitspostprocessormap (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setLogitsPostProcessorMapERK22LogitsPostProcessorMap"]], "tensorrt_llm::executor::executorconfig::setmaxbeamwidth (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setMaxBeamWidthE8SizeType"]], "tensorrt_llm::executor::executorconfig::setmedusachoices (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig16setMedusaChoicesERK13MedusaChoices"]], "tensorrt_llm::executor::executorconfig::setnormalizelogprobs (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig20setNormalizeLogProbsEb"]], "tensorrt_llm::executor::executorconfig::setparallelconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig17setParallelConfigERK14ParallelConfig"]], "tensorrt_llm::executor::executorconfig::setpeftcacheconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig18setPeftCacheConfigERK15PeftCacheConfig"]], "tensorrt_llm::executor::executorconfig::setrequeststatsmaxiterations (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig28setRequestStatsMaxIterationsE8SizeType"]], "tensorrt_llm::executor::executorconfig::setschedulerconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig18setSchedulerConfigERK15SchedulerConfig"]], "tensorrt_llm::executor::floattype (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor9FloatTypeE"]], "tensorrt_llm::executor::idtype (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor6IdTypeE"]], "tensorrt_llm::executor::inflightbatchingstats (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStatsE"]], "tensorrt_llm::executor::inflightbatchingstats::microbatchid (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStats12microBatchIdE"]], "tensorrt_llm::executor::inflightbatchingstats::numcontextrequests (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStats18numContextRequestsE"]], "tensorrt_llm::executor::inflightbatchingstats::numctxtokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStats12numCtxTokensE"]], "tensorrt_llm::executor::inflightbatchingstats::numgenrequests (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStats14numGenRequestsE"]], "tensorrt_llm::executor::inflightbatchingstats::numpausedrequests (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStats17numPausedRequestsE"]], "tensorrt_llm::executor::inflightbatchingstats::numscheduledrequests (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStats20numScheduledRequestsE"]], "tensorrt_llm::executor::iterationstats (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStatsE"]], "tensorrt_llm::executor::iterationstats::cpumemusage (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats11cpuMemUsageE"]], "tensorrt_llm::executor::iterationstats::gpumemusage (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats11gpuMemUsageE"]], "tensorrt_llm::executor::iterationstats::inflightbatchingstats (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats21inflightBatchingStatsE"]], "tensorrt_llm::executor::iterationstats::iter (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats4iterE"]], "tensorrt_llm::executor::iterationstats::kvcachestats (c++ member)": [[0, 
"_CPPv4N12tensorrt_llm8executor14IterationStats12kvCacheStatsE"]], "tensorrt_llm::executor::iterationstats::maxnumactiverequests (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats20maxNumActiveRequestsE"]], "tensorrt_llm::executor::iterationstats::numactiverequests (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats17numActiveRequestsE"]], "tensorrt_llm::executor::iterationstats::pinnedmemusage (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats14pinnedMemUsageE"]], "tensorrt_llm::executor::iterationstats::staticbatchingstats (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats19staticBatchingStatsE"]], "tensorrt_llm::executor::iterationstats::timestamp (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats9timestampE"]], "tensorrt_llm::executor::iterationtype (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor13IterationTypeE"]], "tensorrt_llm::executor::jsonserialization (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor17JsonSerializationE"]], "tensorrt_llm::executor::jsonserialization::tojsonstr (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK12RequestStats"], [0, "_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK14IterationStats"], [0, "_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK24RequestStatsPerIteration"]], "tensorrt_llm::executor::kvcacheconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfigE"]], "tensorrt_llm::executor::kvcacheconfig::kvcacheconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig13KvCacheConfigEbRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI6size_tEEb"]], "tensorrt_llm::executor::kvcacheconfig::getenableblockreuse (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig19getEnableBlockReuseEv"]], "tensorrt_llm::executor::kvcacheconfig::getfreegpumemoryfraction (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig24getFreeGpuMemoryFractionEv"]], "tensorrt_llm::executor::kvcacheconfig::gethostcachesize (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig16getHostCacheSizeEv"]], "tensorrt_llm::executor::kvcacheconfig::getmaxattentionwindow (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig21getMaxAttentionWindowEv"]], "tensorrt_llm::executor::kvcacheconfig::getmaxtokens (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig12getMaxTokensEv"]], "tensorrt_llm::executor::kvcacheconfig::getonboardblocks (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig16getOnboardBlocksEv"]], "tensorrt_llm::executor::kvcacheconfig::getsinktokenlength (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig18getSinkTokenLengthEv"]], "tensorrt_llm::executor::kvcacheconfig::menableblockreuse (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig17mEnableBlockReuseE"]], "tensorrt_llm::executor::kvcacheconfig::mfreegpumemoryfraction (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig22mFreeGpuMemoryFractionE"]], "tensorrt_llm::executor::kvcacheconfig::mhostcachesize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig14mHostCacheSizeE"]], "tensorrt_llm::executor::kvcacheconfig::mmaxattentionwindow (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig19mMaxAttentionWindowE"]], "tensorrt_llm::executor::kvcacheconfig::mmaxtokens (c++ member)": [[0, 
"_CPPv4N12tensorrt_llm8executor13KvCacheConfig10mMaxTokensE"]], "tensorrt_llm::executor::kvcacheconfig::monboardblocks (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig14mOnboardBlocksE"]], "tensorrt_llm::executor::kvcacheconfig::msinktokenlength (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig16mSinkTokenLengthE"]], "tensorrt_llm::executor::kvcachestats (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor12KvCacheStatsE"]], "tensorrt_llm::executor::kvcachestats::freenumblocks (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12KvCacheStats13freeNumBlocksE"]], "tensorrt_llm::executor::kvcachestats::maxnumblocks (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12KvCacheStats12maxNumBlocksE"]], "tensorrt_llm::executor::kvcachestats::tokensperblock (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12KvCacheStats14tokensPerBlockE"]], "tensorrt_llm::executor::kvcachestats::usednumblocks (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12KvCacheStats13usedNumBlocksE"]], "tensorrt_llm::executor::logitspostprocessor (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor19LogitsPostProcessorE"]], "tensorrt_llm::executor::logitspostprocessormap (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor22LogitsPostProcessorMapE"]], "tensorrt_llm::executor::loraconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor10LoraConfigE"]], "tensorrt_llm::executor::loraconfig::loraconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor10LoraConfig10LoraConfigE6IdTypeNSt8optionalI6TensorEENSt8optionalI6TensorEE"]], "tensorrt_llm::executor::loraconfig::getconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor10LoraConfig9getConfigEv"]], "tensorrt_llm::executor::loraconfig::gettaskid (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor10LoraConfig9getTaskIdEv"]], "tensorrt_llm::executor::loraconfig::getweights (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor10LoraConfig10getWeightsEv"]], "tensorrt_llm::executor::loraconfig::mconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10LoraConfig7mConfigE"]], "tensorrt_llm::executor::loraconfig::mtaskid (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10LoraConfig7mTaskIdE"]], "tensorrt_llm::executor::loraconfig::mweights (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10LoraConfig8mWeightsE"]], "tensorrt_llm::executor::medusachoices (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor13MedusaChoicesE"]], "tensorrt_llm::executor::memorytype (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor10MemoryTypeE"]], "tensorrt_llm::executor::memorytype::kcpu (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor10MemoryType4kCPUE"]], "tensorrt_llm::executor::memorytype::kcpu_pinned (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor10MemoryType11kCPU_PINNEDE"]], "tensorrt_llm::executor::memorytype::kgpu (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor10MemoryType4kGPUE"]], "tensorrt_llm::executor::memorytype::kunknown (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor10MemoryType8kUNKNOWNE"]], "tensorrt_llm::executor::memorytype::kuvm (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor10MemoryType4kUVME"]], "tensorrt_llm::executor::modeltype (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor9ModelTypeE"]], "tensorrt_llm::executor::modeltype::kdecoder_only (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor9ModelType13kDECODER_ONLYE"]], "tensorrt_llm::executor::outputconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor12OutputConfigE"]], "tensorrt_llm::executor::outputconfig::outputconfig (c++ 
function)": [[0, "_CPPv4N12tensorrt_llm8executor12OutputConfig12OutputConfigEbbbb"]], "tensorrt_llm::executor::outputconfig::excludeinputfromoutput (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12OutputConfig22excludeInputFromOutputE"]], "tensorrt_llm::executor::outputconfig::returncontextlogits (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12OutputConfig19returnContextLogitsE"]], "tensorrt_llm::executor::outputconfig::returngenerationlogits (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12OutputConfig22returnGenerationLogitsE"]], "tensorrt_llm::executor::outputconfig::returnlogprobs (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12OutputConfig14returnLogProbsE"]], "tensorrt_llm::executor::parallelconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfigE"]], "tensorrt_llm::executor::parallelconfig::parallelconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig14ParallelConfigE17CommunicationType17CommunicationModeNSt8optionalINSt6vectorI8SizeTypeEEEENSt8optionalINSt6vectorI8SizeTypeEEEE"]], "tensorrt_llm::executor::parallelconfig::getcommunicationmode (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ParallelConfig20getCommunicationModeEv"]], "tensorrt_llm::executor::parallelconfig::getcommunicationtype (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ParallelConfig20getCommunicationTypeEv"]], "tensorrt_llm::executor::parallelconfig::getdeviceids (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ParallelConfig12getDeviceIdsEv"]], "tensorrt_llm::executor::parallelconfig::getparticipantids (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ParallelConfig17getParticipantIdsEv"]], "tensorrt_llm::executor::parallelconfig::mcommmode (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig9mCommModeE"]], "tensorrt_llm::executor::parallelconfig::mcommtype (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig9mCommTypeE"]], "tensorrt_llm::executor::parallelconfig::mdeviceids (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig10mDeviceIdsE"]], "tensorrt_llm::executor::parallelconfig::mparticipantids (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig15mParticipantIdsE"]], "tensorrt_llm::executor::parallelconfig::setcommunicationmode (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig20setCommunicationModeE17CommunicationMode"]], "tensorrt_llm::executor::parallelconfig::setcommunicationtype (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig20setCommunicationTypeE17CommunicationType"]], "tensorrt_llm::executor::parallelconfig::setdeviceids (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig12setDeviceIdsERKNSt6vectorI8SizeTypeEE"]], "tensorrt_llm::executor::parallelconfig::setparticipantids (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig17setParticipantIdsERKNSt6vectorI8SizeTypeEE"]], "tensorrt_llm::executor::peftcacheconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfigE"]], "tensorrt_llm::executor::peftcacheconfig::peftcacheconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15PeftCacheConfigE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalIfEERKNSt8optionalI6size_tEE"]], "tensorrt_llm::executor::peftcacheconfig::getdevicecachepercent (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig21getDeviceCachePercentEv"]], "tensorrt_llm::executor::peftcacheconfig::gethostcachesize (c++ 
function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig16getHostCacheSizeEv"]], "tensorrt_llm::executor::peftcacheconfig::getmaxadaptersize (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig17getMaxAdapterSizeEv"]], "tensorrt_llm::executor::peftcacheconfig::getmaxpagesperblockdevice (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig25getMaxPagesPerBlockDeviceEv"]], "tensorrt_llm::executor::peftcacheconfig::getmaxpagesperblockhost (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig23getMaxPagesPerBlockHostEv"]], "tensorrt_llm::executor::peftcacheconfig::getnumcopystreams (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig17getNumCopyStreamsEv"]], "tensorrt_llm::executor::peftcacheconfig::getnumdevicemodulelayer (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig23getNumDeviceModuleLayerEv"]], "tensorrt_llm::executor::peftcacheconfig::getnumensureworkers (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig19getNumEnsureWorkersEv"]], "tensorrt_llm::executor::peftcacheconfig::getnumhostmodulelayer (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig21getNumHostModuleLayerEv"]], "tensorrt_llm::executor::peftcacheconfig::getnumputworkers (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig16getNumPutWorkersEv"]], "tensorrt_llm::executor::peftcacheconfig::getoptimaladaptersize (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig21getOptimalAdapterSizeEv"]], "tensorrt_llm::executor::peftcacheconfig::mdevicecachepercent (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig19mDeviceCachePercentE"]], "tensorrt_llm::executor::peftcacheconfig::mhostcachesize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig14mHostCacheSizeE"]], "tensorrt_llm::executor::peftcacheconfig::mmaxadaptersize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15mMaxAdapterSizeE"]], "tensorrt_llm::executor::peftcacheconfig::mmaxpagesperblockdevice (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig23mMaxPagesPerBlockDeviceE"]], "tensorrt_llm::executor::peftcacheconfig::mmaxpagesperblockhost (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig21mMaxPagesPerBlockHostE"]], "tensorrt_llm::executor::peftcacheconfig::mnumcopystreams (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15mNumCopyStreamsE"]], "tensorrt_llm::executor::peftcacheconfig::mnumdevicemodulelayer (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig21mNumDeviceModuleLayerE"]], "tensorrt_llm::executor::peftcacheconfig::mnumensureworkers (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig17mNumEnsureWorkersE"]], "tensorrt_llm::executor::peftcacheconfig::mnumhostmodulelayer (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig19mNumHostModuleLayerE"]], "tensorrt_llm::executor::peftcacheconfig::mnumputworkers (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig14mNumPutWorkersE"]], "tensorrt_llm::executor::peftcacheconfig::moptimaladaptersize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig19mOptimalAdapterSizeE"]], "tensorrt_llm::executor::phonynameduetoerror::value (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor19PhonyNameDueToError5valueE"]], "tensorrt_llm::executor::prompttuningconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor18PromptTuningConfigE"]], 
"tensorrt_llm::executor::prompttuningconfig::prompttuningconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor18PromptTuningConfig18PromptTuningConfigE6Tensor"]], "tensorrt_llm::executor::prompttuningconfig::getembeddingtable (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor18PromptTuningConfig17getEmbeddingTableEv"]], "tensorrt_llm::executor::prompttuningconfig::membeddingtable (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18PromptTuningConfig15mEmbeddingTableE"]], "tensorrt_llm::executor::randomseedtype (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor14RandomSeedTypeE"]], "tensorrt_llm::executor::request (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor7RequestE"]], "tensorrt_llm::executor::request::request (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens8SizeTypebRK14SamplingConfigRK12OutputConfigRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI10LoraConfigEENSt8optionalINSt6stringEEE"], [0, "_CPPv4N12tensorrt_llm8executor7Request7RequestERK7Request"], [0, "_CPPv4N12tensorrt_llm8executor7Request7RequestERR7Request"]], "tensorrt_llm::executor::request::getbadwords (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request11getBadWordsEv"]], "tensorrt_llm::executor::request::getembeddingbias (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request16getEmbeddingBiasEv"]], "tensorrt_llm::executor::request::getendid (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request8getEndIdEv"]], "tensorrt_llm::executor::request::getinputtokenids (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request16getInputTokenIdsEv"]], "tensorrt_llm::executor::request::getlogitspostprocessorname (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request26getLogitsPostProcessorNameEv"]], "tensorrt_llm::executor::request::getloraconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request13getLoraConfigEv"]], "tensorrt_llm::executor::request::getmaxnewtokens (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request15getMaxNewTokensEv"]], "tensorrt_llm::executor::request::getoutputconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request15getOutputConfigEv"]], "tensorrt_llm::executor::request::getpadid (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request8getPadIdEv"]], "tensorrt_llm::executor::request::getprompttuningconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request21getPromptTuningConfigEv"]], "tensorrt_llm::executor::request::getsamplingconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request17getSamplingConfigEv"]], "tensorrt_llm::executor::request::getspeculativedecodingconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request28getSpeculativeDecodingConfigEv"]], "tensorrt_llm::executor::request::getstopwords (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request12getStopWordsEv"]], "tensorrt_llm::executor::request::getstreaming (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request12getStreamingEv"]], "tensorrt_llm::executor::request::mimpl (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor7Request5mImplE"]], "tensorrt_llm::executor::request::operator= (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7RequestaSERK7Request"], [0, "_CPPv4N12tensorrt_llm8executor7RequestaSERR7Request"]], "tensorrt_llm::executor::request::setbadwords (c++ function)": [[0, 
"_CPPv4N12tensorrt_llm8executor7Request11setBadWordsERKNSt4listI9VecTokensEE"]], "tensorrt_llm::executor::request::setembeddingbias (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request16setEmbeddingBiasERK6Tensor"]], "tensorrt_llm::executor::request::setendid (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request8setEndIdE8SizeType"]], "tensorrt_llm::executor::request::setlogitspostprocessorname (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request26setLogitsPostProcessorNameERKNSt6stringE"]], "tensorrt_llm::executor::request::setloraconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request13setLoraConfigERK10LoraConfig"]], "tensorrt_llm::executor::request::setoutputconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request15setOutputConfigERK12OutputConfig"]], "tensorrt_llm::executor::request::setpadid (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request8setPadIdE8SizeType"]], "tensorrt_llm::executor::request::setprompttuningconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request21setPromptTuningConfigERK18PromptTuningConfig"]], "tensorrt_llm::executor::request::setsamplingconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request17setSamplingConfigERK14SamplingConfig"]], "tensorrt_llm::executor::request::setspeculativedecodingconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request28setSpeculativeDecodingConfigERK25SpeculativeDecodingConfig"]], "tensorrt_llm::executor::request::setstopwords (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request12setStopWordsERKNSt4listI9VecTokensEE"]], "tensorrt_llm::executor::request::setstreaming (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request12setStreamingEb"]], "tensorrt_llm::executor::request::~request (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7RequestD0Ev"]], "tensorrt_llm::executor::requeststage (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStageE"]], "tensorrt_llm::executor::requeststage::kcontext_in_progress (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStage20kCONTEXT_IN_PROGRESSE"]], "tensorrt_llm::executor::requeststage::kgeneration_complete (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStage20kGENERATION_COMPLETEE"]], "tensorrt_llm::executor::requeststage::kgeneration_in_progress (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStage23kGENERATION_IN_PROGRESSE"]], "tensorrt_llm::executor::requeststage::kqueued (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStage7kQUEUEDE"]], "tensorrt_llm::executor::requeststats (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStatsE"]], "tensorrt_llm::executor::requeststats::contextprefillposition (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStats22contextPrefillPositionE"]], "tensorrt_llm::executor::requeststats::id (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStats2idE"]], "tensorrt_llm::executor::requeststats::numgeneratedtokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStats18numGeneratedTokensE"]], "tensorrt_llm::executor::requeststats::paused (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStats6pausedE"]], "tensorrt_llm::executor::requeststats::scheduled (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStats9scheduledE"]], "tensorrt_llm::executor::requeststats::stage (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStats5stageE"]], "tensorrt_llm::executor::requeststatsperiteration (c++ struct)": [[0, 
"_CPPv4N12tensorrt_llm8executor24RequestStatsPerIterationE"]], "tensorrt_llm::executor::requeststatsperiteration::iter (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor24RequestStatsPerIteration4iterE"]], "tensorrt_llm::executor::requeststatsperiteration::requeststats (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor24RequestStatsPerIteration12requestStatsE"]], "tensorrt_llm::executor::response (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor8ResponseE"]], "tensorrt_llm::executor::response::response (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8Response8ResponseE6IdType6Result"], [0, "_CPPv4N12tensorrt_llm8executor8Response8ResponseE6IdTypeNSt6stringE"], [0, "_CPPv4N12tensorrt_llm8executor8Response8ResponseERK8Response"], [0, "_CPPv4N12tensorrt_llm8executor8Response8ResponseERR8Response"]], "tensorrt_llm::executor::response::geterrormsg (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8Response11getErrorMsgEv"]], "tensorrt_llm::executor::response::getrequestid (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8Response12getRequestIdEv"]], "tensorrt_llm::executor::response::getresult (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8Response9getResultEv"]], "tensorrt_llm::executor::response::haserror (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8Response8hasErrorEv"]], "tensorrt_llm::executor::response::mimpl (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8Response5mImplE"]], "tensorrt_llm::executor::response::operator= (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8ResponseaSERK8Response"], [0, "_CPPv4N12tensorrt_llm8executor8ResponseaSERR8Response"]], "tensorrt_llm::executor::response::~response (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8ResponseD0Ev"]], "tensorrt_llm::executor::result (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor6ResultE"]], "tensorrt_llm::executor::result::contextlogits (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result13contextLogitsE"]], "tensorrt_llm::executor::result::cumlogprobs (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result11cumLogProbsE"]], "tensorrt_llm::executor::result::generationlogits (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result16generationLogitsE"]], "tensorrt_llm::executor::result::isfinal (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result7isFinalE"]], "tensorrt_llm::executor::result::logprobs (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result8logProbsE"]], "tensorrt_llm::executor::result::outputtokenids (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result14outputTokenIdsE"]], "tensorrt_llm::executor::samplingconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfigE"]], "tensorrt_llm::executor::samplingconfig::samplingconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE8SizeTypeRKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEE"]], "tensorrt_llm::executor::samplingconfig::getbeamsearchdiversityrate (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig26getBeamSearchDiversityRateEv"]], "tensorrt_llm::executor::samplingconfig::getbeamwidth (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig12getBeamWidthEv"]], 
"tensorrt_llm::executor::samplingconfig::getearlystopping (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig16getEarlyStoppingEv"]], "tensorrt_llm::executor::samplingconfig::getfrequencypenalty (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig19getFrequencyPenaltyEv"]], "tensorrt_llm::executor::samplingconfig::getlengthpenalty (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig16getLengthPenaltyEv"]], "tensorrt_llm::executor::samplingconfig::getminlength (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig12getMinLengthEv"]], "tensorrt_llm::executor::samplingconfig::getpresencepenalty (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig18getPresencePenaltyEv"]], "tensorrt_llm::executor::samplingconfig::getrandomseed (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig13getRandomSeedEv"]], "tensorrt_llm::executor::samplingconfig::getrepetitionpenalty (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig20getRepetitionPenaltyEv"]], "tensorrt_llm::executor::samplingconfig::gettemperature (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig14getTemperatureEv"]], "tensorrt_llm::executor::samplingconfig::gettopk (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig7getTopKEv"]], "tensorrt_llm::executor::samplingconfig::gettopp (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig7getTopPEv"]], "tensorrt_llm::executor::samplingconfig::gettoppdecay (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig12getTopPDecayEv"]], "tensorrt_llm::executor::samplingconfig::gettoppmin (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig10getTopPMinEv"]], "tensorrt_llm::executor::samplingconfig::gettoppresetids (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig15getTopPResetIdsEv"]], "tensorrt_llm::executor::samplingconfig::mbeamsearchdiversityrate (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig24mBeamSearchDiversityRateE"]], "tensorrt_llm::executor::samplingconfig::mbeamwidth (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig10mBeamWidthE"]], "tensorrt_llm::executor::samplingconfig::mearlystopping (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14mEarlyStoppingE"]], "tensorrt_llm::executor::samplingconfig::mfrequencypenalty (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig17mFrequencyPenaltyE"]], "tensorrt_llm::executor::samplingconfig::mlengthpenalty (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14mLengthPenaltyE"]], "tensorrt_llm::executor::samplingconfig::mminlength (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig10mMinLengthE"]], "tensorrt_llm::executor::samplingconfig::mpresencepenalty (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig16mPresencePenaltyE"]], "tensorrt_llm::executor::samplingconfig::mrandomseed (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig11mRandomSeedE"]], "tensorrt_llm::executor::samplingconfig::mrepetitionpenalty (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig18mRepetitionPenaltyE"]], "tensorrt_llm::executor::samplingconfig::mtemperature (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig12mTemperatureE"]], "tensorrt_llm::executor::samplingconfig::mtopk (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig5mTopKE"]], 
"tensorrt_llm::executor::samplingconfig::mtopp (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig5mTopPE"]], "tensorrt_llm::executor::samplingconfig::mtoppdecay (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig10mTopPDecayE"]], "tensorrt_llm::executor::samplingconfig::mtoppmin (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig8mTopPMinE"]], "tensorrt_llm::executor::samplingconfig::mtoppresetids (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig13mTopPResetIdsE"]], "tensorrt_llm::executor::samplingconfig::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfigeqERK14SamplingConfig"]], "tensorrt_llm::executor::schedulerconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor15SchedulerConfigE"]], "tensorrt_llm::executor::schedulerconfig::schedulerconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor15SchedulerConfig15SchedulerConfigE15SchedulerPolicy"]], "tensorrt_llm::executor::schedulerconfig::getpolicy (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15SchedulerConfig9getPolicyEv"]], "tensorrt_llm::executor::schedulerconfig::mpolicy (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15SchedulerConfig7mPolicyE"]], "tensorrt_llm::executor::schedulerpolicy (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor15SchedulerPolicyE"]], "tensorrt_llm::executor::schedulerpolicy::kguaranteed_no_evict (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor15SchedulerPolicy20kGUARANTEED_NO_EVICTE"]], "tensorrt_llm::executor::schedulerpolicy::kmax_utilization (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor15SchedulerPolicy16kMAX_UTILIZATIONE"]], "tensorrt_llm::executor::shape (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor5ShapeE"]], "tensorrt_llm::executor::shape::base (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor5Shape4BaseE"]], "tensorrt_llm::executor::shape::dimtype (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor5Shape7DimTypeE"]], "tensorrt_llm::executor::shape::shape (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor5Shape5ShapeENSt16initializer_listI7DimTypeEE"], [0, "_CPPv4N12tensorrt_llm8executor5Shape5ShapeEPK7DimTypeN4Base9size_typeE"], [0, "_CPPv4N12tensorrt_llm8executor5Shape5ShapeEv"]], "tensorrt_llm::executor::sizetype (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor8SizeTypeE"]], "tensorrt_llm::executor::speculativedecodingconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfigE"]], "tensorrt_llm::executor::speculativedecodingconfig::speculativedecodingconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig25SpeculativeDecodingConfigE9VecTokensNSt8optionalI6TensorEERKNSt8optionalI9FloatTypeEE"]], "tensorrt_llm::executor::speculativedecodingconfig::getacceptancethreshold (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor25SpeculativeDecodingConfig22getAcceptanceThresholdEv"]], "tensorrt_llm::executor::speculativedecodingconfig::getlogits (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor25SpeculativeDecodingConfig9getLogitsEv"]], "tensorrt_llm::executor::speculativedecodingconfig::gettokens (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor25SpeculativeDecodingConfig9getTokensEv"]], "tensorrt_llm::executor::speculativedecodingconfig::macceptancethreshold (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig20mAcceptanceThresholdE"]], "tensorrt_llm::executor::speculativedecodingconfig::mlogits (c++ member)": [[0, 
"_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig7mLogitsE"]], "tensorrt_llm::executor::speculativedecodingconfig::mtokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig7mTokensE"]], "tensorrt_llm::executor::staticbatchingstats (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor19StaticBatchingStatsE"]], "tensorrt_llm::executor::staticbatchingstats::emptygenslots (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor19StaticBatchingStats13emptyGenSlotsE"]], "tensorrt_llm::executor::staticbatchingstats::numcontextrequests (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor19StaticBatchingStats18numContextRequestsE"]], "tensorrt_llm::executor::staticbatchingstats::numctxtokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor19StaticBatchingStats12numCtxTokensE"]], "tensorrt_llm::executor::staticbatchingstats::numgentokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor19StaticBatchingStats12numGenTokensE"]], "tensorrt_llm::executor::staticbatchingstats::numscheduledrequests (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor19StaticBatchingStats20numScheduledRequestsE"]], "tensorrt_llm::executor::streamptr (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor9StreamPtrE"]], "tensorrt_llm::executor::tensor (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor6TensorE"]], "tensorrt_llm::executor::tensor::cudastreamptr (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor6Tensor13CudaStreamPtrE"]], "tensorrt_llm::executor::tensor::impl (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor6Tensor4ImplE"]], "tensorrt_llm::executor::tensor::tensor (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6Tensor6TensorENSt10shared_ptrIN7runtime7ITensorEEE"], [0, "_CPPv4N12tensorrt_llm8executor6Tensor6TensorERK6Tensor"], [0, "_CPPv4N12tensorrt_llm8executor6Tensor6TensorERR6Tensor"], [0, "_CPPv4N12tensorrt_llm8executor6Tensor6TensorEv"]], "tensorrt_llm::executor::tensor::copyto (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor6copyToENSt10shared_ptrI4ImplEE13CudaStreamPtr"]], "tensorrt_llm::executor::tensor::copytocpu (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor9copyToCpuEN6Tensor13CudaStreamPtrE"]], "tensorrt_llm::executor::tensor::copytogpu (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor9copyToGpuEN6Tensor13CudaStreamPtrE"]], "tensorrt_llm::executor::tensor::copytomanaged (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor13copyToManagedEN6Tensor13CudaStreamPtrE"]], "tensorrt_llm::executor::tensor::copytopinned (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor12copyToPinnedEN6Tensor13CudaStreamPtrE"]], "tensorrt_llm::executor::tensor::copytopooledpinned (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor18copyToPooledPinnedEN6Tensor13CudaStreamPtrE"]], "tensorrt_llm::executor::tensor::cpu (c++ function)": [[0, "_CPPv4I0EN12tensorrt_llm8executor6Tensor3cpuE6Tensor5Shape"], [0, "_CPPv4N12tensorrt_llm8executor6Tensor3cpuE8DataType5Shape"]], "tensorrt_llm::executor::tensor::detail::ofitensor (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6Tensor6detail9ofITensorENSt10shared_ptrIN7runtime7ITensorEEE"]], "tensorrt_llm::executor::tensor::detail::toitensor (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6Tensor6detail9toITensorERK6Tensor"]], "tensorrt_llm::executor::tensor::getdata (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6Tensor7getDataEv"], [0, "_CPPv4NK12tensorrt_llm8executor6Tensor7getDataEv"]], "tensorrt_llm::executor::tensor::getdatatype (c++ function)": [[0, 
"_CPPv4NK12tensorrt_llm8executor6Tensor11getDataTypeEv"]], "tensorrt_llm::executor::tensor::getmemorytype (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor13getMemoryTypeEv"]], "tensorrt_llm::executor::tensor::getruntimetype (c++ function)": [[0, "_CPPv4I0EN12tensorrt_llm8executor6Tensor14getRuntimeTypeE8DataTypev"]], "tensorrt_llm::executor::tensor::getshape (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor8getShapeEv"]], "tensorrt_llm::executor::tensor::getsize (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor7getSizeEv"]], "tensorrt_llm::executor::tensor::getsizeinbytes (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor14getSizeInBytesEv"]], "tensorrt_llm::executor::tensor::gpu (c++ function)": [[0, "_CPPv4I0EN12tensorrt_llm8executor6Tensor3gpuE6Tensor13CudaStreamPtr5Shape"], [0, "_CPPv4N12tensorrt_llm8executor6Tensor3gpuE8DataType13CudaStreamPtr5Shape"]], "tensorrt_llm::executor::tensor::mtensor (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Tensor7mTensorE"]], "tensorrt_llm::executor::tensor::managed (c++ function)": [[0, "_CPPv4I0EN12tensorrt_llm8executor6Tensor7managedE6Tensor5Shape"], [0, "_CPPv4N12tensorrt_llm8executor6Tensor7managedE8DataType5Shape"]], "tensorrt_llm::executor::tensor::of (c++ function)": [[0, "_CPPv4I0EN12tensorrt_llm8executor6Tensor2ofE6TensorP1T5Shape"], [0, "_CPPv4I0EN12tensorrt_llm8executor6Tensor2ofE6TensorR1T"], [0, "_CPPv4N12tensorrt_llm8executor6Tensor2ofE8DataTypePv5Shape"]], "tensorrt_llm::executor::tensor::operator bool (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6TensorcvbEv"]], "tensorrt_llm::executor::tensor::operator!= (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6TensorneERK6Tensor"]], "tensorrt_llm::executor::tensor::operator= (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6TensoraSERK6Tensor"], [0, "_CPPv4N12tensorrt_llm8executor6TensoraSERR6Tensor"]], "tensorrt_llm::executor::tensor::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6TensoreqERK6Tensor"]], "tensorrt_llm::executor::tensor::pinned (c++ function)": [[0, "_CPPv4I0EN12tensorrt_llm8executor6Tensor6pinnedE6Tensor5Shape"], [0, "_CPPv4N12tensorrt_llm8executor6Tensor6pinnedE8DataType5Shape"]], "tensorrt_llm::executor::tensor::pooledpinned (c++ function)": [[0, "_CPPv4I0EN12tensorrt_llm8executor6Tensor12pooledPinnedE6Tensor5Shape"], [0, "_CPPv4N12tensorrt_llm8executor6Tensor12pooledPinnedE8DataType5Shape"]], "tensorrt_llm::executor::tensor::setfrom (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6Tensor7setFromERK6Tensor13CudaStreamPtr"]], "tensorrt_llm::executor::tensor::setzero (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6Tensor7setZeroE13CudaStreamPtr"]], "tensorrt_llm::executor::tensor::~tensor (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6TensorD0Ev"]], "tensorrt_llm::executor::tensorptr (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor9TensorPtrE"]], "tensorrt_llm::executor::tokenidtype (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor11TokenIdTypeE"]], "tensorrt_llm::executor::typetraits (c++ struct)": [[0, "_CPPv4I0_bEN12tensorrt_llm8executor10TypeTraitsE"]], "tensorrt_llm::executor::typetraits<t*> (c++ struct)": [[0, "_CPPv4I0EN12tensorrt_llm8executor10TypeTraitsIP1TEE"]], "tensorrt_llm::executor::typetraits<t*>::value (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10TypeTraitsIP1TE5valueE"]], "tensorrt_llm::executor::typetraits<bool> (c++ struct)": [[0, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsIbEE"]], "tensorrt_llm::executor::typetraits<bool>::value (c++ 
member)": [[0, "_CPPv4N12tensorrt_llm8executor10TypeTraitsIbE5valueE"]], "tensorrt_llm::executor::typetraits<float> (c++ struct)": [[0, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsIfEE"]], "tensorrt_llm::executor::typetraits<float>::value (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10TypeTraitsIfE5valueE"]], "tensorrt_llm::executor::typetraits<half> (c++ struct)": [[0, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsI4halfEE"]], "tensorrt_llm::executor::typetraits<half>::value (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10TypeTraitsI4halfE5valueE"]], "tensorrt_llm::executor::typetraits<std::int32_t> (c++ struct)": [[0, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt7int32_tEEE"]], "tensorrt_llm::executor::typetraits<std::int32_t>::value (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt7int32_tEE5valueE"]], "tensorrt_llm::executor::typetraits<std::int64_t> (c++ struct)": [[0, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt7int64_tEEE"]], "tensorrt_llm::executor::typetraits<std::int64_t>::value (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt7int64_tEE5valueE"]], "tensorrt_llm::executor::typetraits<std::int8_t> (c++ struct)": [[0, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt6int8_tEEE"]], "tensorrt_llm::executor::typetraits<std::int8_t>::value (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt6int8_tEE5valueE"]], "tensorrt_llm::executor::typetraits<std::uint8_t> (c++ struct)": [[0, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt7uint8_tEEE"]], "tensorrt_llm::executor::typetraits<std::uint8_t>::value (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt7uint8_tEE5valueE"]], "tensorrt_llm::executor::veclogprobs (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor11VecLogProbsE"]], "tensorrt_llm::executor::vectokens (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor9VecTokensE"]], "tensorrt_llm::executor::detail (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor6detailE"]], "tensorrt_llm::executor::detail::ofitensor (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6detail9ofITensorENSt10shared_ptrIN7runtime7ITensorEEE"]], "tensorrt_llm::executor::detail::toitensor (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6detail9toITensorERK6Tensor"]], "tensorrt_llm::executor::kdefaultiterstatsmaxiterations (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor30kDefaultIterStatsMaxIterationsE"]], "tensorrt_llm::executor::kdefaultrequeststatsmaxiterations (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor33kDefaultRequestStatsMaxIterationsE"]], "tensorrt_llm::runtime (c++ type)": [[0, "_CPPv4N12tensorrt_llm7runtimeE"], [1, "_CPPv4N12tensorrt_llm7runtimeE"]], "set_from_optional (c macro)": [[1, "c.SET_FROM_OPTIONAL"]], "nvinfer1 (c++ type)": [[1, "_CPPv48nvinfer1"]], "tensorrt_llm::batch_manager (c++ type)": [[1, "_CPPv4N12tensorrt_llm13batch_managerE"]], "tensorrt_llm::batch_manager::kv_cache_manager (c++ type)": [[1, "_CPPv4N12tensorrt_llm13batch_manager16kv_cache_managerE"]], "tensorrt_llm::layers (c++ type)": [[1, "_CPPv4N12tensorrt_llm6layersE"]], "tensorrt_llm::runtime::bufferdatatype (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime14BufferDataTypeE"]], "tensorrt_llm::runtime::bufferdatatype::bufferdatatype (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb"]], "tensorrt_llm::runtime::bufferdatatype::getdatatype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType11getDataTypeEv"]], "tensorrt_llm::runtime::bufferdatatype::getsize (c++ 
function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType7getSizeEv"]], "tensorrt_llm::runtime::bufferdatatype::ispointer (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType9isPointerEv"]], "tensorrt_llm::runtime::bufferdatatype::isunsigned (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType10isUnsignedEv"]], "tensorrt_llm::runtime::bufferdatatype::ktrtpointertype (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType15kTrtPointerTypeE"]], "tensorrt_llm::runtime::bufferdatatype::mdatatype (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType9mDataTypeE"]], "tensorrt_llm::runtime::bufferdatatype::mpointer (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType8mPointerE"]], "tensorrt_llm::runtime::bufferdatatype::munsigned (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType9mUnsignedE"]], "tensorrt_llm::runtime::bufferdatatype::operator nvinfer1::datatype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataTypecvN8nvinfer18DataTypeEEv"]], "tensorrt_llm::runtime::buffermanager (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManagerE"]], "tensorrt_llm::runtime::buffermanager::buffermanager (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager13BufferManagerE13CudaStreamPtrb"]], "tensorrt_llm::runtime::buffermanager::cudastreamptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager13CudaStreamPtrE"]], "tensorrt_llm::runtime::buffermanager::ibufferptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10IBufferPtrE"]], "tensorrt_llm::runtime::buffermanager::itensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10ITensorPtrE"]], "tensorrt_llm::runtime::buffermanager::allocate (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE"], [1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::copy (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer"], [1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType"], [1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv"], [1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType"], [1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer"]], "tensorrt_llm::runtime::buffermanager::copyfrom (c++ function)": [[1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType"], [1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType"], [1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType"], [1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType"], [1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType"]], "tensorrt_llm::runtime::buffermanager::cpu (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE"], [1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::emptybuffer (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::emptytensor (c++ function)": [[1, 
"_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::getstream (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager9getStreamEv"]], "tensorrt_llm::runtime::buffermanager::gpu (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE"], [1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::gpusync (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7gpuSyncEN8nvinfer14DimsEN8nvinfer18DataTypeE"], [1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7gpuSyncENSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::initmemorypool (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14initMemoryPoolEi"]], "tensorrt_llm::runtime::buffermanager::kbyte_type (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10kBYTE_TYPEE"]], "tensorrt_llm::runtime::buffermanager::mstream (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7mStreamE"]], "tensorrt_llm::runtime::buffermanager::mtrimpool (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager9mTrimPoolE"]], "tensorrt_llm::runtime::buffermanager::managed (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7managedEN8nvinfer14DimsEN8nvinfer18DataTypeE"], [1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7managedENSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::memorypoolfree (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEi"], [1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEv"]], "tensorrt_llm::runtime::buffermanager::memorypoolreserved (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEi"], [1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEv"]], "tensorrt_llm::runtime::buffermanager::memorypooltrimto (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToENSt6size_tE"], [1, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToEiNSt6size_tE"]], "tensorrt_llm::runtime::buffermanager::memorypoolused (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEi"], [1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEv"]], "tensorrt_llm::runtime::buffermanager::pinned (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE"], [1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::pinnedpool (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolEN8nvinfer14DimsEN8nvinfer18DataTypeE"], [1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolENSt6size_tEN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::buffermanager::setmem (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager6setMemER7IBuffer7int32_t"]], "tensorrt_llm::runtime::buffermanager::setzero (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7setZeroER7IBuffer"]], "tensorrt_llm::runtime::buffermanager::~buffermanager (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManagerD0Ev"]], "tensorrt_llm::runtime::bufferrange (c++ class)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime11BufferRangeE"]], "tensorrt_llm::runtime::bufferrange::base (c++ type)": [[1, 
"_CPPv4N12tensorrt_llm7runtime11BufferRange4BaseE"]], "tensorrt_llm::runtime::bufferrange::bufferrange (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeEP1T9size_type"], [1, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeER7IBuffer"]], "tensorrt_llm::runtime::cudaevent (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime9CudaEventE"]], "tensorrt_llm::runtime::cudaevent::cudaevent (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb"], [1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventEj"]], "tensorrt_llm::runtime::cudaevent::deleter (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7DeleterE"]], "tensorrt_llm::runtime::cudaevent::deleter::deleter (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEb"], [1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEv"]], "tensorrt_llm::runtime::cudaevent::deleter::mownsevent (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter10mOwnsEventE"]], "tensorrt_llm::runtime::cudaevent::deleter::operator() (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent7DeleterclE7pointer"]], "tensorrt_llm::runtime::cudaevent::eventptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent8EventPtrE"]], "tensorrt_llm::runtime::cudaevent::element_type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent12element_typeE"]], "tensorrt_llm::runtime::cudaevent::get (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent3getEv"]], "tensorrt_llm::runtime::cudaevent::mevent (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent6mEventE"]], "tensorrt_llm::runtime::cudaevent::pointer (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7pointerE"]], "tensorrt_llm::runtime::cudaevent::synchronize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent11synchronizeEv"]], "tensorrt_llm::runtime::cudastream (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime10CudaStreamE"]], "tensorrt_llm::runtime::cudastream::cudastream (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_t"], [1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib"], [1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji"]], "tensorrt_llm::runtime::cudastream::deleter (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7DeleterE"]], "tensorrt_llm::runtime::cudastream::deleter::deleter (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEb"], [1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEv"]], "tensorrt_llm::runtime::cudastream::deleter::mownsstream (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter11mOwnsStreamE"]], "tensorrt_llm::runtime::cudastream::deleter::operator() (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream7DeleterclE12cudaStream_t"]], "tensorrt_llm::runtime::cudastream::streamptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime10CudaStream9StreamPtrE"]], "tensorrt_llm::runtime::cudastream::get (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream3getEv"]], "tensorrt_llm::runtime::cudastream::getdevice (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream9getDeviceEv"]], "tensorrt_llm::runtime::cudastream::mdevice (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7mDeviceE"]], "tensorrt_llm::runtime::cudastream::mstream (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7mStreamE"]], "tensorrt_llm::runtime::cudastream::record (c++ 
function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordEN9CudaEvent7pointerE"], [1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordERK9CudaEvent"]], "tensorrt_llm::runtime::cudastream::synchronize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream11synchronizeEv"]], "tensorrt_llm::runtime::cudastream::wait (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitEN9CudaEvent7pointerE"], [1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitERK9CudaEvent"]], "tensorrt_llm::runtime::datatypetraits (c++ struct)": [[1, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime14DataTypeTraitsE"]], "tensorrt_llm::runtime::datatypetraits<kdatatype, kunsigned, true> (c++ struct)": [[1, "_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEEE"]], "tensorrt_llm::runtime::datatypetraits<kdatatype, kunsigned, true>::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4nameE"]], "tensorrt_llm::runtime::datatypetraits<kdatatype, kunsigned, true>::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<kdatatype, kunsigned, true>::type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kbool, kunsigned> (c++ struct)": [[1, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kbool, kunsigned>::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kbool, kunsigned>::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kbool, kunsigned>::type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kfloat> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kfloat>::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kfloat>::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kfloat>::type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::khalf> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::khalf>::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::khalf>::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::khalf>::type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4typeE"]], 
"tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32, true> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32, true>::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32, true>::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32, true>::type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32>::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32>::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint32>::type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64, true> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64, true>::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64, true>::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64, true>::type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64>::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64>::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint64>::type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint8> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint8>::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint8>::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kint8>::type (c++ type)": [[1, 
"_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4typeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kuint8, kunsigned> (c++ struct)": [[1, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedEE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kuint8, kunsigned>::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4nameE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kuint8, kunsigned>::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4sizeE"]], "tensorrt_llm::runtime::datatypetraits<nvinfer1::datatype::kuint8, kunsigned>::type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4typeE"]], "tensorrt_llm::runtime::decodinginput (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInputE"]], "tensorrt_llm::runtime::decodinginput::decodinginput (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType8SizeType9TensorPtr9TensorPtr"]], "tensorrt_llm::runtime::decodinginput::medusainputs (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputsE"]], "tensorrt_llm::runtime::decodinginput::medusainputs::medusacurtokensperstep (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs22medusaCurTokensPerStepE"]], "tensorrt_llm::runtime::decodinginput::medusainputs::medusalogits (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs12medusaLogitsE"]], "tensorrt_llm::runtime::decodinginput::medusainputs::medusapaths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs11medusaPathsE"]], "tensorrt_llm::runtime::decodinginput::medusainputs::medusatargettokensperstep (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs25medusaTargetTokensPerStepE"]], "tensorrt_llm::runtime::decodinginput::medusainputs::medusatreeids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs13medusaTreeIdsE"]], "tensorrt_llm::runtime::decodinginput::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9TensorPtrE"]], "tensorrt_llm::runtime::decodinginput::badwordslens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsLensE"]], "tensorrt_llm::runtime::decodinginput::badwordslist (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsListE"]], "tensorrt_llm::runtime::decodinginput::badwordsptrs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsPtrsE"]], "tensorrt_llm::runtime::decodinginput::batchslots (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput10batchSlotsE"]], "tensorrt_llm::runtime::decodinginput::cacheindirection (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput16cacheIndirectionE"]], "tensorrt_llm::runtime::decodinginput::embeddingbias (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13embeddingBiasE"]], "tensorrt_llm::runtime::decodinginput::endids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput6endIdsE"]], "tensorrt_llm::runtime::decodinginput::finished (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput8finishedE"]], "tensorrt_llm::runtime::decodinginput::lengths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput7lengthsE"]], 
"tensorrt_llm::runtime::decodinginput::logits (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput6logitsE"]], "tensorrt_llm::runtime::decodinginput::logitsvec (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9logitsVecE"]], "tensorrt_llm::runtime::decodinginput::maxattentionwindow (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput18maxAttentionWindowE"]], "tensorrt_llm::runtime::decodinginput::maxbadwordslen (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput14maxBadWordsLenE"]], "tensorrt_llm::runtime::decodinginput::maxbatchsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12maxBatchSizeE"]], "tensorrt_llm::runtime::decodinginput::maxlength (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9maxLengthE"]], "tensorrt_llm::runtime::decodinginput::maxstopwordslen (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput15maxStopWordsLenE"]], "tensorrt_llm::runtime::decodinginput::medusainputs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12medusaInputsE"]], "tensorrt_llm::runtime::decodinginput::norepeatngramsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput17noRepeatNgramSizeE"]], "tensorrt_llm::runtime::decodinginput::sequencelimitlength (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput19sequenceLimitLengthE"]], "tensorrt_llm::runtime::decodinginput::sinktokenlength (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput15sinkTokenLengthE"]], "tensorrt_llm::runtime::decodinginput::step (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput4stepE"]], "tensorrt_llm::runtime::decodinginput::stopwordslens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsLensE"]], "tensorrt_llm::runtime::decodinginput::stopwordslist (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsListE"]], "tensorrt_llm::runtime::decodinginput::stopwordsptrs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsPtrsE"]], "tensorrt_llm::runtime::decodingmode (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingModeE"]], "tensorrt_llm::runtime::decodingmode::beamsearch (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode10BeamSearchEv"]], "tensorrt_llm::runtime::decodingmode::decodingmode (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode12DecodingModeE14UnderlyingType"]], "tensorrt_llm::runtime::decodingmode::medusa (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode6MedusaEv"]], "tensorrt_llm::runtime::decodingmode::none (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode4NoneEv"]], "tensorrt_llm::runtime::decodingmode::topk (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode4TopKEv"]], "tensorrt_llm::runtime::decodingmode::topktopp (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode8TopKTopPEv"]], "tensorrt_llm::runtime::decodingmode::topp (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode4TopPEv"]], "tensorrt_llm::runtime::decodingmode::underlyingtype (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode14UnderlyingTypeE"]], "tensorrt_llm::runtime::decodingmode::allbitset (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime12DecodingMode9allBitSetE14UnderlyingType"]], "tensorrt_llm::runtime::decodingmode::anybitset (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime12DecodingMode9anyBitSetE14UnderlyingType"]], 
"tensorrt_llm::runtime::decodingmode::isbeamsearch (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode12isBeamSearchEv"]], "tensorrt_llm::runtime::decodingmode::ismedusa (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode8isMedusaEv"]], "tensorrt_llm::runtime::decodingmode::isnone (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode6isNoneEv"]], "tensorrt_llm::runtime::decodingmode::istopk (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode6isTopKEv"]], "tensorrt_llm::runtime::decodingmode::istopkandtopp (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode13isTopKandTopPEv"]], "tensorrt_llm::runtime::decodingmode::istopkortopp (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode12isTopKorTopPEv"]], "tensorrt_llm::runtime::decodingmode::istopp (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode6isTopPEv"]], "tensorrt_llm::runtime::decodingmode::kbeamsearch (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode11kBeamSearchE"]], "tensorrt_llm::runtime::decodingmode::kmedusa (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode7kMedusaE"]], "tensorrt_llm::runtime::decodingmode::knone (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode5kNoneE"]], "tensorrt_llm::runtime::decodingmode::ktopk (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode5kTopKE"]], "tensorrt_llm::runtime::decodingmode::ktopktopp (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode9kTopKTopPE"]], "tensorrt_llm::runtime::decodingmode::ktopp (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode5kTopPE"]], "tensorrt_llm::runtime::decodingmode::mstate (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12DecodingMode6mStateE"]], "tensorrt_llm::runtime::decodingmode::operator== (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime12DecodingModeeqERK12DecodingMode"]], "tensorrt_llm::runtime::decodingoutput (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutputE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypothesesE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::cumlogprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses11cumLogProbsE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::empty (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5emptyER13BufferManager"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::init (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initER13BufferManager11TokenIdType"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::isdone (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses6isDoneE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::logprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses8logProbsE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::minnormedscores (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses15minNormedScoresE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::normedscores (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12normedScoresE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::numbeams (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses8numBeamsE"]], 
"tensorrt_llm::runtime::decodingoutput::beamhypotheses::outputidstgt (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12outputIdsTgtE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::release (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7releaseEv"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::reshape (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::sequencelengthstgt (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses18sequenceLengthsTgtE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::slice (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE8SizeType8SizeType"]], "tensorrt_llm::runtime::decodingoutput::decodingoutput (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14DecodingOutputE9TensorPtr"]], "tensorrt_llm::runtime::decodingoutput::medusaoutputs (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput13MedusaOutputsE"]], "tensorrt_llm::runtime::decodingoutput::medusaoutputs::medusaacceptedlengthscumsum (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput13MedusaOutputs27medusaAcceptedLengthsCumSumE"]], "tensorrt_llm::runtime::decodingoutput::medusaoutputs::medusaacceptedtokenslen (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput13MedusaOutputs23medusaAcceptedTokensLenE"]], "tensorrt_llm::runtime::decodingoutput::medusaoutputs::medusanextdrafttokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput13MedusaOutputs21medusaNextDraftTokensE"]], "tensorrt_llm::runtime::decodingoutput::medusaoutputs::medusapathsoffsets (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput13MedusaOutputs18medusaPathsOffsetsE"]], "tensorrt_llm::runtime::decodingoutput::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9TensorPtrE"]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14beamHypothesesE"]], "tensorrt_llm::runtime::decodingoutput::cacheindirection (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput16cacheIndirectionE"]], "tensorrt_llm::runtime::decodingoutput::cumlogprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput11cumLogProbsE"]], "tensorrt_llm::runtime::decodingoutput::finished (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput8finishedE"]], "tensorrt_llm::runtime::decodingoutput::finishedsum (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput11finishedSumE"]], "tensorrt_llm::runtime::decodingoutput::ids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput3idsE"]], "tensorrt_llm::runtime::decodingoutput::knegativeinfinity (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput17kNegativeInfinityE"]], "tensorrt_llm::runtime::decodingoutput::lengths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput7lengthsE"]], "tensorrt_llm::runtime::decodingoutput::logprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput8logProbsE"]], "tensorrt_llm::runtime::decodingoutput::medusaoutputs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput13medusaOutputsE"]], "tensorrt_llm::runtime::decodingoutput::newtokens (c++ member)": [[1, 
"_CPPv4N12tensorrt_llm7runtime14DecodingOutput9newTokensE"]], "tensorrt_llm::runtime::decodingoutput::newtokenssteps (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14newTokensStepsE"]], "tensorrt_llm::runtime::decodingoutput::newtokensvec (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput12newTokensVecE"]], "tensorrt_llm::runtime::decodingoutput::parentids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9parentIdsE"]], "tensorrt_llm::runtime::generationinput (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime15GenerationInputE"]], "tensorrt_llm::runtime::generationinput::base (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput4BaseE"]], "tensorrt_llm::runtime::generationinput::generationinput (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb"]], "tensorrt_llm::runtime::generationinput::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime15GenerationInput9TensorPtrE"]], "tensorrt_llm::runtime::generationoutput (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutputE"]], "tensorrt_llm::runtime::generationoutput::base (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput4BaseE"]], "tensorrt_llm::runtime::generationoutput::generationoutput (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput16GenerationOutputE9TensorPtr9TensorPtr"]], "tensorrt_llm::runtime::generationoutput::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime16GenerationOutput9TensorPtrE"]], "tensorrt_llm::runtime::genericgenerationinput (c++ class)": [[1, "_CPPv4I00EN12tensorrt_llm7runtime22GenericGenerationInputE"]], "tensorrt_llm::runtime::genericgenerationinput::genericgenerationinput (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb"]], "tensorrt_llm::runtime::genericgenerationinput::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput9TensorPtrE"]], "tensorrt_llm::runtime::genericgenerationinput::badwordslist (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput12badWordsListE"]], "tensorrt_llm::runtime::genericgenerationinput::embeddingbias (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput13embeddingBiasE"]], "tensorrt_llm::runtime::genericgenerationinput::endid (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput5endIdE"]], "tensorrt_llm::runtime::genericgenerationinput::ids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput3idsE"]], "tensorrt_llm::runtime::genericgenerationinput::lengths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput7lengthsE"]], "tensorrt_llm::runtime::genericgenerationinput::maxnewtokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput12maxNewTokensE"]], "tensorrt_llm::runtime::genericgenerationinput::packed (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput6packedE"]], "tensorrt_llm::runtime::genericgenerationinput::padid (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput5padIdE"]], "tensorrt_llm::runtime::genericgenerationinput::prompttuningparams (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput18promptTuningParamsE"]], "tensorrt_llm::runtime::genericgenerationinput::stopwordslist (c++ member)": [[1, 
"_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput13stopWordsListE"]], "tensorrt_llm::runtime::genericgenerationoutput (c++ class)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime23GenericGenerationOutputE"]], "tensorrt_llm::runtime::genericgenerationoutput::callback (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput8CallbackE"]], "tensorrt_llm::runtime::genericgenerationoutput::genericgenerationoutput (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput23GenericGenerationOutputE9TensorPtr9TensorPtr"]], "tensorrt_llm::runtime::genericgenerationoutput::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput9TensorPtrE"]], "tensorrt_llm::runtime::genericgenerationoutput::contextlogits (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput13contextLogitsE"]], "tensorrt_llm::runtime::genericgenerationoutput::cumlogprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput11cumLogProbsE"]], "tensorrt_llm::runtime::genericgenerationoutput::generationlogits (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput16generationLogitsE"]], "tensorrt_llm::runtime::genericgenerationoutput::ids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput3idsE"]], "tensorrt_llm::runtime::genericgenerationoutput::lengths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput7lengthsE"]], "tensorrt_llm::runtime::genericgenerationoutput::logprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput8logProbsE"]], "tensorrt_llm::runtime::genericgenerationoutput::ontokengenerated (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput16onTokenGeneratedE"]], "tensorrt_llm::runtime::genericprompttuningparams (c++ class)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime25GenericPromptTuningParamsE"]], "tensorrt_llm::runtime::genericprompttuningparams::genericprompttuningparams (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams25GenericPromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr"]], "tensorrt_llm::runtime::genericprompttuningparams::sizetype (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams8SizeTypeE"]], "tensorrt_llm::runtime::genericprompttuningparams::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9TensorPtrE"]], "tensorrt_llm::runtime::genericprompttuningparams::embeddingtable (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams14embeddingTableE"]], "tensorrt_llm::runtime::genericprompttuningparams::prompttuningenabled (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams19promptTuningEnabledE"]], "tensorrt_llm::runtime::genericprompttuningparams::tasks (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams5tasksE"]], "tensorrt_llm::runtime::genericprompttuningparams::vocabsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9vocabSizeE"]], "tensorrt_llm::runtime::gptdecoder (c++ class)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime10GptDecoderE"]], "tensorrt_llm::runtime::gptdecoder::cudastreamptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder13CudaStreamPtrE"]], "tensorrt_llm::runtime::gptdecoder::gptdecoder (c++ function)": [[1, 
"_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderERK12DecodingMode6size_t6size_t6size_t6size_t6size_tRK13CudaStreamPtrNSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE"]], "tensorrt_llm::runtime::gptdecoder::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder9TensorPtrE"]], "tensorrt_llm::runtime::gptdecoder::forward (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder7forwardER14DecodingOutputRK13DecodingInput"]], "tensorrt_llm::runtime::gptdecoder::forwardasync (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput"]], "tensorrt_llm::runtime::gptdecoder::gathertree (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager"]], "tensorrt_llm::runtime::gptdecoder::getsamplingconfig (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder17getSamplingConfigEv"]], "tensorrt_llm::runtime::gptdecoder::mdynamicdecodelayer (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder19mDynamicDecodeLayerE"]], "tensorrt_llm::runtime::gptdecoder::mlogprobstiled (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder14mLogProbsTiledE"]], "tensorrt_llm::runtime::gptdecoder::mmanager (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder8mManagerE"]], "tensorrt_llm::runtime::gptdecoder::mmaxbatchsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder13mMaxBatchSizeE"]], "tensorrt_llm::runtime::gptdecoder::mprop (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5mPropE"]], "tensorrt_llm::runtime::gptdecoder::msamplingconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder15mSamplingConfigE"]], "tensorrt_llm::runtime::gptdecoder::setup (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t8SizeTypeRKNSt8optionalI9TensorPtrEE"]], "tensorrt_llm::runtime::gptdecoderbatch (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatchE"]], "tensorrt_llm::runtime::gptdecoderbatch::cudastreamptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13CudaStreamPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::decodinginputptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16DecodingInputPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::decodingoutputptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch17DecodingOutputPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::gptdecoderbatch (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr"]], "tensorrt_llm::runtime::gptdecoderbatch::gptdecoderptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13GptDecoderPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::sharedconstptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14SharedConstPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9TensorPtrE"]], "tensorrt_llm::runtime::gptdecoderbatch::allocatemedusabuffers (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch21allocateMedusaBuffersEv"]], "tensorrt_llm::runtime::gptdecoderbatch::finalize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch8finalizeE8SizeType"], [1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch8finalizeEv"]], "tensorrt_llm::runtime::gptdecoderbatch::forwardasync (c++ function)": [[1, 
"_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE"], [1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN7decoder6OutputERKN7decoder5InputE"]], "tensorrt_llm::runtime::gptdecoderbatch::forwardasyncfuseddecoder (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch24forwardAsyncFusedDecoderE8SizeTypeRN13decoder_batch6OutputERKN13decoder_batch5InputERK9CudaEvent"]], "tensorrt_llm::runtime::gptdecoderbatch::forwardasyncunfuseddecoder (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch26forwardAsyncUnfusedDecoderE8SizeTypeRN13decoder_batch6OutputERKN13decoder_batch5InputERK9CudaEvent"]], "tensorrt_llm::runtime::gptdecoderbatch::forwardsync (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE"], [1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getallnewtokens (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch15getAllNewTokensEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getcumlogprobs (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch14getCumLogProbsE8SizeType"], [1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch14getCumLogProbsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getfinished (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getFinishedEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getlogprobs (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getLogProbsE8SizeType"], [1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getLogProbsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getmedusaacceptedlengthscumsum (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch30getMedusaAcceptedLengthsCumSumEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getmedusaacceptedpackedpaths (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch28getMedusaAcceptedPackedPathsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getnbfinished (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch13getNbFinishedEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getnbsteps (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch10getNbStepsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getnewtokens (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getNewTokensE8SizeType"]], "tensorrt_llm::runtime::gptdecoderbatch::getnextdrafttokens (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch18getNextDraftTokensEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getoutputids (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsE8SizeType"], [1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::getparentids (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getParentIdsEv"]], "tensorrt_llm::runtime::gptdecoderbatch::macceptbylogits (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mAcceptByLogitsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mactualbatchsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mActualBatchSizeE"]], "tensorrt_llm::runtime::gptdecoderbatch::mbatchslotsacceptlogits (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch23mBatchSlotsAcceptLogitsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mbatchslotsaccepttokens (c++ member)": 
[[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch23mBatchSlotsAcceptTokensE"]], "tensorrt_llm::runtime::gptdecoderbatch::mbatchslotsdecoder (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch18mBatchSlotsDecoderE"]], "tensorrt_llm::runtime::gptdecoderbatch::mbatchslotssetup (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mBatchSlotsSetupE"]], "tensorrt_llm::runtime::gptdecoderbatch::mbeamwidths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11mBeamWidthsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mbuffermanager (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mBufferManagerE"]], "tensorrt_llm::runtime::gptdecoderbatch::mcurandstates (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mCurandStatesE"]], "tensorrt_llm::runtime::gptdecoderbatch::mdecoders (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9mDecodersE"]], "tensorrt_llm::runtime::gptdecoderbatch::mdecodinginputs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mDecodingInputsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mdecodingoutputs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mDecodingOutputsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mdraftlogits (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12mDraftLogitsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mdraftprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11mDraftProbsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mdrafttokenids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mDraftTokenIdsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mfinished (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9mFinishedE"]], "tensorrt_llm::runtime::gptdecoderbatch::mfinishedsteps (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mFinishedStepsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mfinishedsum (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12mFinishedSumE"]], "tensorrt_llm::runtime::gptdecoderbatch::mforwardevent (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mForwardEventE"]], "tensorrt_llm::runtime::gptdecoderbatch::mforwardtoken (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mForwardTokenE"]], "tensorrt_llm::runtime::gptdecoderbatch::mfuseddecoder (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mFusedDecoderE"]], "tensorrt_llm::runtime::gptdecoderbatch::mgeneratedtokensperenginestep (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch29mGeneratedTokensPerEngineStepE"]], "tensorrt_llm::runtime::gptdecoderbatch::mjointdecodinginput (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch19mJointDecodingInputE"]], "tensorrt_llm::runtime::gptdecoderbatch::mjointdecodingoutput (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch20mJointDecodingOutputE"]], "tensorrt_llm::runtime::gptdecoderbatch::mmaxattentionwindow (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch19mMaxAttentionWindowE"]], "tensorrt_llm::runtime::gptdecoderbatch::mmaxbadwordslen (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mMaxBadWordsLenE"]], "tensorrt_llm::runtime::gptdecoderbatch::mmaxnewtokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mMaxNewTokensE"]], "tensorrt_llm::runtime::gptdecoderbatch::mmaxsequencelength (c++ member)": [[1, 
"_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch18mMaxSequenceLengthE"]], "tensorrt_llm::runtime::gptdecoderbatch::mmaxstopwordslen (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mMaxStopWordsLenE"]], "tensorrt_llm::runtime::gptdecoderbatch::mmaxtokensperdecoderstep (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch24mMaxTokensPerDecoderStepE"]], "tensorrt_llm::runtime::gptdecoderbatch::mmaxtokensperenginestep (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch23mMaxTokensPerEngineStepE"]], "tensorrt_llm::runtime::gptdecoderbatch::mnbsteps (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8mNbStepsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mnumdrafttokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mNumDraftTokensE"]], "tensorrt_llm::runtime::gptdecoderbatch::msinktokenlength (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mSinkTokenLengthE"]], "tensorrt_llm::runtime::gptdecoderbatch::mstream (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch7mStreamE"]], "tensorrt_llm::runtime::gptdecoderbatch::mstreams (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8mStreamsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mtargetlogitsptrs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch17mTargetLogitsPtrsE"]], "tensorrt_llm::runtime::gptdecoderbatch::mtargetprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12mTargetProbsE"]], "tensorrt_llm::runtime::gptdecoderbatch::musemedusa (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10mUseMedusaE"]], "tensorrt_llm::runtime::gptdecoderbatch::mvocabsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10mVocabSizeE"]], "tensorrt_llm::runtime::gptdecoderbatch::mvocabsizepadded (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mVocabSizePaddedE"]], "tensorrt_llm::runtime::gptdecoderbatch::newbatch (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig"]], "tensorrt_llm::runtime::gptdecoderbatch::newrequest (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig"]], "tensorrt_llm::runtime::gptdecoderbatch::newrequestmedusa (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16newRequestMedusaE8SizeTypeRKN13decoder_batch7RequestE"]], "tensorrt_llm::runtime::gptdecoderbatch::newrequestspeculativedecoding (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch29newRequestSpeculativeDecodingE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig"]], "tensorrt_llm::runtime::gptdecoderbatch::newrequests (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11newRequestsERKNSt6vectorI8SizeTypeEERKNSt6vectorIN13decoder_batch7RequestEEERKNSt6vectorI14SamplingConfigEE"]], "tensorrt_llm::runtime::gptdecoderbatch::postprocessrequest (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch18postProcessRequestE8SizeType"]], "tensorrt_llm::runtime::gptdecoderbatch::setup (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig"]], "tensorrt_llm::runtime::gptdecoderbatch::setupmedusa (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11setupMedusaERK14GptModelConfig"]], 
"tensorrt_llm::runtime::gptjsonconfig (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfigE"]], "tensorrt_llm::runtime::gptjsonconfig::gptjsonconfig (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig"]], "tensorrt_llm::runtime::gptjsonconfig::enginefilename (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfig"], [1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE"]], "tensorrt_llm::runtime::gptjsonconfig::getmodelconfig (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14getModelConfigEv"]], "tensorrt_llm::runtime::gptjsonconfig::getname (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig7getNameEv"]], "tensorrt_llm::runtime::gptjsonconfig::getpipelineparallelism (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig22getPipelineParallelismEv"]], "tensorrt_llm::runtime::gptjsonconfig::getprecision (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getPrecisionEv"]], "tensorrt_llm::runtime::gptjsonconfig::gettensorparallelism (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig20getTensorParallelismEv"]], "tensorrt_llm::runtime::gptjsonconfig::getversion (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig10getVersionEv"]], "tensorrt_llm::runtime::gptjsonconfig::getworldsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getWorldSizeEv"]], "tensorrt_llm::runtime::gptjsonconfig::mgptmodelconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig15mGptModelConfigE"]], "tensorrt_llm::runtime::gptjsonconfig::mname (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5mNameE"]], "tensorrt_llm::runtime::gptjsonconfig::mpipelineparallelism (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig20mPipelineParallelismE"]], "tensorrt_llm::runtime::gptjsonconfig::mprecision (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig10mPrecisionE"]], "tensorrt_llm::runtime::gptjsonconfig::mtensorparallelism (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig18mTensorParallelismE"]], "tensorrt_llm::runtime::gptjsonconfig::mversion (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig8mVersionE"]], "tensorrt_llm::runtime::gptjsonconfig::parse (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt10filesystem4pathE"], [1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt6stringE"], [1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERNSt7istreamE"]], "tensorrt_llm::runtime::gptmodelconfig (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfigE"]], "tensorrt_llm::runtime::gptmodelconfig::gptmodelconfig (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::gptmodelconfig::modelvariant (c++ enum)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariantE"]], "tensorrt_llm::runtime::gptmodelconfig::modelvariant::kglm (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant4kGlmE"]], "tensorrt_llm::runtime::gptmodelconfig::modelvariant::kgpt (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant4kGptE"]], "tensorrt_llm::runtime::gptmodelconfig::modelvariant::kmamba (c++ enumerator)": [[1, 
"_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant6kMambaE"]], "tensorrt_llm::runtime::gptmodelconfig::computecontextlogits (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEb"], [1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEv"]], "tensorrt_llm::runtime::gptmodelconfig::computegenerationlogits (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig23computeGenerationLogitsEb"], [1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig23computeGenerationLogitsEv"]], "tensorrt_llm::runtime::gptmodelconfig::getcontextfmhaforgeneration (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig27getContextFMHAForGenerationEv"]], "tensorrt_llm::runtime::gptmodelconfig::getdatatype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getDataTypeEv"]], "tensorrt_llm::runtime::gptmodelconfig::gethiddensize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13getHiddenSizeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getkvdatatype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13getKvDataTypeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getloramodules (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getLoraModulesEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmambaconfig (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMambaConfigEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxbatchsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxBatchSizeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxbeamwidth (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxBeamWidthEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxdraftlen (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMaxDraftLenEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxinputlen (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMaxInputLenEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxlorarank (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMaxLoraRankEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxnumtokens (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxNumTokensEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxpromptembeddingtablesize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig30getMaxPromptEmbeddingTableSizeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxsequencelen (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig17getMaxSequenceLenEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmaxtokensperstep (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig19getMaxTokensPerStepEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmedusamodule (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMedusaModuleEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmlphiddensize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig16getMlpHiddenSizeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getmodelvariant (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getModelVariantEv"]], "tensorrt_llm::runtime::gptmodelconfig::getnbheads (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig10getNbHeadsEv"]], "tensorrt_llm::runtime::gptmodelconfig::getnbkvheads (c++ function)": [[1, 
"_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getNbKvHeadsEv"]], "tensorrt_llm::runtime::gptmodelconfig::getnblayers (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getNbLayersE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::getpagedcontextfmha (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig19getPagedContextFMHAEv"]], "tensorrt_llm::runtime::gptmodelconfig::getquantmode (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getQuantModeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getsizeperhead (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getSizePerHeadEv"]], "tensorrt_llm::runtime::gptmodelconfig::gettokensperblock (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig17getTokensPerBlockEv"]], "tensorrt_llm::runtime::gptmodelconfig::getvocabsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getVocabSizeEv"]], "tensorrt_llm::runtime::gptmodelconfig::getvocabsizepadded (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18getVocabSizePaddedE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::hasmambaconfig (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14hasMambaConfigEv"]], "tensorrt_llm::runtime::gptmodelconfig::isssmbased (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig10isSsmBasedEv"]], "tensorrt_llm::runtime::gptmodelconfig::istransformerbased (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18isTransformerBasedEv"]], "tensorrt_llm::runtime::gptmodelconfig::mcomputecontextlogits (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21mComputeContextLogitsE"]], "tensorrt_llm::runtime::gptmodelconfig::mcomputegenerationlogits (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig24mComputeGenerationLogitsE"]], "tensorrt_llm::runtime::gptmodelconfig::mdatatype (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig9mDataTypeE"]], "tensorrt_llm::runtime::gptmodelconfig::mhiddensize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig11mHiddenSizeE"]], "tensorrt_llm::runtime::gptmodelconfig::minputpacked (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mInputPackedE"]], "tensorrt_llm::runtime::gptmodelconfig::mloramodules (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mLoraModulesE"]], "tensorrt_llm::runtime::gptmodelconfig::mmambaconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMambaConfigE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxbatchsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxBatchSizeE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxbeamwidth (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxBeamWidthE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxdraftlen (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxDraftLenE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxinputlen (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxInputLenE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxlorarank (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxLoraRankE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxnumtokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxNumTokensE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxpromptembeddingtablesize (c++ member)": [[1, 
"_CPPv4N12tensorrt_llm7runtime14GptModelConfig28mMaxPromptEmbeddingTableSizeE"]], "tensorrt_llm::runtime::gptmodelconfig::mmaxsequencelen (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15mMaxSequenceLenE"]], "tensorrt_llm::runtime::gptmodelconfig::mmedusamodule (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMedusaModuleE"]], "tensorrt_llm::runtime::gptmodelconfig::mmlphiddensize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14mMlpHiddenSizeE"]], "tensorrt_llm::runtime::gptmodelconfig::mmodelvariant (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mModelVariantE"]], "tensorrt_llm::runtime::gptmodelconfig::mnbheads (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig8mNbHeadsE"]], "tensorrt_llm::runtime::gptmodelconfig::mnbkvheads (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mNbKvHeadsE"]], "tensorrt_llm::runtime::gptmodelconfig::mnblayers (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig9mNbLayersE"]], "tensorrt_llm::runtime::gptmodelconfig::mpagedcontextfmha (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17mPagedContextFMHAE"]], "tensorrt_llm::runtime::gptmodelconfig::mpagedkvcache (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mPagedKvCacheE"]], "tensorrt_llm::runtime::gptmodelconfig::mpagedstate (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig11mPagedStateE"]], "tensorrt_llm::runtime::gptmodelconfig::mquantmode (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mQuantModeE"]], "tensorrt_llm::runtime::gptmodelconfig::msizeperhead (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mSizePerHeadE"]], "tensorrt_llm::runtime::gptmodelconfig::mtokensperblock (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15mTokensPerBlockE"]], "tensorrt_llm::runtime::gptmodelconfig::musecontextfmhaforgeneration (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig28mUseContextFMHAForGenerationE"]], "tensorrt_llm::runtime::gptmodelconfig::musecustomallreduce (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig19mUseCustomAllReduceE"]], "tensorrt_llm::runtime::gptmodelconfig::musegptattentionplugin (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig22mUseGptAttentionPluginE"]], "tensorrt_llm::runtime::gptmodelconfig::museloraplugin (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14mUseLoraPluginE"]], "tensorrt_llm::runtime::gptmodelconfig::musemambaconv1dplugin (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21mUseMambaConv1dPluginE"]], "tensorrt_llm::runtime::gptmodelconfig::mvocabsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mVocabSizeE"]], "tensorrt_llm::runtime::gptmodelconfig::setloramodules (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setLoraModulesERKNSt6vectorI10LoraModuleEE"]], "tensorrt_llm::runtime::gptmodelconfig::setmambaconfig (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMambaConfigERK11MambaConfig"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxbatchsize (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBatchSizeE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxbeamwidth (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBeamWidthE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxdraftlen (c++ function)": [[1, 
"_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxDraftLenE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxinputlen (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxInputLenE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxlorarank (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxLoraRankE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxnumtokens (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxNumTokensENSt8optionalI8SizeTypeEE"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxpromptembeddingtablesize (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig30setMaxPromptEmbeddingTableSizeE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmaxsequencelen (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setMaxSequenceLenE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmedusamodule (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMedusaModuleERK12MedusaModule"]], "tensorrt_llm::runtime::gptmodelconfig::setmlphiddensize (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig16setMlpHiddenSizeE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setmodelvariant (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setModelVariantE12ModelVariant"]], "tensorrt_llm::runtime::gptmodelconfig::setnbkvheads (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setNbKvHeadsE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setpagedcontextfmha (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig19setPagedContextFMHAEb"]], "tensorrt_llm::runtime::gptmodelconfig::setquantmode (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setQuantModeEN6common9QuantModeE"]], "tensorrt_llm::runtime::gptmodelconfig::setsizeperhead (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setSizePerHeadE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::settokensperblock (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setTokensPerBlockE8SizeType"]], "tensorrt_llm::runtime::gptmodelconfig::setusecontextfmhaforgeneration (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig30setUseContextFMHAForGenerationEb"]], "tensorrt_llm::runtime::gptmodelconfig::supportsinflightbatching (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig24supportsInflightBatchingEv"]], "tensorrt_llm::runtime::gptmodelconfig::usecustomallreduce (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEb"], [1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEv"]], "tensorrt_llm::runtime::gptmodelconfig::usegptattentionplugin (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEb"], [1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEv"]], "tensorrt_llm::runtime::gptmodelconfig::useloraplugin (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13useLoraPluginEb"], [1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13useLoraPluginEv"]], "tensorrt_llm::runtime::gptmodelconfig::usemambaconv1dplugin (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig20useMambaConv1dPluginEb"], [1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig20useMambaConv1dPluginEv"]], "tensorrt_llm::runtime::gptmodelconfig::usemedusa (c++ function)": [[1, 
"_CPPv4NK12tensorrt_llm7runtime14GptModelConfig9useMedusaEv"]], "tensorrt_llm::runtime::gptmodelconfig::usepackedinput (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig14usePackedInputEb"], [1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14usePackedInputEv"]], "tensorrt_llm::runtime::gptmodelconfig::usepagedkvcache (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEb"], [1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEv"]], "tensorrt_llm::runtime::gptmodelconfig::usepagedstate (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14GptModelConfig13usePagedStateEb"], [1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13usePagedStateEv"]], "tensorrt_llm::runtime::gptmodelconfig::useprompttuning (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15usePromptTuningEv"]], "tensorrt_llm::runtime::gptsession (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSessionE"]], "tensorrt_llm::runtime::gptsession::config (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession6ConfigE"]], "tensorrt_llm::runtime::gptsession::config::config (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config6ConfigE8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::config::ctxmicrobatchsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17ctxMicroBatchSizeE"]], "tensorrt_llm::runtime::gptsession::config::cudagraphmode (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config13cudaGraphModeE"]], "tensorrt_llm::runtime::gptsession::config::decoderperrequest (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17decoderPerRequestE"]], "tensorrt_llm::runtime::gptsession::config::decodingmode (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config12decodingModeE"]], "tensorrt_llm::runtime::gptsession::config::genmicrobatchsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17genMicroBatchSizeE"]], "tensorrt_llm::runtime::gptsession::config::kvcacheconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config13kvCacheConfigE"]], "tensorrt_llm::runtime::gptsession::config::maxbatchsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config12maxBatchSizeE"]], "tensorrt_llm::runtime::gptsession::config::maxbeamwidth (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config12maxBeamWidthE"]], "tensorrt_llm::runtime::gptsession::config::maxsequencelength (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17maxSequenceLengthE"]], "tensorrt_llm::runtime::gptsession::config::normalizelogprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession6Config17normalizeLogProbsE"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorE"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::cudagraphexecutor (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor17CudaGraphExecutorEv"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::clear (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor5clearEv"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::create (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6createERK11cudaGraph_t"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::hasinstance (c++ function)": [[1, 
"_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor11hasInstanceEv"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::launch (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6launchERK10CudaStream"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::minstance (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor9mInstanceE"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::preparenextgraph (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16prepareNextGraphERK11TllmRuntime8SizeType"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::update (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6updateERK11cudaGraph_t"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::uploadtostream (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor14uploadToStreamERK10CudaStream"]], "tensorrt_llm::runtime::gptsession::cudagraphexecutor::~cudagraphexecutor (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorD0Ev"]], "tensorrt_llm::runtime::gptsession::generationprofiler (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfilerE"]], "tensorrt_llm::runtime::gptsession::generationprofiler::generationprofiler (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler18GenerationProfilerEv"]], "tensorrt_llm::runtime::gptsession::generationprofiler::end (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler3endE"]], "tensorrt_llm::runtime::gptsession::generationprofiler::flags (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler5flagsE"]], "tensorrt_llm::runtime::gptsession::generationprofiler::getelapsedtimems (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler16getElapsedTimeMsEv"]], "tensorrt_llm::runtime::gptsession::generationprofiler::getend (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10GptSession18GenerationProfiler6getEndEv"]], "tensorrt_llm::runtime::gptsession::generationprofiler::getstart (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10GptSession18GenerationProfiler8getStartEv"]], "tensorrt_llm::runtime::gptsession::generationprofiler::start (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler5startE"]], "tensorrt_llm::runtime::gptsession::gptsession (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr"], [1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr"], [1, "_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr"]], "tensorrt_llm::runtime::gptsession::kvcacheconfig (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession13KvCacheConfigE"]], "tensorrt_llm::runtime::gptsession::kvcachemanager (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession14KvCacheManagerE"]], "tensorrt_llm::runtime::gptsession::loggerptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession9LoggerPtrE"]], "tensorrt_llm::runtime::gptsession::microbatchconfig (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfigE"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::microbatchconfig (c++ function)": [[1, 
"_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE8SizeType8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE"], [1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigEv"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::ctxbatchsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig12ctxBatchSizeE"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::genbatchsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig12genBatchSizeE"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::getgengraphid (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig13getGenGraphIdE8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::numctxbatches (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig13numCtxBatchesE"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::numctxpergen (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig12numCtxPerGenEv"]], "tensorrt_llm::runtime::gptsession::microbatchconfig::numgenbatches (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig13numGenBatchesE"]], "tensorrt_llm::runtime::gptsession::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession9TensorPtrE"]], "tensorrt_llm::runtime::gptsession::tokengeneratedcallback (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession22TokenGeneratedCallbackE"]], "tensorrt_llm::runtime::gptsession::createbuffers (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession13createBuffersE8SizeType"]], "tensorrt_llm::runtime::gptsession::createcontexts (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createContextsEv"]], "tensorrt_llm::runtime::gptsession::createcustomallreduceworkspace (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::createdecoders (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeTypeRK12DecodingMode"]], "tensorrt_llm::runtime::gptsession::createkvcachemanager (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig"]], "tensorrt_llm::runtime::gptsession::createontokengeneratedcallback (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession30createOnTokenGeneratedCallbackER16GenerationOutput"]], "tensorrt_llm::runtime::gptsession::decoderstepasync (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncE8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::executecontextstep (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession18executeContextStepERKNSt6vectorI15GenerationInputEERKNSt6vectorI8SizeTypeEEPK14KvCacheManager"]], "tensorrt_llm::runtime::gptsession::executegenerationstep (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE"]], "tensorrt_llm::runtime::gptsession::finalize (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession8finalizeE8SizeType"]], "tensorrt_llm::runtime::gptsession::generate (c++ function)": [[1, 
"_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfigKNSt10shared_ptrI18GenerationProfilerEE"]], "tensorrt_llm::runtime::gptsession::generatebatched (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallbackKNSt10shared_ptrI18GenerationProfilerEE"]], "tensorrt_llm::runtime::gptsession::getbuffermanager (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16getBufferManagerEv"]], "tensorrt_llm::runtime::gptsession::getdevice (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10GptSession9getDeviceEv"]], "tensorrt_llm::runtime::gptsession::getlogger (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10GptSession9getLoggerEv"]], "tensorrt_llm::runtime::gptsession::getlogitdatatype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10GptSession16getLogitDataTypeEv"]], "tensorrt_llm::runtime::gptsession::getmodelconfig (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10GptSession14getModelConfigEv"]], "tensorrt_llm::runtime::gptsession::getnormalizelogprobs (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10GptSession20getNormalizeLogProbsEv"]], "tensorrt_llm::runtime::gptsession::getworldconfig (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10GptSession14getWorldConfigEv"]], "tensorrt_llm::runtime::gptsession::initdecoder (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType"]], "tensorrt_llm::runtime::gptsession::kvcacheaddsequences (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::mbuffers (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession8mBuffersE"]], "tensorrt_llm::runtime::gptsession::mcommevent (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession10mCommEventE"]], "tensorrt_llm::runtime::gptsession::mcommptrs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession9mCommPtrsE"]], "tensorrt_llm::runtime::gptsession::mcommstream (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession11mCommStreamE"]], "tensorrt_llm::runtime::gptsession::mcudagraphinstances (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession19mCudaGraphInstancesE"]], "tensorrt_llm::runtime::gptsession::mcudagraphmode (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession14mCudaGraphModeE"]], "tensorrt_llm::runtime::gptsession::mdecodermaxattentionwindow (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession26mDecoderMaxAttentionWindowE"]], "tensorrt_llm::runtime::gptsession::mdecodermaxsequencelength (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession25mDecoderMaxSequenceLengthE"]], "tensorrt_llm::runtime::gptsession::mdecodersinktokenlength (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession23mDecoderSinkTokenLengthE"]], "tensorrt_llm::runtime::gptsession::mdecoders (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession9mDecodersE"]], "tensorrt_llm::runtime::gptsession::mdevice (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession7mDeviceE"]], "tensorrt_llm::runtime::gptsession::mipcmemoryhandles (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession17mIpcMemoryHandlesE"]], "tensorrt_llm::runtime::gptsession::mkvcachemanager (c++ member)": [[1, 
"_CPPv4N12tensorrt_llm7runtime10GptSession15mKvCacheManagerE"]], "tensorrt_llm::runtime::gptsession::mlogger (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession7mLoggerE"]], "tensorrt_llm::runtime::gptsession::mmicrobatchconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession17mMicroBatchConfigE"]], "tensorrt_llm::runtime::gptsession::mmodelconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession12mModelConfigE"]], "tensorrt_llm::runtime::gptsession::mnormalizelogprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession18mNormalizeLogProbsE"]], "tensorrt_llm::runtime::gptsession::mpipelinecomm (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession13mPipelineCommE"]], "tensorrt_llm::runtime::gptsession::mreceivedevents (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession15mReceivedEventsE"]], "tensorrt_llm::runtime::gptsession::mruntime (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession8mRuntimeE"]], "tensorrt_llm::runtime::gptsession::mworldconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession12mWorldConfigE"]], "tensorrt_llm::runtime::gptsession::setup (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession5setupERK6Config"]], "tensorrt_llm::runtime::gptsession::shouldstopsync (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::gptsession::usecudagraphs (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptSession13useCudaGraphsEv"]], "tensorrt_llm::runtime::ibuffer (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBufferE"]], "tensorrt_llm::runtime::ibuffer::datatype (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer8DataTypeE"]], "tensorrt_llm::runtime::ibuffer::ibuffer (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferERK7IBuffer"], [1, "_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferEv"]], "tensorrt_llm::runtime::ibuffer::sharedconstptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer14SharedConstPtrE"]], "tensorrt_llm::runtime::ibuffer::sharedptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer9SharedPtrE"]], "tensorrt_llm::runtime::ibuffer::uniqueconstptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer14UniqueConstPtrE"]], "tensorrt_llm::runtime::ibuffer::uniqueptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer9UniquePtrE"]], "tensorrt_llm::runtime::ibuffer::data (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE"], [1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataEv"], [1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE"], [1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataEv"]], "tensorrt_llm::runtime::ibuffer::getcapacity (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer11getCapacityEv"]], "tensorrt_llm::runtime::ibuffer::getdatatype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer11getDataTypeEv"]], "tensorrt_llm::runtime::ibuffer::getdatatypename (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer15getDataTypeNameEv"]], "tensorrt_llm::runtime::ibuffer::getmemorytype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer13getMemoryTypeEv"]], "tensorrt_llm::runtime::ibuffer::getmemorytypename (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer17getMemoryTypeNameEv"]], "tensorrt_llm::runtime::ibuffer::getsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7getSizeEv"]], "tensorrt_llm::runtime::ibuffer::getsizeinbytes (c++ function)": [[1, 
"_CPPv4NK12tensorrt_llm7runtime7IBuffer14getSizeInBytesEv"]], "tensorrt_llm::runtime::ibuffer::memorytype (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer10memoryTypeEPKv"]], "tensorrt_llm::runtime::ibuffer::operator= (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBufferaSERK7IBuffer"]], "tensorrt_llm::runtime::ibuffer::release (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer7releaseEv"]], "tensorrt_llm::runtime::ibuffer::resize (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer6resizeENSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::slice (c++ function)": [[1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE"], [1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE"], [1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE"], [1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::tobytes (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7toBytesENSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::view (c++ function)": [[1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE"], [1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtr"], [1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::wrap (c++ function)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE"], [1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE"], [1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE"], [1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE"], [1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE"]], "tensorrt_llm::runtime::ibuffer::~ibuffer (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBufferD0Ev"]], "tensorrt_llm::runtime::igptdecoder (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoderE"]], "tensorrt_llm::runtime::igptdecoder::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder9TensorPtrE"]], "tensorrt_llm::runtime::igptdecoder::acceptdrafttokensbyids (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorRKN13BufferManager13CudaStreamPtrE"]], "tensorrt_llm::runtime::igptdecoder::acceptdrafttokensbylogits (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensorRK7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE"]], "tensorrt_llm::runtime::igptdecoder::create (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createERK12DecodingModeN8nvinfer18DataTypeE6size_t6size_t6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrENSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE"]], "tensorrt_llm::runtime::igptdecoder::forward (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder7forwardER14DecodingOutputRK13DecodingInput"]], "tensorrt_llm::runtime::igptdecoder::forwardasync (c++ function)": [[1, 
"_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput"]], "tensorrt_llm::runtime::igptdecoder::gathertree (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager"]], "tensorrt_llm::runtime::igptdecoder::getsamplingconfig (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder17getSamplingConfigEv"]], "tensorrt_llm::runtime::igptdecoder::setup (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t8SizeTypeRKNSt8optionalI9TensorPtrEE"]], "tensorrt_llm::runtime::igptdecoder::updatekvcachebasedonacceptedtokens (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder34updateKVCacheBasedOnAcceptedTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorRK14GptModelConfigRK11WorldConfigN13BufferManager13CudaStreamPtrE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::igptdecoder::~igptdecoder (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoderD0Ev"]], "tensorrt_llm::runtime::igptdecoderbatch (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatchE"]], "tensorrt_llm::runtime::igptdecoderbatch::cudastreamptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch13CudaStreamPtrE"]], "tensorrt_llm::runtime::igptdecoderbatch::igptdecoderbatch (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch16IGptDecoderBatchEv"]], "tensorrt_llm::runtime::igptdecoderbatch::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch9TensorPtrE"]], "tensorrt_llm::runtime::igptdecoderbatch::tokenptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch8TokenPtrE"]], "tensorrt_llm::runtime::igptdecoderbatch::finalize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch8finalizeE8SizeType"]], "tensorrt_llm::runtime::igptdecoderbatch::forward (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch7forwardERN13decoder_batch6OutputERKN13decoder_batch5InputE"]], "tensorrt_llm::runtime::igptdecoderbatch::forwardasync (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE"]], "tensorrt_llm::runtime::igptdecoderbatch::forwardsync (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE"]], "tensorrt_llm::runtime::igptdecoderbatch::getcumlogprobs (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch14getCumLogProbsE8SizeType"], [1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch14getCumLogProbsEv"]], "tensorrt_llm::runtime::igptdecoderbatch::getfinished (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getFinishedEv"]], "tensorrt_llm::runtime::igptdecoderbatch::getlogprobs (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getLogProbsE8SizeType"], [1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getLogProbsEv"]], "tensorrt_llm::runtime::igptdecoderbatch::getmedusaacceptedlengthscumsum (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch30getMedusaAcceptedLengthsCumSumEv"]], "tensorrt_llm::runtime::igptdecoderbatch::getmedusaacceptedpackedpaths (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch28getMedusaAcceptedPackedPathsEv"]], "tensorrt_llm::runtime::igptdecoderbatch::getnbsteps (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch10getNbStepsEv"]], 
"tensorrt_llm::runtime::igptdecoderbatch::getnextdrafttokens (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch18getNextDraftTokensEv"]], "tensorrt_llm::runtime::igptdecoderbatch::getoutputids (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getOutputIdsE8SizeType"]], "tensorrt_llm::runtime::igptdecoderbatch::getparentids (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getParentIdsEv"]], "tensorrt_llm::runtime::igptdecoderbatch::newrequests (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch11newRequestsERKNSt6vectorI8SizeTypeEERKNSt6vectorIN13decoder_batch7RequestEEERKNSt6vectorI14SamplingConfigEE"]], "tensorrt_llm::runtime::istatefulgptdecoder (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoderE"]], "tensorrt_llm::runtime::istatefulgptdecoder::cudastreamptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder13CudaStreamPtrE"]], "tensorrt_llm::runtime::istatefulgptdecoder::istatefulgptdecoder (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder19IStatefulGptDecoderEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder9TensorPtrE"]], "tensorrt_llm::runtime::istatefulgptdecoder::finalize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder8finalizeEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::forward (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder7forwardERN7decoder6OutputERKN7decoder5InputE"]], "tensorrt_llm::runtime::istatefulgptdecoder::forwardasync (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder12forwardAsyncERN7decoder6OutputERKN7decoder5InputE"]], "tensorrt_llm::runtime::istatefulgptdecoder::forwardsync (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder11forwardSyncEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::getallnewtokens (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder15getAllNewTokensEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::getcumlogprobs (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder14getCumLogProbsEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::getlogprobs (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder11getLogProbsEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::getnbfinished (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder13getNbFinishedEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::getnewtokens (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getNewTokensE8SizeType"]], "tensorrt_llm::runtime::istatefulgptdecoder::getoutputids (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getOutputIdsEv"]], "tensorrt_llm::runtime::istatefulgptdecoder::newbatch (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig"]], "tensorrt_llm::runtime::istatefulgptdecoder::setup (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig"]], "tensorrt_llm::runtime::istatefulgptdecoder::~istatefulgptdecoder (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoderD0Ev"]], "tensorrt_llm::runtime::itensor (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensorE"]], 
"tensorrt_llm::runtime::itensor::dimtype (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor7DimTypeE"]], "tensorrt_llm::runtime::itensor::itensor (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorERK7ITensor"], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorEv"]], "tensorrt_llm::runtime::itensor::shape (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor5ShapeE"]], "tensorrt_llm::runtime::itensor::sharedconstptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor14SharedConstPtrE"]], "tensorrt_llm::runtime::itensor::sharedptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor9SharedPtrE"]], "tensorrt_llm::runtime::itensor::uniqueconstptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor14UniqueConstPtrE"]], "tensorrt_llm::runtime::itensor::uniqueptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor9UniquePtrE"]], "tensorrt_llm::runtime::itensor::castsize (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor8castSizeE6size_t"]], "tensorrt_llm::runtime::itensor::getshape (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7ITensor8getShapeEv"]], "tensorrt_llm::runtime::itensor::makeshape (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor9makeShapeERKNSt16initializer_listI8SizeTypeEE"]], "tensorrt_llm::runtime::itensor::operator= (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensoraSERK7ITensor"]], "tensorrt_llm::runtime::itensor::reshape (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor7reshapeERK5Shape"]], "tensorrt_llm::runtime::itensor::resize (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor6resizeENSt6size_tE"]], "tensorrt_llm::runtime::itensor::shapeequals (c++ function)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor11shapeEqualsEbRK5ShapePK1T8SizeType"], [1, "_CPPv4I0ENK12tensorrt_llm7runtime7ITensor11shapeEqualsEbPK1T8SizeType"], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor11shapeEqualsERK5ShapeRK5Shape"], [1, "_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERK5Shape"], [1, "_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERKNSt16initializer_listI8SizeTypeEE"]], "tensorrt_llm::runtime::itensor::slice (c++ function)": [[1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE"], [1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE"], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE"], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE"]], "tensorrt_llm::runtime::itensor::squeeze (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeE8SizeType"], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape8SizeType"]], "tensorrt_llm::runtime::itensor::tostring (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor8toStringERK5Shape"]], "tensorrt_llm::runtime::itensor::unsqueeze (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeE8SizeType"], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeERK5Shape8SizeType"]], "tensorrt_llm::runtime::itensor::view (c++ function)": [[1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape"], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewE9SharedPtr"], [1, 
"_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape"]], "tensorrt_llm::runtime::itensor::volume (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor6volumeERK5Shape"]], "tensorrt_llm::runtime::itensor::volumenonnegative (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor17volumeNonNegativeERK5Shape"]], "tensorrt_llm::runtime::itensor::wrap (c++ function)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape"], [1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE"], [1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape"], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape"], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE"]], "tensorrt_llm::runtime::itensor::~itensor (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensorD0Ev"]], "tensorrt_llm::runtime::ipcmemory (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemoryE"]], "tensorrt_llm::runtime::ipcmemory::flags_size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory10FLAGS_SIZEE"]], "tensorrt_llm::runtime::ipcmemory::ipcmemory (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryERK11WorldConfigNSt6size_tE"]], "tensorrt_llm::runtime::ipcmemory::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9TensorPtrE"]], "tensorrt_llm::runtime::ipcmemory::allocateipcmemory (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory17allocateIpcMemoryEv"]], "tensorrt_llm::runtime::ipcmemory::destroyipcmemory (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory16destroyIpcMemoryEv"]], "tensorrt_llm::runtime::ipcmemory::getcommptrstensor (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9IpcMemory17getCommPtrsTensorEv"]], "tensorrt_llm::runtime::ipcmemory::mbufferptr (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory10mBufferPtrE"]], "tensorrt_llm::runtime::ipcmemory::mbuffersize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory11mBufferSizeE"]], "tensorrt_llm::runtime::ipcmemory::mcommptrs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9mCommPtrsE"]], "tensorrt_llm::runtime::ipcmemory::mworldconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory12mWorldConfigE"]], "tensorrt_llm::runtime::ipcmemory::~ipcmemory (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemoryD0Ev"]], "tensorrt_llm::runtime::loracache (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCacheE"]], "tensorrt_llm::runtime::loracache::loracache (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9LoraCacheERK26LoraCachePageManagerConfigRK14GptModelConfigRK11WorldConfigRK13BufferManager"]], "tensorrt_llm::runtime::loracache::taskidtype (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache10TaskIdTypeE"]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig (c++ struct)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfigE"]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::adaptersize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig11adapterSizeE"]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::insize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig6inSizeE"]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::layerid (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig7layerIdE"]], 
"tensorrt_llm::runtime::loracache::tasklayermoduleconfig::moduleid (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig8moduleIdE"]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::numslots (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig8numSlotsE"]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::operator== (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfigeqERKN9LoraCache21TaskLayerModuleConfigE"]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::outsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig7outSizeE"]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::pageid (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig6pageIdE"]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::slotidx (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig7slotIdxE"]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::tostring (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig8toStringEv"]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::weightsinpointer (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig16weightsInPointerE"]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::weightsoutpointer (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig17weightsOutPointerE"]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfiglistptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache28TaskLayerModuleConfigListPtrE"]], "tensorrt_llm::runtime::loracache::taskvalue (c++ struct)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValueE"]], "tensorrt_llm::runtime::loracache::taskvalue::taskvalue (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERKNSt6vectorINSt6size_tEEERK28TaskLayerModuleConfigListPtrNSt4listI10TaskIdTypeE8iteratorEbbbb"], [1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERR9TaskValue"], [1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueEv"]], "tensorrt_llm::runtime::loracache::taskvalue::configs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue7configsE"]], "tensorrt_llm::runtime::loracache::taskvalue::done (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue4doneE"]], "tensorrt_llm::runtime::loracache::taskvalue::inprogress (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue10inProgressE"]], "tensorrt_llm::runtime::loracache::taskvalue::it (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue2itE"]], "tensorrt_llm::runtime::loracache::taskvalue::loadinprogress (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue14loadInProgressE"]], "tensorrt_llm::runtime::loracache::taskvalue::loaded (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue6loadedE"]], "tensorrt_llm::runtime::loracache::taskvalue::operator= (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValueaSERR9TaskValue"]], "tensorrt_llm::runtime::loracache::taskvalue::pageids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue7pageIdsE"]], "tensorrt_llm::runtime::loracache::taskvalue::~taskvalue (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValueD0Ev"]], "tensorrt_llm::runtime::loracache::taskvalueptr (c++ type)": [[1, 
"_CPPv4N12tensorrt_llm7runtime9LoraCache12TaskValuePtrE"]], "tensorrt_llm::runtime::loracache::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TensorPtrE"]], "tensorrt_llm::runtime::loracache::valuestatus (c++ enum)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatusE"]], "tensorrt_llm::runtime::loracache::valuestatus::kvalue_status_loaded (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatus20kVALUE_STATUS_LOADEDE"]], "tensorrt_llm::runtime::loracache::valuestatus::kvalue_status_missing (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatus21kVALUE_STATUS_MISSINGE"]], "tensorrt_llm::runtime::loracache::valuestatus::kvalue_status_processing (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatus24kVALUE_STATUS_PROCESSINGE"]], "tensorrt_llm::runtime::loracache::bump (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache4bumpE10TaskIdType"]], "tensorrt_llm::runtime::loracache::bumptaskinprogress (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache18bumpTaskInProgressE10TaskIdType"]], "tensorrt_llm::runtime::loracache::claimpageswithevict (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache19claimPagesWithEvictE8SizeType"]], "tensorrt_llm::runtime::loracache::copytask (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache8copyTaskE10TaskIdTypeR9LoraCacheb"]], "tensorrt_llm::runtime::loracache::copytaskmappages (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache16copyTaskMapPagesER9TaskValueRK9TaskValueRKNSt6vectorI6size_tEERK9LoraCache"]], "tensorrt_llm::runtime::loracache::copytopages (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11copyToPagesE9TensorPtr9TensorPtrRK14GptModelConfigRK11WorldConfigNSt13unordered_mapI8SizeType10LoraModuleEERK13BufferManagerRKNSt6vectorI9TensorPtrEERKNSt6vectorINSt6size_tEEE"]], "tensorrt_llm::runtime::loracache::determinenumpages (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache17determineNumPagesE10TaskIdType"], [1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache17determineNumPagesE9TensorPtr"]], "tensorrt_llm::runtime::loracache::fits (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache4fitsE9TensorPtr"]], "tensorrt_llm::runtime::loracache::get (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache3getE10TaskIdType"]], "tensorrt_llm::runtime::loracache::getnumpages (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache11getNumPagesEv"]], "tensorrt_llm::runtime::loracache::getpageptr (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache10getPagePtrE6size_t"]], "tensorrt_llm::runtime::loracache::getstatus (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache9getStatusE10TaskIdType"]], "tensorrt_llm::runtime::loracache::has (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache3hasE10TaskIdType"]], "tensorrt_llm::runtime::loracache::isdone (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache6isDoneE10TaskIdType"]], "tensorrt_llm::runtime::loracache::isloaded (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache8isLoadedE10TaskIdType"]], "tensorrt_llm::runtime::loracache::loadweights (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11loadWeightsE10TaskIdType9TensorPtr9TensorPtr"], [1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11loadWeightsER9TaskValue9TensorPtr9TensorPtr"]], "tensorrt_llm::runtime::loracache::mbuffermanager (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache14mBufferManagerE"]], 
"tensorrt_llm::runtime::loracache::mcachemap (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9mCacheMapE"]], "tensorrt_llm::runtime::loracache::mcachemutex (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11mCacheMutexE"]], "tensorrt_llm::runtime::loracache::mcachepagemanager (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache17mCachePageManagerE"]], "tensorrt_llm::runtime::loracache::mdevicebuffermanagers (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21mDeviceBufferManagersE"]], "tensorrt_llm::runtime::loracache::mdonetasks (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache10mDoneTasksE"]], "tensorrt_llm::runtime::loracache::minprogresstasks (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache16mInProgressTasksE"]], "tensorrt_llm::runtime::loracache::mmodelconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache12mModelConfigE"]], "tensorrt_llm::runtime::loracache::mmoduleidtomodule (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache17mModuleIdToModuleE"]], "tensorrt_llm::runtime::loracache::mpagemanagerconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache18mPageManagerConfigE"]], "tensorrt_llm::runtime::loracache::mpagesmutex (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11mPagesMutexE"]], "tensorrt_llm::runtime::loracache::mworldconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache12mWorldConfigE"]], "tensorrt_llm::runtime::loracache::markalldone (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11markAllDoneEv"]], "tensorrt_llm::runtime::loracache::marktaskdone (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache12markTaskDoneE10TaskIdType"]], "tensorrt_llm::runtime::loracache::put (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache3putE10TaskIdType9TensorPtr9TensorPtrb"]], "tensorrt_llm::runtime::loracache::splittransposecpu (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache17splitTransposeCpuER7ITensorRK7ITensor8SizeType8SizeType"]], "tensorrt_llm::runtime::loracache::splittransposecpuinner (c++ function)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime9LoraCache22splitTransposeCpuInnerEvR7ITensorRK7ITensor8SizeType8SizeType"]], "tensorrt_llm::runtime::loracachepagemanager (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManagerE"]], "tensorrt_llm::runtime::loracachepagemanager::loracachepagemanager (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager20LoraCachePageManagerERK26LoraCachePageManagerConfigRK13BufferManager"]], "tensorrt_llm::runtime::loracachepagemanager::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager9TensorPtrE"]], "tensorrt_llm::runtime::loracachepagemanager::blockptr (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager8blockPtrE8SizeType"]], "tensorrt_llm::runtime::loracachepagemanager::claimpages (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager10claimPagesE8SizeType"]], "tensorrt_llm::runtime::loracachepagemanager::initialize (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager10initializeERK13BufferManager"]], "tensorrt_llm::runtime::loracachepagemanager::mconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager7mConfigE"]], "tensorrt_llm::runtime::loracachepagemanager::mfreepageids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager12mFreePageIdsE"]], "tensorrt_llm::runtime::loracachepagemanager::mispagefree (c++ 
member)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager11mIsPageFreeE"]], "tensorrt_llm::runtime::loracachepagemanager::mpageblocks (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager11mPageBlocksE"]], "tensorrt_llm::runtime::loracachepagemanager::mutablepageptr (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager14mutablePagePtrENSt6size_tE"]], "tensorrt_llm::runtime::loracachepagemanager::numavailablepages (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager17numAvailablePagesEv"]], "tensorrt_llm::runtime::loracachepagemanager::pageptr (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager7pagePtrENSt6size_tE"]], "tensorrt_llm::runtime::loracachepagemanager::releasepages (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager12releasePagesERKNSt6vectorINSt6size_tEEE"]], "tensorrt_llm::runtime::loracachepagemanagerconfig (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfigE"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::loracachepagemanagerconfig (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig26LoraCachePageManagerConfigEN7runtime10MemoryTypeEN8nvinfer18DataTypeE8SizeType8SizeType8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::getdatatype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig11getDataTypeEv"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::getinittozero (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig13getInitToZeroEv"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::getmaxpagesperblock (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig19getMaxPagesPerBlockEv"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::getmemorytype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig13getMemoryTypeEv"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::getnumcopystreams (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig17getNumCopyStreamsEv"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::getpagewidth (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig12getPageWidthEv"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::getslotsperpage (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig15getSlotsPerPageEv"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::gettotalnumpages (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig16getTotalNumPagesEv"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::mdatatype (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig9mDataTypeE"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::minittozero (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig11mInitToZeroE"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::mmaxpagesperblock (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig17mMaxPagesPerBlockE"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::mmemorytype (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig11mMemoryTypeE"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::mnumcopystreams (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15mNumCopyStreamsE"]], 
"tensorrt_llm::runtime::loracachepagemanagerconfig::mpagewidth (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig10mPageWidthE"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::mslotsperpage (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13mSlotsPerPageE"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::mtotalnumpages (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig14mTotalNumPagesE"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::setdatatype (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig11setDataTypeERKN8nvinfer18DataTypeE"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::setinittozero (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13setInitToZeroEb"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::setmaxpagesperblock (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig19setMaxPagesPerBlockERK8SizeType"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::setmemorytype (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13setMemoryTypeERKN7runtime10MemoryTypeE"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::setnumcopystreams (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig17setNumCopyStreamsE8SizeType"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::setpagewidth (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig12setPageWidthERK8SizeType"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::setslotsperpage (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15setSlotsPerPageERK8SizeType"]], "tensorrt_llm::runtime::loracachepagemanagerconfig::settotalnumpage (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15setTotalNumPageERK8SizeType"]], "tensorrt_llm::runtime::loramodule (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModuleE"]], "tensorrt_llm::runtime::loramodule::loramodule (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10LoraModule"], [1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10ModuleType8SizeType8SizeTypebb8SizeType8SizeType"], [1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleEv"]], "tensorrt_llm::runtime::loramodule::moduletype (c++ enum)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleTypeE"]], "tensorrt_llm::runtime::loramodule::moduletype::kattn_dense (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType11kATTN_DENSEE"]], "tensorrt_llm::runtime::loramodule::moduletype::kattn_k (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType7kATTN_KE"]], "tensorrt_llm::runtime::loramodule::moduletype::kattn_q (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType7kATTN_QE"]], "tensorrt_llm::runtime::loramodule::moduletype::kattn_qkv (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType9kATTN_QKVE"]], "tensorrt_llm::runtime::loramodule::moduletype::kattn_v (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType7kATTN_VE"]], "tensorrt_llm::runtime::loramodule::moduletype::kcross_attn_dense (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType17kCROSS_ATTN_DENSEE"]], "tensorrt_llm::runtime::loramodule::moduletype::kcross_attn_k (c++ enumerator)": [[1, 
"_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType13kCROSS_ATTN_KE"]], "tensorrt_llm::runtime::loramodule::moduletype::kcross_attn_q (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType13kCROSS_ATTN_QE"]], "tensorrt_llm::runtime::loramodule::moduletype::kcross_attn_qkv (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType15kCROSS_ATTN_QKVE"]], "tensorrt_llm::runtime::loramodule::moduletype::kcross_attn_v (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType13kCROSS_ATTN_VE"]], "tensorrt_llm::runtime::loramodule::moduletype::kinvalid (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType8kINVALIDE"]], "tensorrt_llm::runtime::loramodule::moduletype::kmlp_4h_to_h (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMLP_4H_TO_HE"]], "tensorrt_llm::runtime::loramodule::moduletype::kmlp_gate (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType9kMLP_GATEE"]], "tensorrt_llm::runtime::loramodule::moduletype::kmlp_h_to_4h (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMLP_H_TO_4HE"]], "tensorrt_llm::runtime::loramodule::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule9TensorPtrE"]], "tensorrt_llm::runtime::loramodule::createloramodules (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule17createLoraModulesERKNSt6vectorINSt6stringEEE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType"]], "tensorrt_llm::runtime::loramodule::flattenedinoutsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule18flattenedInOutSizeE8SizeType"]], "tensorrt_llm::runtime::loramodule::indim (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule5inDimEv"]], "tensorrt_llm::runtime::loramodule::indimfirst (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule10inDimFirstEv"]], "tensorrt_llm::runtime::loramodule::insize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule6inSizeE8SizeType"]], "tensorrt_llm::runtime::loramodule::intpsplitdim (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule12inTpSplitDimEv"]], "tensorrt_llm::runtime::loramodule::localinadaptersize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule18localInAdapterSizeE8SizeType8SizeType"]], "tensorrt_llm::runtime::loramodule::localindim (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule10localInDimE8SizeType"]], "tensorrt_llm::runtime::loramodule::localinoutsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule14localInOutSizeE8SizeType8SizeType"]], "tensorrt_llm::runtime::loramodule::localinsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule11localInSizeE8SizeType8SizeType"]], "tensorrt_llm::runtime::loramodule::localoutadaptersize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule19localOutAdapterSizeE8SizeType8SizeType"]], "tensorrt_llm::runtime::loramodule::localoutdim (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule11localOutDimE8SizeType"]], "tensorrt_llm::runtime::loramodule::localoutsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule12localOutSizeE8SizeType8SizeType"]], "tensorrt_llm::runtime::loramodule::mindim (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule6mInDimE"]], "tensorrt_llm::runtime::loramodule::mindimfirst (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule11mInDimFirstE"]], 
"tensorrt_llm::runtime::loramodule::mintpsplitdim (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule13mInTpSplitDimE"]], "tensorrt_llm::runtime::loramodule::moutdim (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule7mOutDimE"]], "tensorrt_llm::runtime::loramodule::moutdimfirst (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule12mOutDimFirstE"]], "tensorrt_llm::runtime::loramodule::mouttpsplitdim (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule14mOutTpSplitDimE"]], "tensorrt_llm::runtime::loramodule::mtype (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule5mTypeE"]], "tensorrt_llm::runtime::loramodule::name (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule4nameEv"]], "tensorrt_llm::runtime::loramodule::operator= (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModuleaSERK10LoraModule"]], "tensorrt_llm::runtime::loramodule::outdim (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule6outDimEv"]], "tensorrt_llm::runtime::loramodule::outdimfirst (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule11outDimFirstEv"]], "tensorrt_llm::runtime::loramodule::outsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule7outSizeE8SizeType"]], "tensorrt_llm::runtime::loramodule::outtpsplitdim (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule13outTpSplitDimEv"]], "tensorrt_llm::runtime::loramodule::tomodulename (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleNameE10ModuleType"], [1, "_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleNameE8SizeType"]], "tensorrt_llm::runtime::loramodule::tomoduletype (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleTypeERKNSt11string_viewE"]], "tensorrt_llm::runtime::loramodule::value (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule5valueEv"]], "tensorrt_llm::runtime::mambaconfig (c++ struct)": [[1, "_CPPv4N12tensorrt_llm7runtime11MambaConfigE"]], "tensorrt_llm::runtime::mambaconfig::dconv (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11MambaConfig5dConvE"]], "tensorrt_llm::runtime::mambaconfig::dstate (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11MambaConfig6dStateE"]], "tensorrt_llm::runtime::mambaconfig::expand (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11MambaConfig6expandE"]], "tensorrt_llm::runtime::memorycounters (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCountersE"]], "tensorrt_llm::runtime::memorycounters::difftype (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8DiffTypeE"]], "tensorrt_llm::runtime::memorycounters::memorycounters (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters14MemoryCountersEv"]], "tensorrt_llm::runtime::memorycounters::sizetype (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8SizeTypeE"]], "tensorrt_llm::runtime::memorycounters::allocate (c++ function)": [[1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv8SizeType"], [1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType8SizeType"]], "tensorrt_llm::runtime::memorycounters::bytestostring (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei"], [1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8SizeTypei"]], "tensorrt_llm::runtime::memorycounters::deallocate (c++ function)": [[1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv8SizeType"], [1, 
"_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType8SizeType"]], "tensorrt_llm::runtime::memorycounters::getcpu (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getCpuEv"]], "tensorrt_llm::runtime::memorycounters::getcpudiff (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getCpuDiffEv"]], "tensorrt_llm::runtime::memorycounters::getgpu (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getGpuEv"]], "tensorrt_llm::runtime::memorycounters::getgpudiff (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getGpuDiffEv"]], "tensorrt_llm::runtime::memorycounters::getinstance (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters11getInstanceEv"]], "tensorrt_llm::runtime::memorycounters::getpinned (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters9getPinnedEv"]], "tensorrt_llm::runtime::memorycounters::getpinneddiff (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters13getPinnedDiffEv"]], "tensorrt_llm::runtime::memorycounters::getuvm (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getUVMEv"]], "tensorrt_llm::runtime::memorycounters::getuvmdiff (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getUVMDiffEv"]], "tensorrt_llm::runtime::memorycounters::mcpu (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mCpuE"]], "tensorrt_llm::runtime::memorycounters::mcpudiff (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mCpuDiffE"]], "tensorrt_llm::runtime::memorycounters::mgpu (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mGpuE"]], "tensorrt_llm::runtime::memorycounters::mgpudiff (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mGpuDiffE"]], "tensorrt_llm::runtime::memorycounters::mpinned (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters7mPinnedE"]], "tensorrt_llm::runtime::memorycounters::mpinneddiff (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters11mPinnedDiffE"]], "tensorrt_llm::runtime::memorycounters::muvm (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mUVME"]], "tensorrt_llm::runtime::memorycounters::muvmdiff (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mUVMDiffE"]], "tensorrt_llm::runtime::memorycounters::tostring (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters8toStringEv"]], "tensorrt_llm::runtime::memorytype (c++ enum)": [[1, "_CPPv4N12tensorrt_llm7runtime10MemoryTypeE"]], "tensorrt_llm::runtime::memorytype::kcpu (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kCPUE"]], "tensorrt_llm::runtime::memorytype::kgpu (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kGPUE"]], "tensorrt_llm::runtime::memorytype::kpinned (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10MemoryType7kPINNEDE"]], "tensorrt_llm::runtime::memorytype::kuvm (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kUVME"]], "tensorrt_llm::runtime::memorytypestring (c++ struct)": [[1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime16MemoryTypeStringE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kcpu> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEEE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kcpu>::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEE5valueE"]], 
"tensorrt_llm::runtime::memorytypestring<memorytype::kgpu> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEEE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kgpu>::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEE5valueE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kpinned> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEEE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kpinned>::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEE5valueE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kuvm> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kUVMEEE"]], "tensorrt_llm::runtime::memorytypestring<memorytype::kuvm>::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kUVMEE5valueE"]], "tensorrt_llm::runtime::phonynameduetoerror::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE"]], "tensorrt_llm::runtime::phonynameduetoerror::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE"]], "tensorrt_llm::runtime::phonynameduetoerror::type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE"]], "tensorrt_llm::runtime::phonynameduetoerror::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE"]], "tensorrt_llm::runtime::pointerelementtype (c++ type)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime18PointerElementTypeE"]], "tensorrt_llm::runtime::prompttuningparams (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParamsE"]], "tensorrt_llm::runtime::prompttuningparams::prompttuningparams (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams18PromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr"]], "tensorrt_llm::runtime::prompttuningparams::sizetype (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams8SizeTypeE"]], "tensorrt_llm::runtime::prompttuningparams::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams9TensorPtrE"]], "tensorrt_llm::runtime::prompttuningparams::filltaskstensor (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb"]], "tensorrt_llm::runtime::samplingconfig (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfigE"]], "tensorrt_llm::runtime::samplingconfig::floattype (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9FloatTypeE"]], "tensorrt_llm::runtime::samplingconfig::optvec (c++ type)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig6OptVecE"]], "tensorrt_llm::runtime::samplingconfig::samplingconfig (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigE8SizeType"], [1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigERKN8executor14SamplingConfigERKNSt8optionalIN8executor25SpeculativeDecodingConfigEEE"], [1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigERKNSt6vectorI14SamplingConfigEE"]], "tensorrt_llm::runtime::samplingconfig::vec (c++ type)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig3VecE"]], "tensorrt_llm::runtime::samplingconfig::beamsearchdiversityrate (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig23beamSearchDiversityRateE"]], 
"tensorrt_llm::runtime::samplingconfig::beamwidth (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9beamWidthE"]], "tensorrt_llm::runtime::samplingconfig::draftacceptancethreshold (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig24draftAcceptanceThresholdE"]], "tensorrt_llm::runtime::samplingconfig::earlystopping (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig13earlyStoppingE"]], "tensorrt_llm::runtime::samplingconfig::frequencypenalty (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig16frequencyPenaltyE"]], "tensorrt_llm::runtime::samplingconfig::fusevalues (c++ function)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig10fuseValuesE6OptVecI1TERKNSt6vectorI14SamplingConfigEENSt8functionIF6OptVecI1TE8SizeTypeEEE"]], "tensorrt_llm::runtime::samplingconfig::lengthpenalty (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig13lengthPenaltyE"]], "tensorrt_llm::runtime::samplingconfig::minlength (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9minLengthE"]], "tensorrt_llm::runtime::samplingconfig::normalizelogprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig17normalizeLogProbsE"]], "tensorrt_llm::runtime::samplingconfig::operator== (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14SamplingConfigeqERK14SamplingConfig"]], "tensorrt_llm::runtime::samplingconfig::presencepenalty (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig15presencePenaltyE"]], "tensorrt_llm::runtime::samplingconfig::randomseed (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig10randomSeedE"]], "tensorrt_llm::runtime::samplingconfig::repetitionpenalty (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig17repetitionPenaltyE"]], "tensorrt_llm::runtime::samplingconfig::temperature (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig11temperatureE"]], "tensorrt_llm::runtime::samplingconfig::topk (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topKE"]], "tensorrt_llm::runtime::samplingconfig::topkmedusaheads (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig15topKMedusaHeadsE"]], "tensorrt_llm::runtime::samplingconfig::topp (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topPE"]], "tensorrt_llm::runtime::samplingconfig::toppdecay (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9topPDecayE"]], "tensorrt_llm::runtime::samplingconfig::toppmin (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig7topPMinE"]], "tensorrt_llm::runtime::samplingconfig::toppresetids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig12topPResetIdsE"]], "tensorrt_llm::runtime::sizetype (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime8SizeTypeE"]], "tensorrt_llm::runtime::stringptrmap (c++ type)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime12StringPtrMapE"]], "tensorrt_llm::runtime::trtdatatype (c++ struct)": [[1, "_CPPv4I0_bEN12tensorrt_llm7runtime11TRTDataTypeE"]], "tensorrt_llm::runtime::trtdatatype<t*> (c++ struct)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime11TRTDataTypeIP1TEE"]], "tensorrt_llm::runtime::trtdatatype<t*>::kunderlyingtype (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE15kUnderlyingTypeE"]], "tensorrt_llm::runtime::trtdatatype<t*>::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE5valueE"]], "tensorrt_llm::runtime::trtdatatype<bool> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIbEE"]], 
"tensorrt_llm::runtime::trtdatatype<bool>::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIbE5valueE"]], "tensorrt_llm::runtime::trtdatatype<float> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIfEE"]], "tensorrt_llm::runtime::trtdatatype<float>::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIfE5valueE"]], "tensorrt_llm::runtime::trtdatatype<half> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeI4halfEE"]], "tensorrt_llm::runtime::trtdatatype<half>::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeI4halfE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::int32_t> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::int32_t>::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::int64_t> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::int64_t>::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::int8_t> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::int8_t>::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::uint32_t> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::uint32_t>::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::uint64_t> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::uint64_t>::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<std::uint8_t> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEEE"]], "tensorrt_llm::runtime::trtdatatype<std::uint8_t>::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEE5valueE"]], "tensorrt_llm::runtime::trtdatatype<void*> (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIPvEE"]], "tensorrt_llm::runtime::trtdatatype<void*>::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIPvE5valueE"]], "tensorrt_llm::runtime::tllmlogger (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime10TllmLoggerE"]], "tensorrt_llm::runtime::tllmlogger::getlevel (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8getLevelEv"]], "tensorrt_llm::runtime::tllmlogger::log (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE"]], "tensorrt_llm::runtime::tllmlogger::setlevel (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8setLevelE8Severity"]], "tensorrt_llm::runtime::tokenidtype (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime11TokenIdTypeE"]], "tensorrt_llm::runtime::worldconfig (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime11WorldConfigE"]], "tensorrt_llm::runtime::worldconfig::worldconfig (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalINSt6vectorI8SizeTypeEEEE"]], "tensorrt_llm::runtime::worldconfig::getdevice (c++ function)": [[1, 
"_CPPv4NK12tensorrt_llm7runtime11WorldConfig9getDeviceEv"]], "tensorrt_llm::runtime::worldconfig::getgpuspergroup (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig15getGpusPerGroupEv"]], "tensorrt_llm::runtime::worldconfig::getgpuspernode (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig14getGpusPerNodeEv"]], "tensorrt_llm::runtime::worldconfig::getlastrank (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig11getLastRankEv"]], "tensorrt_llm::runtime::worldconfig::getpipelineparallelgroup (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig24getPipelineParallelGroupEv"]], "tensorrt_llm::runtime::worldconfig::getpipelineparallelrank (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig23getPipelineParallelRankEv"]], "tensorrt_llm::runtime::worldconfig::getpipelineparallelism (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig22getPipelineParallelismEv"]], "tensorrt_llm::runtime::worldconfig::getrank (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getRankEv"]], "tensorrt_llm::runtime::worldconfig::getsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getSizeEv"]], "tensorrt_llm::runtime::worldconfig::gettensorparallelrank (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig21getTensorParallelRankEv"]], "tensorrt_llm::runtime::worldconfig::gettensorparallelism (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig20getTensorParallelismEv"]], "tensorrt_llm::runtime::worldconfig::isfirstpipelineparallelrank (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig27isFirstPipelineParallelRankEv"]], "tensorrt_llm::runtime::worldconfig::islastpipelineparallelrank (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig26isLastPipelineParallelRankEv"]], "tensorrt_llm::runtime::worldconfig::ispipelineparallel (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig18isPipelineParallelEv"]], "tensorrt_llm::runtime::worldconfig::istensorparallel (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig16isTensorParallelEv"]], "tensorrt_llm::runtime::worldconfig::kdefaultgpuspernode (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig19kDefaultGpusPerNodeE"]], "tensorrt_llm::runtime::worldconfig::mdeviceids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig10mDeviceIdsE"]], "tensorrt_llm::runtime::worldconfig::mgpuspernode (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig12mGpusPerNodeE"]], "tensorrt_llm::runtime::worldconfig::mpipelineparallelism (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig20mPipelineParallelismE"]], "tensorrt_llm::runtime::worldconfig::mrank (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig5mRankE"]], "tensorrt_llm::runtime::worldconfig::mtensorparallelism (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig18mTensorParallelismE"]], "tensorrt_llm::runtime::worldconfig::mpi (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEERKNSt8optionalINSt6vectorI8SizeTypeEEEE"]], "tensorrt_llm::runtime::worldconfig::validmpiconfig (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11WorldConfig14validMpiConfigEv"]], "tensorrt_llm::runtime::buffercast (c++ function)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer"], [1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer"]], "tensorrt_llm::runtime::constpointercast (c++ 
function)": [[1, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE"], [1, "_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE"]], "tensorrt_llm::runtime::decoder (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoderE"]], "tensorrt_llm::runtime::decoder::input (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder5InputE"]], "tensorrt_llm::runtime::decoder::input::input (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input5InputE9TensorPtr"]], "tensorrt_llm::runtime::decoder::input::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input9TensorPtrE"]], "tensorrt_llm::runtime::decoder::input::cacheindirection (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input16cacheIndirectionE"]], "tensorrt_llm::runtime::decoder::input::logits (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder5Input6logitsE"]], "tensorrt_llm::runtime::decoder::output (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder6OutputE"]], "tensorrt_llm::runtime::decoder::output::output (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output6OutputEv"]], "tensorrt_llm::runtime::decoder::output::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output9TensorPtrE"]], "tensorrt_llm::runtime::decoder::output::cacheindirection (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output16cacheIndirectionE"]], "tensorrt_llm::runtime::decoder::output::sequencelengths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder6Output15sequenceLengthsE"]], "tensorrt_llm::runtime::decoder_batch (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batchE"]], "tensorrt_llm::runtime::decoder_batch::input (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5InputE"]], "tensorrt_llm::runtime::decoder_batch::input::input (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEE"], [1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEERKNSt6vectorIbEE"], [1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEE"], [1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEERKNSt6vectorIbEE"]], "tensorrt_llm::runtime::decoder_batch::input::tensorconstptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input14TensorConstPtrE"]], "tensorrt_llm::runtime::decoder_batch::input::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input9TensorPtrE"]], "tensorrt_llm::runtime::decoder_batch::input::active (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input6activeE"]], "tensorrt_llm::runtime::decoder_batch::input::cacheindirection (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input16cacheIndirectionE"]], "tensorrt_llm::runtime::decoder_batch::input::logits (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input6logitsE"]], "tensorrt_llm::runtime::decoder_batch::input::medusalogits (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input12medusaLogitsE"]], "tensorrt_llm::runtime::decoder_batch::output (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch6OutputE"]], "tensorrt_llm::runtime::decoder_batch::request (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7RequestE"]], "tensorrt_llm::runtime::decoder_batch::request::bufferptr (c++ 
type)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request9BufferPtrE"]], "tensorrt_llm::runtime::decoder_batch::request::consttensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request14ConstTensorPtrE"]], "tensorrt_llm::runtime::decoder_batch::request::request (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14ConstTensorPtr8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE"]], "tensorrt_llm::runtime::decoder_batch::request::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request9TensorPtrE"]], "tensorrt_llm::runtime::decoder_batch::request::badwordslist (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12badWordsListE"]], "tensorrt_llm::runtime::decoder_batch::request::computecumlogprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request18computeCumLogProbsE"]], "tensorrt_llm::runtime::decoder_batch::request::computelogprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request15computeLogProbsE"]], "tensorrt_llm::runtime::decoder_batch::request::draftlogits (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request11draftLogitsE"]], "tensorrt_llm::runtime::decoder_batch::request::drafttokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request11draftTokensE"]], "tensorrt_llm::runtime::decoder_batch::request::embeddingbias (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13embeddingBiasE"]], "tensorrt_llm::runtime::decoder_batch::request::endid (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request5endIdE"]], "tensorrt_llm::runtime::decoder_batch::request::generatedtokensperenginestep (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request28generatedTokensPerEngineStepE"]], "tensorrt_llm::runtime::decoder_batch::request::ids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request3idsE"]], "tensorrt_llm::runtime::decoder_batch::request::inputlen (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request8inputLenE"]], "tensorrt_llm::runtime::decoder_batch::request::maxnewtokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12maxNewTokensE"]], "tensorrt_llm::runtime::decoder_batch::request::medusapaths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request11medusaPathsE"]], "tensorrt_llm::runtime::decoder_batch::request::medusatreeids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13medusaTreeIdsE"]], "tensorrt_llm::runtime::decoder_batch::request::stopwordslist (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13stopWordsListE"]], "tensorrt_llm::runtime::decoder_batch::token (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5TokenE"]], "tensorrt_llm::runtime::decoder_batch::token::token (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5TokenERR9CudaEventRKNSt6vectorIbEE"]], "tensorrt_llm::runtime::decoder_batch::token::active (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token6activeE"]], "tensorrt_llm::runtime::decoder_batch::token::event (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5eventE"]], "tensorrt_llm::runtime::operator<< (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK10LoraModule"], [1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK26LoraCachePageManagerConfig"], [1, 
"_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer"], [1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor"], [1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE"], [1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN9LoraCache21TaskLayerModuleConfigE"]], "tensorrt_llm::runtime::setpeeraccess (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13setPeerAccessERK11WorldConfigb"]], "tensorrt_llm::runtime::to_string (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9to_stringERK26LoraCachePageManagerConfig"], [1, "_CPPv4N12tensorrt_llm7runtime9to_stringERKN9LoraCache21TaskLayerModuleConfigE"]], "tensorrt_llm::runtime::utils (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime5utilsE"]], "tensorrt_llm::runtime::utils::loadengine (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime5utils10loadEngineERKNSt6stringE"]], "auto (tensorrt_llm.functional.allreducestrategy attribute)": [[29, "tensorrt_llm.functional.AllReduceStrategy.AUTO"]], "allreduceconfig (class in tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.AllReduceConfig"]], "allreducestrategy (class in tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.AllReduceStrategy"]], "attentionmasktype (class in tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.AttentionMaskType"]], "dimrange (class in tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.DimRange"]], "fusedgatedmlp (tensorrt_llm.functional.mlptype attribute)": [[29, "tensorrt_llm.functional.MLPType.FusedGatedMLP"]], "gatedmlp (tensorrt_llm.functional.mlptype attribute)": [[29, "tensorrt_llm.functional.MLPType.GatedMLP"]], "groupnorm (tensorrt_llm.functional.layernormtype attribute)": [[29, "tensorrt_llm.functional.LayerNormType.GroupNorm"]], "layernorm (tensorrt_llm.functional.layernormtype attribute)": [[29, "tensorrt_llm.functional.LayerNormType.LayerNorm"]], "layernormpositiontype (class in tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.LayerNormPositionType"]], "layernormtype (class in tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.LayerNormType"]], "mlp (tensorrt_llm.functional.mlptype attribute)": [[29, "tensorrt_llm.functional.MLPType.MLP"]], "mlptype (class in tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.MLPType"]], "nccl (tensorrt_llm.functional.allreducestrategy attribute)": [[29, "tensorrt_llm.functional.AllReduceStrategy.NCCL"]], "oneshot (tensorrt_llm.functional.allreducestrategy attribute)": [[29, "tensorrt_llm.functional.AllReduceStrategy.ONESHOT"]], "push_mode (tensorrt_llm.functional.allreduceconfig attribute)": [[29, "tensorrt_llm.functional.AllReduceConfig.PUSH_MODE"]], "positionembeddingtype (class in tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.PositionEmbeddingType"]], "rmsnorm (tensorrt_llm.functional.layernormtype attribute)": [[29, "tensorrt_llm.functional.LayerNormType.RmsNorm"]], "rotaryscalingtype (class in tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.RotaryScalingType"]], "twoshot (tensorrt_llm.functional.allreducestrategy attribute)": [[29, "tensorrt_llm.functional.AllReduceStrategy.TWOSHOT"]], "tensor (class in tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.Tensor"]], "use_memcpy (tensorrt_llm.functional.allreduceconfig attribute)": [[29, "tensorrt_llm.functional.AllReduceConfig.USE_MEMCPY"]], "abs() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.abs"]], "abs() (tensorrt_llm.functional.tensor method)": [[29, "tensorrt_llm.functional.Tensor.abs"]], "activation() (in module tensorrt_llm.functional)": 
[[29, "tensorrt_llm.functional.activation"]], "add() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.add"]], "alibi (tensorrt_llm.functional.positionembeddingtype attribute)": [[29, "tensorrt_llm.functional.PositionEmbeddingType.alibi"]], "alibi_with_scale (tensorrt_llm.functional.positionembeddingtype attribute)": [[29, "tensorrt_llm.functional.PositionEmbeddingType.alibi_with_scale"]], "allgather() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.allgather"]], "allreduce() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.allreduce"]], "arange() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.arange"]], "argmax() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.argmax"]], "assertion() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.assertion"]], "avg_pool2d() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.avg_pool2d"]], "bert_attention() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.bert_attention"]], "bidirectional (tensorrt_llm.functional.attentionmasktype attribute)": [[29, "tensorrt_llm.functional.AttentionMaskType.bidirectional"]], "bidirectionalglm (tensorrt_llm.functional.attentionmasktype attribute)": [[29, "tensorrt_llm.functional.AttentionMaskType.bidirectionalglm"]], "broadcast_helper() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.broadcast_helper"]], "cast() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.cast"]], "cast() (tensorrt_llm.functional.tensor method)": [[29, "tensorrt_llm.functional.Tensor.cast"]], "causal (tensorrt_llm.functional.attentionmasktype attribute)": [[29, "tensorrt_llm.functional.AttentionMaskType.causal"]], "chatglm (tensorrt_llm.functional.positionembeddingtype attribute)": [[29, "tensorrt_llm.functional.PositionEmbeddingType.chatglm"]], "choices() (tensorrt_llm.functional.positionembeddingtype static method)": [[29, "tensorrt_llm.functional.PositionEmbeddingType.choices"]], "chunk() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.chunk"]], "clip() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.clip"]], "concat() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.concat"]], "conditional() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.conditional"]], "constant() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.constant"]], "constant_to_tensor_() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.constant_to_tensor_"]], "conv1d() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.conv1d"]], "conv2d() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.conv2d"]], "conv_transpose2d() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.conv_transpose2d"]], "cos() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.cos"]], "cumsum() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.cumsum"]], "div() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.div"]], "dtype (tensorrt_llm.functional.tensor property)": [[29, "tensorrt_llm.functional.Tensor.dtype"]], "dynamic (tensorrt_llm.functional.rotaryscalingtype attribute)": [[29, "tensorrt_llm.functional.RotaryScalingType.dynamic"]], "einsum() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.einsum"]], "elementwise_binary() (in module tensorrt_llm.functional)": 
[[29, "tensorrt_llm.functional.elementwise_binary"]], "embedding() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.embedding"]], "eq() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.eq"]], "exp() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.exp"]], "expand() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.expand"]], "expand_dims() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.expand_dims"]], "expand_dims_like() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.expand_dims_like"]], "expand_mask() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.expand_mask"]], "flip() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.flip"]], "from_string() (tensorrt_llm.functional.positionembeddingtype static method)": [[29, "tensorrt_llm.functional.PositionEmbeddingType.from_string"]], "gather() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.gather"]], "gather_last_token_logits() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.gather_last_token_logits"]], "geglu() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.geglu"]], "gelu() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.gelu"]], "generate_alibi_biases() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.generate_alibi_biases"]], "generate_alibi_slopes() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.generate_alibi_slopes"]], "get_parent() (tensorrt_llm.functional.tensor method)": [[29, "tensorrt_llm.functional.Tensor.get_parent"]], "get_users() (tensorrt_llm.functional.tensor method)": [[29, "tensorrt_llm.functional.Tensor.get_users"]], "gpt_attention() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.gpt_attention"]], "group_norm() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.group_norm"]], "gt() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.gt"]], "identity() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.identity"]], "index_select() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.index_select"]], "interpolate() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.interpolate"]], "is_alibi() (tensorrt_llm.functional.positionembeddingtype method)": [[29, "tensorrt_llm.functional.PositionEmbeddingType.is_alibi"]], "is_dynamic() (tensorrt_llm.functional.tensor method)": [[29, "tensorrt_llm.functional.Tensor.is_dynamic"]], "is_gated_activation() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.is_gated_activation"]], "is_rope() (tensorrt_llm.functional.positionembeddingtype method)": [[29, "tensorrt_llm.functional.PositionEmbeddingType.is_rope"]], "is_trt_wrapper() (tensorrt_llm.functional.tensor method)": [[29, "tensorrt_llm.functional.Tensor.is_trt_wrapper"]], "layer_norm() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.layer_norm"]], "learned_absolute (tensorrt_llm.functional.positionembeddingtype attribute)": [[29, "tensorrt_llm.functional.PositionEmbeddingType.learned_absolute"]], "linear (tensorrt_llm.functional.rotaryscalingtype attribute)": [[29, "tensorrt_llm.functional.RotaryScalingType.linear"]], "location (tensorrt_llm.functional.tensor property)": [[29, "tensorrt_llm.functional.Tensor.location"]], "log() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.log"]], 
"log() (tensorrt_llm.functional.tensor method)": [[29, "tensorrt_llm.functional.Tensor.log"]], "lora_plugin() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.lora_plugin"]], "lt() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.lt"]], "mamba_conv1d() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.mamba_conv1d"]], "mark_output() (tensorrt_llm.functional.tensor method)": [[29, "tensorrt_llm.functional.Tensor.mark_output"]], "masked_scatter() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.masked_scatter"]], "masked_select() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.masked_select"]], "matmul() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.matmul"]], "max() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.max"]], "max() (tensorrt_llm.functional.tensor method)": [[29, "tensorrt_llm.functional.Tensor.max"]], "maximum() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.maximum"]], "mean() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.mean"]], "mean() (tensorrt_llm.functional.tensor method)": [[29, "tensorrt_llm.functional.Tensor.mean"]], "minimum() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.minimum"]], "module": [[29, "module-tensorrt_llm"], [29, "module-tensorrt_llm.functional"], [30, "module-tensorrt_llm"], [30, "module-tensorrt_llm.layers.activation"], [30, "module-tensorrt_llm.layers.attention"], [30, "module-tensorrt_llm.layers.cast"], [30, "module-tensorrt_llm.layers.conv"], [30, "module-tensorrt_llm.layers.embedding"], [30, "module-tensorrt_llm.layers.linear"], [30, "module-tensorrt_llm.layers.mlp"], [30, "module-tensorrt_llm.layers.normalization"], [30, "module-tensorrt_llm.layers.pooling"], [31, "module-tensorrt_llm"], [31, "module-tensorrt_llm.models"], [32, "module-tensorrt_llm"], [32, "module-tensorrt_llm.plugin"], [33, "module-tensorrt_llm"], [33, "module-tensorrt_llm.quantization"], [34, "module-tensorrt_llm"], [34, "module-tensorrt_llm.runtime"]], "mul() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.mul"]], "name (tensorrt_llm.functional.tensor property)": [[29, "tensorrt_llm.functional.Tensor.name"]], "ndim() (tensorrt_llm.functional.tensor method)": [[29, "tensorrt_llm.functional.Tensor.ndim"]], "network (tensorrt_llm.functional.tensor property)": [[29, "tensorrt_llm.functional.Tensor.network"]], "non_gated_version() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.non_gated_version"]], "none (tensorrt_llm.functional.rotaryscalingtype attribute)": [[29, "tensorrt_llm.functional.RotaryScalingType.none"]], "op_and() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.op_and"]], "op_or() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.op_or"]], "outer() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.outer"]], "padding (tensorrt_llm.functional.attentionmasktype attribute)": [[29, "tensorrt_llm.functional.AttentionMaskType.padding"]], "permute() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.permute"]], "permute() (tensorrt_llm.functional.tensor method)": [[29, "tensorrt_llm.functional.Tensor.permute"]], "post_layernorm (tensorrt_llm.functional.layernormpositiontype attribute)": [[29, "tensorrt_llm.functional.LayerNormPositionType.post_layernorm"]], "pow() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.pow"]], 
"pre_layernorm (tensorrt_llm.functional.layernormpositiontype attribute)": [[29, "tensorrt_llm.functional.LayerNormPositionType.pre_layernorm"]], "rank() (tensorrt_llm.functional.tensor method)": [[29, "tensorrt_llm.functional.Tensor.rank"]], "recv() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.recv"]], "relative (tensorrt_llm.functional.positionembeddingtype attribute)": [[29, "tensorrt_llm.functional.PositionEmbeddingType.relative"]], "relu() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.relu"]], "repeat_interleave() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.repeat_interleave"]], "replace_all_uses_with() (tensorrt_llm.functional.tensor method)": [[29, "tensorrt_llm.functional.Tensor.replace_all_uses_with"]], "rms_norm() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.rms_norm"]], "rope_gpt_neox (tensorrt_llm.functional.positionembeddingtype attribute)": [[29, "tensorrt_llm.functional.PositionEmbeddingType.rope_gpt_neox"]], "rope_gptj (tensorrt_llm.functional.positionembeddingtype attribute)": [[29, "tensorrt_llm.functional.PositionEmbeddingType.rope_gptj"]], "round() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.round"]], "select() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.select"]], "selective_scan() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.selective_scan"]], "send() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.send"]], "shape (tensorrt_llm.functional.tensor property)": [[29, "tensorrt_llm.functional.Tensor.shape"]], "shape() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.shape"]], "sigmoid() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.sigmoid"]], "silu() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.silu"]], "sin() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.sin"]], "size() (tensorrt_llm.functional.tensor method)": [[29, "tensorrt_llm.functional.Tensor.size"]], "slice() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.slice"]], "softmax() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.softmax"]], "softplus() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.softplus"]], "split() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.split"]], "split() (tensorrt_llm.functional.tensor method)": [[29, "tensorrt_llm.functional.Tensor.split"]], "sqrt() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.sqrt"]], "sqrt() (tensorrt_llm.functional.tensor method)": [[29, "tensorrt_llm.functional.Tensor.sqrt"]], "squared_relu() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.squared_relu"]], "stack() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.stack"]], "sub() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.sub"]], "sum() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.sum"]], "swiglu() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.swiglu"]], "tanh() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.tanh"]], "tensorrt_llm": [[29, "module-tensorrt_llm"], [30, "module-tensorrt_llm"], [31, "module-tensorrt_llm"], [32, "module-tensorrt_llm"], [33, "module-tensorrt_llm"], [34, "module-tensorrt_llm"]], "tensorrt_llm.functional": [[29, "module-tensorrt_llm.functional"]], "topk() (in module 
tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.topk"]], "transpose() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.transpose"]], "transpose() (tensorrt_llm.functional.tensor method)": [[29, "tensorrt_llm.functional.Tensor.transpose"]], "unary() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.unary"]], "unsqueeze() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.unsqueeze"]], "view() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.view"]], "view() (tensorrt_llm.functional.tensor method)": [[29, "tensorrt_llm.functional.Tensor.view"]], "where() (in module tensorrt_llm.functional)": [[29, "tensorrt_llm.functional.where"]], "attention (class in tensorrt_llm.layers.attention)": [[30, "tensorrt_llm.layers.attention.Attention"]], "attentionparams (class in tensorrt_llm.layers.attention)": [[30, "tensorrt_llm.layers.attention.AttentionParams"]], "avgpool2d (class in tensorrt_llm.layers.pooling)": [[30, "tensorrt_llm.layers.pooling.AvgPool2d"]], "bertattention (class in tensorrt_llm.layers.attention)": [[30, "tensorrt_llm.layers.attention.BertAttention"]], "cast (class in tensorrt_llm.layers.cast)": [[30, "tensorrt_llm.layers.cast.Cast"]], "columnlinear (in module tensorrt_llm.layers.linear)": [[30, "tensorrt_llm.layers.linear.ColumnLinear"]], "conv1d (class in tensorrt_llm.layers.conv)": [[30, "tensorrt_llm.layers.conv.Conv1d"]], "conv2d (class in tensorrt_llm.layers.conv)": [[30, "tensorrt_llm.layers.conv.Conv2d"]], "convtranspose2d (class in tensorrt_llm.layers.conv)": [[30, "tensorrt_llm.layers.conv.ConvTranspose2d"]], "embedding (class in tensorrt_llm.layers.embedding)": [[30, "tensorrt_llm.layers.embedding.Embedding"]], "fusedgatedmlp (class in tensorrt_llm.layers.mlp)": [[30, "tensorrt_llm.layers.mlp.FusedGatedMLP"]], "gatedmlp (class in tensorrt_llm.layers.mlp)": [[30, "tensorrt_llm.layers.mlp.GatedMLP"]], "groupnorm (class in tensorrt_llm.layers.normalization)": [[30, "tensorrt_llm.layers.normalization.GroupNorm"]], "keyvaluecacheparams (class in tensorrt_llm.layers.attention)": [[30, "tensorrt_llm.layers.attention.KeyValueCacheParams"]], "layernorm (class in tensorrt_llm.layers.normalization)": [[30, "tensorrt_llm.layers.normalization.LayerNorm"]], "linear (class in tensorrt_llm.layers.linear)": [[30, "tensorrt_llm.layers.linear.Linear"]], "mlp (class in tensorrt_llm.layers.mlp)": [[30, "tensorrt_llm.layers.mlp.MLP"]], "mish (class in tensorrt_llm.layers.activation)": [[30, "tensorrt_llm.layers.activation.Mish"]], "parallellmhead (class in tensorrt_llm.layers.linear)": [[30, "tensorrt_llm.layers.linear.ParallelLMHead"]], "prompttuningembedding (class in tensorrt_llm.layers.embedding)": [[30, "tensorrt_llm.layers.embedding.PromptTuningEmbedding"]], "qkvcolumnlinear (class in tensorrt_llm.layers.linear)": [[30, "tensorrt_llm.layers.linear.QKVColumnLinear"]], "rmsnorm (class in tensorrt_llm.layers.normalization)": [[30, "tensorrt_llm.layers.normalization.RmsNorm"]], "ropeembeddingutils (class in tensorrt_llm.layers.attention)": [[30, "tensorrt_llm.layers.attention.RopeEmbeddingUtils"]], "rowlinear (class in tensorrt_llm.layers.linear)": [[30, "tensorrt_llm.layers.linear.RowLinear"]], "apply_rotary_pos_emb() (tensorrt_llm.layers.attention.ropeembeddingutils static method)": [[30, "tensorrt_llm.layers.attention.RopeEmbeddingUtils.apply_rotary_pos_emb"]], "apply_rotary_pos_emb_chatglm() (tensorrt_llm.layers.attention.ropeembeddingutils static method)": [[30, 
"tensorrt_llm.layers.attention.RopeEmbeddingUtils.apply_rotary_pos_emb_chatglm"]], "compute_relative_bias() (in module tensorrt_llm.layers.attention)": [[30, "tensorrt_llm.layers.attention.compute_relative_bias"]], "create_sinusoidal_positions() (tensorrt_llm.layers.attention.ropeembeddingutils static method)": [[30, "tensorrt_llm.layers.attention.RopeEmbeddingUtils.create_sinusoidal_positions"]], "fill_none_tensor_list() (tensorrt_llm.layers.attention.keyvaluecacheparams method)": [[30, "tensorrt_llm.layers.attention.KeyValueCacheParams.fill_none_tensor_list"]], "forward() (tensorrt_llm.layers.activation.mish method)": [[30, "tensorrt_llm.layers.activation.Mish.forward"]], "forward() (tensorrt_llm.layers.attention.attention method)": [[30, "tensorrt_llm.layers.attention.Attention.forward"]], "forward() (tensorrt_llm.layers.attention.bertattention method)": [[30, "tensorrt_llm.layers.attention.BertAttention.forward"]], "forward() (tensorrt_llm.layers.cast.cast method)": [[30, "tensorrt_llm.layers.cast.Cast.forward"]], "forward() (tensorrt_llm.layers.conv.conv1d method)": [[30, "tensorrt_llm.layers.conv.Conv1d.forward"]], "forward() (tensorrt_llm.layers.conv.conv2d method)": [[30, "tensorrt_llm.layers.conv.Conv2d.forward"]], "forward() (tensorrt_llm.layers.conv.convtranspose2d method)": [[30, "tensorrt_llm.layers.conv.ConvTranspose2d.forward"]], "forward() (tensorrt_llm.layers.embedding.embedding method)": [[30, "tensorrt_llm.layers.embedding.Embedding.forward"]], "forward() (tensorrt_llm.layers.embedding.prompttuningembedding method)": [[30, "tensorrt_llm.layers.embedding.PromptTuningEmbedding.forward"]], "forward() (tensorrt_llm.layers.linear.linear method)": [[30, "tensorrt_llm.layers.linear.Linear.forward"]], "forward() (tensorrt_llm.layers.linear.rowlinear method)": [[30, "tensorrt_llm.layers.linear.RowLinear.forward"]], "forward() (tensorrt_llm.layers.mlp.fusedgatedmlp method)": [[30, "tensorrt_llm.layers.mlp.FusedGatedMLP.forward"]], "forward() (tensorrt_llm.layers.mlp.gatedmlp method)": [[30, "tensorrt_llm.layers.mlp.GatedMLP.forward"]], "forward() (tensorrt_llm.layers.mlp.mlp method)": [[30, "tensorrt_llm.layers.mlp.MLP.forward"]], "forward() (tensorrt_llm.layers.normalization.groupnorm method)": [[30, "tensorrt_llm.layers.normalization.GroupNorm.forward"]], "forward() (tensorrt_llm.layers.normalization.layernorm method)": [[30, "tensorrt_llm.layers.normalization.LayerNorm.forward"]], "forward() (tensorrt_llm.layers.normalization.rmsnorm method)": [[30, "tensorrt_llm.layers.normalization.RmsNorm.forward"]], "forward() (tensorrt_llm.layers.pooling.avgpool2d method)": [[30, "tensorrt_llm.layers.pooling.AvgPool2d.forward"]], "get_first_past_key_value() (tensorrt_llm.layers.attention.keyvaluecacheparams method)": [[30, "tensorrt_llm.layers.attention.KeyValueCacheParams.get_first_past_key_value"]], "is_valid() (tensorrt_llm.layers.attention.attentionparams method)": [[30, "tensorrt_llm.layers.attention.AttentionParams.is_valid"]], "is_valid() (tensorrt_llm.layers.attention.keyvaluecacheparams method)": [[30, "tensorrt_llm.layers.attention.KeyValueCacheParams.is_valid"]], "is_valid_cross_attn() (tensorrt_llm.layers.attention.attentionparams method)": [[30, "tensorrt_llm.layers.attention.AttentionParams.is_valid_cross_attn"]], "make_causal_mask() (in module tensorrt_llm.layers.attention)": [[30, "tensorrt_llm.layers.attention.make_causal_mask"]], "multiply_gather() (tensorrt_llm.layers.linear.linear method)": [[30, "tensorrt_llm.layers.linear.Linear.multiply_gather"]], "multiply_reduce() 
(tensorrt_llm.layers.linear.rowlinear method)": [[30, "tensorrt_llm.layers.linear.RowLinear.multiply_reduce"]], "rotate_every_two() (tensorrt_llm.layers.attention.ropeembeddingutils static method)": [[30, "tensorrt_llm.layers.attention.RopeEmbeddingUtils.rotate_every_two"]], "rotate_half() (tensorrt_llm.layers.attention.ropeembeddingutils static method)": [[30, "tensorrt_llm.layers.attention.RopeEmbeddingUtils.rotate_half"]], "tensorrt_llm.layers.activation": [[30, "module-tensorrt_llm.layers.activation"]], "tensorrt_llm.layers.attention": [[30, "module-tensorrt_llm.layers.attention"]], "tensorrt_llm.layers.cast": [[30, "module-tensorrt_llm.layers.cast"]], "tensorrt_llm.layers.conv": [[30, "module-tensorrt_llm.layers.conv"]], "tensorrt_llm.layers.embedding": [[30, "module-tensorrt_llm.layers.embedding"]], "tensorrt_llm.layers.linear": [[30, "module-tensorrt_llm.layers.linear"]], "tensorrt_llm.layers.mlp": [[30, "module-tensorrt_llm.layers.mlp"]], "tensorrt_llm.layers.normalization": [[30, "module-tensorrt_llm.layers.normalization"]], "tensorrt_llm.layers.pooling": [[30, "module-tensorrt_llm.layers.pooling"]], "weight_loader() (tensorrt_llm.layers.embedding.embedding method)": [[30, "tensorrt_llm.layers.embedding.Embedding.weight_loader"]], "weight_loader() (tensorrt_llm.layers.linear.linear method)": [[30, "tensorrt_llm.layers.linear.Linear.weight_loader"]], "weight_loader() (tensorrt_llm.layers.linear.parallellmhead method)": [[30, "tensorrt_llm.layers.linear.ParallelLMHead.weight_loader"]], "weight_loader() (tensorrt_llm.layers.linear.qkvcolumnlinear method)": [[30, "tensorrt_llm.layers.linear.QKVColumnLinear.weight_loader"]], "weight_loader() (tensorrt_llm.layers.linear.rowlinear method)": [[30, "tensorrt_llm.layers.linear.RowLinear.weight_loader"]], "baichuanforcausallm (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.BaichuanForCausalLM"]], "bertforquestionanswering (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.BertForQuestionAnswering"]], "bertforsequenceclassification (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.BertForSequenceClassification"]], "bertmodel (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.BertModel"]], "bloomforcausallm (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.BloomForCausalLM"]], "bloommodel (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.BloomModel"]], "chatglmforcausallm (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.ChatGLMForCausalLM"]], "chatglmmodel (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.ChatGLMModel"]], "decodermodel (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.DecoderModel"]], "encodermodel (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.EncoderModel"]], "falconforcausallm (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.FalconForCausalLM"]], "falconmodel (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.FalconModel"]], "gptforcausallm (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.GPTForCausalLM"]], "gptjforcausallm (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.GPTJForCausalLM"]], "gptjmodel (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.GPTJModel"]], "gptmodel (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.GPTModel"]], "gptneoxforcausallm (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.GPTNeoXForCausalLM"]], "gptneoxmodel (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.GPTNeoXModel"]], "gemmaforcausallm (class in 
tensorrt_llm.models)": [[31, "tensorrt_llm.models.GemmaForCausalLM"]], "llamaforcausallm (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.LLaMAForCausalLM"]], "llamamodel (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.LLaMAModel"]], "mptforcausallm (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.MPTForCausalLM"]], "mptmodel (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.MPTModel"]], "mambalmheadmodel (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.MambaLMHeadModel"]], "medusaforcausallm (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.MedusaForCausalLm"]], "optforcausallm (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.OPTForCausalLM"]], "optmodel (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.OPTModel"]], "phiforcausallm (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.PhiForCausalLM"]], "phimodel (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.PhiModel"]], "pretrainedconfig (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.PretrainedConfig"]], "pretrainedmodel (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.PretrainedModel"]], "qwenforcausallm (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.QWenForCausalLM"]], "whisperencoder (class in tensorrt_llm.models)": [[31, "tensorrt_llm.models.WhisperEncoder"]], "check_config() (tensorrt_llm.models.chatglmforcausallm method)": [[31, "tensorrt_llm.models.ChatGLMForCausalLM.check_config"]], "check_config() (tensorrt_llm.models.falconforcausallm method)": [[31, "tensorrt_llm.models.FalconForCausalLM.check_config"]], "check_config() (tensorrt_llm.models.gptforcausallm method)": [[31, "tensorrt_llm.models.GPTForCausalLM.check_config"]], "check_config() (tensorrt_llm.models.gptjforcausallm method)": [[31, "tensorrt_llm.models.GPTJForCausalLM.check_config"]], "check_config() (tensorrt_llm.models.gemmaforcausallm method)": [[31, "tensorrt_llm.models.GemmaForCausalLM.check_config"]], "check_config() (tensorrt_llm.models.llamaforcausallm method)": [[31, "tensorrt_llm.models.LLaMAForCausalLM.check_config"]], "check_config() (tensorrt_llm.models.mptforcausallm method)": [[31, "tensorrt_llm.models.MPTForCausalLM.check_config"]], "check_config() (tensorrt_llm.models.optforcausallm method)": [[31, "tensorrt_llm.models.OPTForCausalLM.check_config"]], "check_config() (tensorrt_llm.models.phiforcausallm method)": [[31, "tensorrt_llm.models.PhiForCausalLM.check_config"]], "check_config() (tensorrt_llm.models.pretrainedmodel method)": [[31, "tensorrt_llm.models.PretrainedModel.check_config"]], "check_config() (tensorrt_llm.models.qwenforcausallm method)": [[31, "tensorrt_llm.models.QWenForCausalLM.check_config"]], "convert_hf_checkpoint() (tensorrt_llm.models.phiforcausallm class method)": [[31, "tensorrt_llm.models.PhiForCausalLM.convert_hf_checkpoint"]], "default_plugin_config() (tensorrt_llm.models.llamaforcausallm method)": [[31, "tensorrt_llm.models.LLaMAForCausalLM.default_plugin_config"]], "forward() (tensorrt_llm.models.bertforquestionanswering method)": [[31, "tensorrt_llm.models.BertForQuestionAnswering.forward"]], "forward() (tensorrt_llm.models.bertforsequenceclassification method)": [[31, "tensorrt_llm.models.BertForSequenceClassification.forward"]], "forward() (tensorrt_llm.models.bertmodel method)": [[31, "tensorrt_llm.models.BertModel.forward"]], "forward() (tensorrt_llm.models.bloommodel method)": [[31, "tensorrt_llm.models.BloomModel.forward"]], "forward() (tensorrt_llm.models.chatglmmodel 
method)": [[31, "tensorrt_llm.models.ChatGLMModel.forward"]], "forward() (tensorrt_llm.models.decodermodel method)": [[31, "tensorrt_llm.models.DecoderModel.forward"]], "forward() (tensorrt_llm.models.encodermodel method)": [[31, "tensorrt_llm.models.EncoderModel.forward"]], "forward() (tensorrt_llm.models.falconmodel method)": [[31, "tensorrt_llm.models.FalconModel.forward"]], "forward() (tensorrt_llm.models.gptjmodel method)": [[31, "tensorrt_llm.models.GPTJModel.forward"]], "forward() (tensorrt_llm.models.gptmodel method)": [[31, "tensorrt_llm.models.GPTModel.forward"]], "forward() (tensorrt_llm.models.gptneoxmodel method)": [[31, "tensorrt_llm.models.GPTNeoXModel.forward"]], "forward() (tensorrt_llm.models.llamamodel method)": [[31, "tensorrt_llm.models.LLaMAModel.forward"]], "forward() (tensorrt_llm.models.mptmodel method)": [[31, "tensorrt_llm.models.MPTModel.forward"]], "forward() (tensorrt_llm.models.mambalmheadmodel method)": [[31, "tensorrt_llm.models.MambaLMHeadModel.forward"]], "forward() (tensorrt_llm.models.medusaforcausallm method)": [[31, "tensorrt_llm.models.MedusaForCausalLm.forward"]], "forward() (tensorrt_llm.models.optmodel method)": [[31, "tensorrt_llm.models.OPTModel.forward"]], "forward() (tensorrt_llm.models.phimodel method)": [[31, "tensorrt_llm.models.PhiModel.forward"]], "forward() (tensorrt_llm.models.whisperencoder method)": [[31, "tensorrt_llm.models.WhisperEncoder.forward"]], "from_checkpoint() (tensorrt_llm.models.pretrainedmodel class method)": [[31, "tensorrt_llm.models.PretrainedModel.from_checkpoint"]], "from_config() (tensorrt_llm.models.pretrainedmodel class method)": [[31, "tensorrt_llm.models.PretrainedModel.from_config"]], "from_dict() (tensorrt_llm.models.pretrainedconfig class method)": [[31, "tensorrt_llm.models.PretrainedConfig.from_dict"]], "from_hugging_face() (tensorrt_llm.models.gemmaforcausallm class method)": [[31, "tensorrt_llm.models.GemmaForCausalLM.from_hugging_face"]], "from_hugging_face() (tensorrt_llm.models.llamaforcausallm class method)": [[31, "tensorrt_llm.models.LLaMAForCausalLM.from_hugging_face"]], "from_json_file() (tensorrt_llm.models.pretrainedconfig class method)": [[31, "tensorrt_llm.models.PretrainedConfig.from_json_file"]], "from_meta_ckpt() (tensorrt_llm.models.llamaforcausallm class method)": [[31, "tensorrt_llm.models.LLaMAForCausalLM.from_meta_ckpt"]], "load() (tensorrt_llm.models.pretrainedmodel method)": [[31, "tensorrt_llm.models.PretrainedModel.load"]], "load_partial_weights() (tensorrt_llm.models.pretrainedmodel method)": [[31, "tensorrt_llm.models.PretrainedModel.load_partial_weights"]], "prepare_inputs() (tensorrt_llm.models.chatglmforcausallm method)": [[31, "tensorrt_llm.models.ChatGLMForCausalLM.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.decodermodel method)": [[31, "tensorrt_llm.models.DecoderModel.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.encodermodel method)": [[31, "tensorrt_llm.models.EncoderModel.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.mambalmheadmodel method)": [[31, "tensorrt_llm.models.MambaLMHeadModel.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.medusaforcausallm method)": [[31, "tensorrt_llm.models.MedusaForCausalLm.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.pretrainedmodel method)": [[31, "tensorrt_llm.models.PretrainedModel.prepare_inputs"]], "prepare_inputs() (tensorrt_llm.models.whisperencoder method)": [[31, "tensorrt_llm.models.WhisperEncoder.prepare_inputs"]], "quant_mode (tensorrt_llm.models.pretrainedconfig 
property)": [[31, "tensorrt_llm.models.PretrainedConfig.quant_mode"]], "quantize() (tensorrt_llm.models.llamaforcausallm class method)": [[31, "tensorrt_llm.models.LLaMAForCausalLM.quantize"]], "quantize() (tensorrt_llm.models.pretrainedmodel class method)": [[31, "tensorrt_llm.models.PretrainedModel.quantize"]], "quantize_model() (in module tensorrt_llm.models)": [[31, "tensorrt_llm.models.quantize_model"]], "release() (tensorrt_llm.models.pretrainedmodel method)": [[31, "tensorrt_llm.models.PretrainedModel.release"]], "save_checkpoint() (tensorrt_llm.models.pretrainedmodel method)": [[31, "tensorrt_llm.models.PretrainedModel.save_checkpoint"]], "set_if_not_exist() (tensorrt_llm.models.pretrainedconfig method)": [[31, "tensorrt_llm.models.PretrainedConfig.set_if_not_exist"]], "set_rank() (tensorrt_llm.models.pretrainedconfig method)": [[31, "tensorrt_llm.models.PretrainedConfig.set_rank"]], "tensorrt_llm.models": [[31, "module-tensorrt_llm.models"]], "to_dict() (tensorrt_llm.models.pretrainedconfig method)": [[31, "tensorrt_llm.models.PretrainedConfig.to_dict"]], "use_lora() (tensorrt_llm.models.gptforcausallm method)": [[31, "tensorrt_llm.models.GPTForCausalLM.use_lora"]], "use_lora() (tensorrt_llm.models.llamaforcausallm method)": [[31, "tensorrt_llm.models.LLaMAForCausalLM.use_lora"]], "pluginconfig (class in tensorrt_llm.plugin)": [[32, "tensorrt_llm.plugin.PluginConfig"]], "tensorrt_llm.plugin": [[32, "module-tensorrt_llm.plugin"]], "to_legacy_setting() (tensorrt_llm.plugin.pluginconfig method)": [[32, "tensorrt_llm.plugin.PluginConfig.to_legacy_setting"]], "quantalgo (class in tensorrt_llm.quantization)": [[33, "tensorrt_llm.quantization.QuantAlgo"]], "quantmode (class in tensorrt_llm.quantization)": [[33, "tensorrt_llm.quantization.QuantMode"]], "quantize_and_export() (in module tensorrt_llm.quantization)": [[33, "tensorrt_llm.quantization.quantize_and_export"]], "tensorrt_llm.quantization": [[33, "module-tensorrt_llm.quantization"]], "chatglmgenerationsession (class in tensorrt_llm.runtime)": [[34, "tensorrt_llm.runtime.ChatGLMGenerationSession"]], "generationsequence (class in tensorrt_llm.runtime)": [[34, "tensorrt_llm.runtime.GenerationSequence"]], "generationsession (class in tensorrt_llm.runtime)": [[34, "tensorrt_llm.runtime.GenerationSession"]], "kvcachemanager (class in tensorrt_llm.runtime)": [[34, "tensorrt_llm.runtime.KVCacheManager"]], "logitsprocessor (class in tensorrt_llm.runtime)": [[34, "tensorrt_llm.runtime.LogitsProcessor"]], "logitsprocessorlist (class in tensorrt_llm.runtime)": [[34, "tensorrt_llm.runtime.LogitsProcessorList"]], "mambalmheadmodelgenerationsession (class in tensorrt_llm.runtime)": [[34, "tensorrt_llm.runtime.MambaLMHeadModelGenerationSession"]], "modelconfig (class in tensorrt_llm.runtime)": [[34, "tensorrt_llm.runtime.ModelConfig"]], "modelrunner (class in tensorrt_llm.runtime)": [[34, "tensorrt_llm.runtime.ModelRunner"]], "modelrunnercpp (class in tensorrt_llm.runtime)": [[34, "tensorrt_llm.runtime.ModelRunnerCpp"]], "qwenforcausallmgenerationsession (class in tensorrt_llm.runtime)": [[34, "tensorrt_llm.runtime.QWenForCausalLMGenerationSession"]], "samplingconfig (class in tensorrt_llm.runtime)": [[34, "tensorrt_llm.runtime.SamplingConfig"]], "session (class in tensorrt_llm.runtime)": [[34, "tensorrt_llm.runtime.Session"]], "stoppingcriteria (class in tensorrt_llm.runtime)": [[34, "tensorrt_llm.runtime.StoppingCriteria"]], "stoppingcriterialist (class in tensorrt_llm.runtime)": [[34, "tensorrt_llm.runtime.StoppingCriteriaList"]], "tensorinfo 
(class in tensorrt_llm.runtime)": [[34, "tensorrt_llm.runtime.TensorInfo"]], "add_sequence() (tensorrt_llm.runtime.kvcachemanager method)": [[34, "tensorrt_llm.runtime.KVCacheManager.add_sequence"]], "bad_words_list (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.bad_words_list"]], "batch_size (tensorrt_llm.runtime.generationsession attribute)": [[34, "tensorrt_llm.runtime.GenerationSession.batch_size"]], "beam_search_diversity_rate (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.beam_search_diversity_rate"]], "buffer_allocated (tensorrt_llm.runtime.generationsession attribute)": [[34, "tensorrt_llm.runtime.GenerationSession.buffer_allocated"]], "context (tensorrt_llm.runtime.session property)": [[34, "tensorrt_llm.runtime.Session.context"]], "cross_attention (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.cross_attention"]], "cross_attention (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.cross_attention"]], "cuda_graph_mode (tensorrt_llm.runtime.generationsession attribute)": [[34, "tensorrt_llm.runtime.GenerationSession.cuda_graph_mode"]], "cuda_stream_guard() (tensorrt_llm.runtime.generationsession method)": [[34, "tensorrt_llm.runtime.GenerationSession.cuda_stream_guard"]], "debug_mode (tensorrt_llm.runtime.generationsession attribute)": [[34, "tensorrt_llm.runtime.GenerationSession.debug_mode"]], "debug_tensors_to_save (tensorrt_llm.runtime.generationsession attribute)": [[34, "tensorrt_llm.runtime.GenerationSession.debug_tensors_to_save"]], "decode() (tensorrt_llm.runtime.generationsession method)": [[34, "tensorrt_llm.runtime.GenerationSession.decode"]], "decode_batch() (tensorrt_llm.runtime.generationsession method)": [[34, "tensorrt_llm.runtime.GenerationSession.decode_batch"]], "decode_regular() (tensorrt_llm.runtime.generationsession method)": [[34, "tensorrt_llm.runtime.GenerationSession.decode_regular"]], "decode_stream() (tensorrt_llm.runtime.generationsession method)": [[34, "tensorrt_llm.runtime.GenerationSession.decode_stream"]], "device (tensorrt_llm.runtime.generationsession attribute)": [[34, "tensorrt_llm.runtime.GenerationSession.device"]], "dtype (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.dtype"]], "dtype (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.dtype"]], "dtype (tensorrt_llm.runtime.modelrunner property)": [[34, "tensorrt_llm.runtime.ModelRunner.dtype"]], "dtype (tensorrt_llm.runtime.modelrunnercpp property)": [[34, "tensorrt_llm.runtime.ModelRunnerCpp.dtype"]], "dtype (tensorrt_llm.runtime.tensorinfo attribute)": [[34, "tensorrt_llm.runtime.TensorInfo.dtype"]], "dump_debug_buffers() (tensorrt_llm.runtime.generationsession method)": [[34, "tensorrt_llm.runtime.GenerationSession.dump_debug_buffers"]], "early_stop_criteria() (tensorrt_llm.runtime.generationsession method)": [[34, "tensorrt_llm.runtime.GenerationSession.early_stop_criteria"]], "early_stopping (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.early_stopping"]], "end_id (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.end_id"]], "engine (tensorrt_llm.runtime.session property)": [[34, "tensorrt_llm.runtime.Session.engine"]], "filter_medusa_logits() (tensorrt_llm.runtime.generationsession method)": [[34, 
"tensorrt_llm.runtime.GenerationSession.filter_medusa_logits"]], "finalize_decoder() (tensorrt_llm.runtime.generationsession method)": [[34, "tensorrt_llm.runtime.GenerationSession.finalize_decoder"]], "find_best_medusa_path() (tensorrt_llm.runtime.generationsession method)": [[34, "tensorrt_llm.runtime.GenerationSession.find_best_medusa_path"]], "first_layer (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.first_layer"]], "frequency_penalty (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.frequency_penalty"]], "from_dir() (tensorrt_llm.runtime.modelrunner class method)": [[34, "tensorrt_llm.runtime.ModelRunner.from_dir"]], "from_dir() (tensorrt_llm.runtime.modelrunnercpp class method)": [[34, "tensorrt_llm.runtime.ModelRunnerCpp.from_dir"]], "from_engine() (tensorrt_llm.runtime.modelrunner class method)": [[34, "tensorrt_llm.runtime.ModelRunner.from_engine"]], "from_engine() (tensorrt_llm.runtime.session static method)": [[34, "tensorrt_llm.runtime.Session.from_engine"]], "from_serialized_engine() (tensorrt_llm.runtime.session static method)": [[34, "tensorrt_llm.runtime.Session.from_serialized_engine"]], "gather_context_logits (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.gather_context_logits"]], "gather_context_logits (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.gather_context_logits"]], "gather_context_logits (tensorrt_llm.runtime.modelrunner property)": [[34, "tensorrt_llm.runtime.ModelRunner.gather_context_logits"]], "gather_context_logits (tensorrt_llm.runtime.modelrunnercpp property)": [[34, "tensorrt_llm.runtime.ModelRunnerCpp.gather_context_logits"]], "gather_generation_logits (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.gather_generation_logits"]], "gather_generation_logits (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.gather_generation_logits"]], "gather_generation_logits (tensorrt_llm.runtime.modelrunner property)": [[34, "tensorrt_llm.runtime.ModelRunner.gather_generation_logits"]], "gather_generation_logits (tensorrt_llm.runtime.modelrunnercpp property)": [[34, "tensorrt_llm.runtime.ModelRunnerCpp.gather_generation_logits"]], "generate() (tensorrt_llm.runtime.modelrunner method)": [[34, "tensorrt_llm.runtime.ModelRunner.generate"]], "generate() (tensorrt_llm.runtime.modelrunnercpp method)": [[34, "tensorrt_llm.runtime.ModelRunnerCpp.generate"]], "generate() (tensorrt_llm.runtime.qwenforcausallmgenerationsession method)": [[34, "tensorrt_llm.runtime.QWenForCausalLMGenerationSession.generate"]], "get_batch_idx() (tensorrt_llm.runtime.generationsequence method)": [[34, "tensorrt_llm.runtime.GenerationSequence.get_batch_idx"]], "get_block_pointers() (tensorrt_llm.runtime.kvcachemanager method)": [[34, "tensorrt_llm.runtime.KVCacheManager.get_block_pointers"]], "get_next_medusa_tokens() (tensorrt_llm.runtime.generationsession method)": [[34, "tensorrt_llm.runtime.GenerationSession.get_next_medusa_tokens"]], "get_seq_idx() (tensorrt_llm.runtime.generationsequence method)": [[34, "tensorrt_llm.runtime.GenerationSequence.get_seq_idx"]], "gpt_attention_plugin (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.gpt_attention_plugin"]], "handle_per_step() (tensorrt_llm.runtime.generationsession method)": [[34, "tensorrt_llm.runtime.GenerationSession.handle_per_step"]], 
"has_position_embedding (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.has_position_embedding"]], "has_position_embedding (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.has_position_embedding"]], "has_token_type_embedding (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.has_token_type_embedding"]], "has_token_type_embedding (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.has_token_type_embedding"]], "head_size (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.head_size"]], "head_size (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.head_size"]], "hidden_size (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.hidden_size"]], "hidden_size (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.hidden_size"]], "hidden_size (tensorrt_llm.runtime.modelrunner property)": [[34, "tensorrt_llm.runtime.ModelRunner.hidden_size"]], "hidden_size (tensorrt_llm.runtime.modelrunnercpp property)": [[34, "tensorrt_llm.runtime.ModelRunnerCpp.hidden_size"]], "infer_shapes() (tensorrt_llm.runtime.session method)": [[34, "tensorrt_llm.runtime.Session.infer_shapes"]], "is_medusa_mode (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.is_medusa_mode"]], "last_layer (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.last_layer"]], "length_penalty (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.length_penalty"]], "lora_plugin (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.lora_plugin"]], "lora_target_modules (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.lora_target_modules"]], "mamba_conv1d_plugin (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.mamba_conv1d_plugin"]], "mamba_d_conv (tensorrt_llm.runtime.mambalmheadmodelgenerationsession property)": [[34, "tensorrt_llm.runtime.MambaLMHeadModelGenerationSession.mamba_d_conv"]], "mamba_d_conv (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.mamba_d_conv"]], "mamba_d_state (tensorrt_llm.runtime.mambalmheadmodelgenerationsession property)": [[34, "tensorrt_llm.runtime.MambaLMHeadModelGenerationSession.mamba_d_state"]], "mamba_d_state (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.mamba_d_state"]], "mamba_expand (tensorrt_llm.runtime.mambalmheadmodelgenerationsession property)": [[34, "tensorrt_llm.runtime.MambaLMHeadModelGenerationSession.mamba_expand"]], "mamba_expand (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.mamba_expand"]], "mapping (tensorrt_llm.runtime.generationsession attribute)": [[34, "tensorrt_llm.runtime.GenerationSession.mapping"]], "max_attention_window_size (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.max_attention_window_size"]], "max_batch_size (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.max_batch_size"]], "max_beam_width (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.max_beam_width"]], "max_medusa_tokens 
(tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.max_medusa_tokens"]], "max_medusa_tokens (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.max_medusa_tokens"]], "max_new_tokens (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.max_new_tokens"]], "max_prompt_embedding_table_size (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.max_prompt_embedding_table_size"]], "max_prompt_embedding_table_size (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.max_prompt_embedding_table_size"]], "max_prompt_embedding_table_size (tensorrt_llm.runtime.modelrunner property)": [[34, "tensorrt_llm.runtime.ModelRunner.max_prompt_embedding_table_size"]], "max_prompt_embedding_table_size (tensorrt_llm.runtime.modelrunnercpp property)": [[34, "tensorrt_llm.runtime.ModelRunnerCpp.max_prompt_embedding_table_size"]], "max_sequence_length (tensorrt_llm.runtime.modelrunner property)": [[34, "tensorrt_llm.runtime.ModelRunner.max_sequence_length"]], "max_sequence_length (tensorrt_llm.runtime.modelrunnercpp property)": [[34, "tensorrt_llm.runtime.ModelRunnerCpp.max_sequence_length"]], "medusa_paths (tensorrt_llm.runtime.generationsession attribute)": [[34, "tensorrt_llm.runtime.GenerationSession.medusa_paths"]], "medusa_position_offsets (tensorrt_llm.runtime.generationsession attribute)": [[34, "tensorrt_llm.runtime.GenerationSession.medusa_position_offsets"]], "medusa_temperature (tensorrt_llm.runtime.generationsession attribute)": [[34, "tensorrt_llm.runtime.GenerationSession.medusa_temperature"]], "medusa_topks (tensorrt_llm.runtime.generationsession attribute)": [[34, "tensorrt_llm.runtime.GenerationSession.medusa_topks"]], "medusa_tree_ids (tensorrt_llm.runtime.generationsession attribute)": [[34, "tensorrt_llm.runtime.GenerationSession.medusa_tree_ids"]], "min_length (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.min_length"]], "model_name (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.model_name"]], "name (tensorrt_llm.runtime.tensorinfo attribute)": [[34, "tensorrt_llm.runtime.TensorInfo.name"]], "next_medusa_input_ids() (tensorrt_llm.runtime.generationsession method)": [[34, "tensorrt_llm.runtime.GenerationSession.next_medusa_input_ids"]], "num_beams (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.num_beams"]], "num_heads (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.num_heads"]], "num_heads (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.num_heads"]], "num_heads (tensorrt_llm.runtime.modelrunner property)": [[34, "tensorrt_llm.runtime.ModelRunner.num_heads"]], "num_heads (tensorrt_llm.runtime.modelrunnercpp property)": [[34, "tensorrt_llm.runtime.ModelRunnerCpp.num_heads"]], "num_heads_kv (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.num_heads_kv"]], "num_kv_heads (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.num_kv_heads"]], "num_layers (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.num_layers"]], "num_layers (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.num_layers"]], "num_layers 
(tensorrt_llm.runtime.modelrunner property)": [[34, "tensorrt_llm.runtime.ModelRunner.num_layers"]], "num_layers (tensorrt_llm.runtime.modelrunnercpp property)": [[34, "tensorrt_llm.runtime.ModelRunnerCpp.num_layers"]], "num_medusa_heads (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.num_medusa_heads"]], "num_medusa_heads (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.num_medusa_heads"]], "num_medusa_tokens (tensorrt_llm.runtime.generationsession attribute)": [[34, "tensorrt_llm.runtime.GenerationSession.num_medusa_tokens"]], "output_cum_log_probs (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.output_cum_log_probs"]], "output_log_probs (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.output_log_probs"]], "output_sequence_lengths (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.output_sequence_lengths"]], "pad_id (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.pad_id"]], "paged_kv_cache (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.paged_kv_cache"]], "paged_kv_cache (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.paged_kv_cache"]], "paged_state (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.paged_state"]], "paged_state (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.paged_state"]], "pp_communicate_final_output_ids() (tensorrt_llm.runtime.generationsession method)": [[34, "tensorrt_llm.runtime.GenerationSession.pp_communicate_final_output_ids"]], "pp_communicate_new_tokens() (tensorrt_llm.runtime.generationsession method)": [[34, "tensorrt_llm.runtime.GenerationSession.pp_communicate_new_tokens"]], "presence_penalty (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.presence_penalty"]], "process_logits_for_medusa_mode() (tensorrt_llm.runtime.generationsession method)": [[34, "tensorrt_llm.runtime.GenerationSession.process_logits_for_medusa_mode"]], "quant_mode (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.quant_mode"]], "quant_mode (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.quant_mode"]], "random_seed (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.random_seed"]], "remove_input_padding (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.remove_input_padding"]], "remove_input_padding (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.remove_input_padding"]], "remove_input_padding (tensorrt_llm.runtime.modelrunner property)": [[34, "tensorrt_llm.runtime.ModelRunner.remove_input_padding"]], "remove_input_padding (tensorrt_llm.runtime.modelrunnercpp property)": [[34, "tensorrt_llm.runtime.ModelRunnerCpp.remove_input_padding"]], "repetition_penalty (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.repetition_penalty"]], "return_dict (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.return_dict"]], "run() (tensorrt_llm.runtime.session method)": [[34, "tensorrt_llm.runtime.Session.run"]], "runtime 
(tensorrt_llm.runtime.generationsession attribute)": [[34, "tensorrt_llm.runtime.GenerationSession.runtime"]], "runtime (tensorrt_llm.runtime.session property)": [[34, "tensorrt_llm.runtime.Session.runtime"]], "serialize_engine() (tensorrt_llm.runtime.modelrunner method)": [[34, "tensorrt_llm.runtime.ModelRunner.serialize_engine"]], "set_shapes() (tensorrt_llm.runtime.session method)": [[34, "tensorrt_llm.runtime.Session.set_shapes"]], "setup() (tensorrt_llm.runtime.generationsession method)": [[34, "tensorrt_llm.runtime.GenerationSession.setup"]], "setup() (tensorrt_llm.runtime.mambalmheadmodelgenerationsession method)": [[34, "tensorrt_llm.runtime.MambaLMHeadModelGenerationSession.setup"]], "shape (tensorrt_llm.runtime.tensorinfo attribute)": [[34, "tensorrt_llm.runtime.TensorInfo.shape"]], "sink_token_length (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.sink_token_length"]], "skip_cross_qkv (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.skip_cross_qkv"]], "step() (tensorrt_llm.runtime.kvcachemanager method)": [[34, "tensorrt_llm.runtime.KVCacheManager.step"]], "stop_words_list (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.stop_words_list"]], "temperature (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.temperature"]], "tensorrt_llm.runtime": [[34, "module-tensorrt_llm.runtime"]], "to_word_list_format() (in module tensorrt_llm.runtime)": [[34, "tensorrt_llm.runtime.to_word_list_format"]], "tokens_per_block (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.tokens_per_block"]], "tokens_per_block (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.tokens_per_block"]], "top_k (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.top_k"]], "top_p (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.top_p"]], "top_p_decay (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.top_p_decay"]], "top_p_min (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.top_p_min"]], "top_p_reset_ids (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.top_p_reset_ids"]], "trtllm_modules_to_hf_modules (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.trtllm_modules_to_hf_modules"]], "update() (tensorrt_llm.runtime.samplingconfig method)": [[34, "tensorrt_llm.runtime.SamplingConfig.update"]], "update_kv_cache_draft_token_location() (tensorrt_llm.runtime.generationsession method)": [[34, "tensorrt_llm.runtime.GenerationSession.update_kv_cache_draft_token_location"]], "update_output_ids_by_offset() (tensorrt_llm.runtime.generationsession method)": [[34, "tensorrt_llm.runtime.GenerationSession.update_output_ids_by_offset"]], "use_beam_hyps (tensorrt_llm.runtime.samplingconfig attribute)": [[34, "tensorrt_llm.runtime.SamplingConfig.use_beam_hyps"]], "use_context_fmha_for_generation (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.use_context_fmha_for_generation"]], "use_context_fmha_for_generation (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.use_context_fmha_for_generation"]], "use_custom_all_reduce (tensorrt_llm.runtime.generationsession 
property)": [[34, "tensorrt_llm.runtime.GenerationSession.use_custom_all_reduce"]], "use_custom_all_reduce (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.use_custom_all_reduce"]], "use_gpt_attention_plugin (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.use_gpt_attention_plugin"]], "use_lora_plugin (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.use_lora_plugin"]], "use_lora_plugin (tensorrt_llm.runtime.modelrunner property)": [[34, "tensorrt_llm.runtime.ModelRunner.use_lora_plugin"]], "use_mamba_conv1d_plugin (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.use_mamba_conv1d_plugin"]], "vocab_size (tensorrt_llm.runtime.generationsession property)": [[34, "tensorrt_llm.runtime.GenerationSession.vocab_size"]], "vocab_size (tensorrt_llm.runtime.modelconfig attribute)": [[34, "tensorrt_llm.runtime.ModelConfig.vocab_size"]], "vocab_size (tensorrt_llm.runtime.modelrunner property)": [[34, "tensorrt_llm.runtime.ModelRunner.vocab_size"]], "vocab_size (tensorrt_llm.runtime.modelrunnercpp property)": [[34, "tensorrt_llm.runtime.ModelRunnerCpp.vocab_size"]], "vocab_size_padded (tensorrt_llm.runtime.modelrunner property)": [[34, "tensorrt_llm.runtime.ModelRunner.vocab_size_padded"]], "vocab_size_padded (tensorrt_llm.runtime.modelrunnercpp property)": [[34, "tensorrt_llm.runtime.ModelRunnerCpp.vocab_size_padded"]]}})