mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
[None][chore] Design diagram review process change (#8748)
Signed-off-by: Yibin Li <109242046+yibinl-nvidia@users.noreply.github.com>
This commit is contained in:
parent
782824533e
commit
871ea244a3
3
.github/CODEOWNERS
vendored
3
.github/CODEOWNERS
vendored
@ -186,6 +186,9 @@ docs/source/performance/perf-benchmarking.md @NVIDIA/trtllm-bench-reviewers
|
||||
## These scripts install and pin dependency versions
|
||||
/docker/common/** @NVIDIA/trt-llm-setup-infra-devs @NVIDIA/trt-llm-infra-devs @NVIDIA/trt-llm-oss-compliance
|
||||
|
||||
### TAVA Architecture Diagram
|
||||
/.github/tava_architecture_diagram.md @NVIDIA/trt-llm-TAVA-design-change
|
||||
|
||||
### CODEOWNERS file itself
|
||||
/.github/CODEOWNERS @NVIDIA/trt-llm-gh-workflows-infra-devs @NVIDIA/trt-llm-infra-devs @NVIDIA/trt-llm-oss-compliance
|
||||
|
||||
|
||||
1
.github/pull_request_template.md
vendored
1
.github/pull_request_template.md
vendored
@ -49,6 +49,7 @@ Please review the following before submitting your PR:
|
||||
- Any new dependencies have been scanned for license and vulnerabilities
|
||||
- [CODEOWNERS](https://github.com/NVIDIA/TensorRT-LLM/blob/main/.github/CODEOWNERS) updated if ownership changes
|
||||
- Documentation updated as needed
|
||||
- Update the [TAVA architecture diagram](https://github.com/NVIDIA/TensorRT-LLM/blob/main/.github/tava_architecture_diagram.md) if there is a significant design change in the PR.
|
||||
- The reviewers assigned automatically/manually are appropriate for the PR.
|
||||
|
||||
|
||||
|
||||
108
.github/tava_architecture_diagram.md
vendored
Normal file
108
.github/tava_architecture_diagram.md
vendored
Normal file
@ -0,0 +1,108 @@
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "User API & CLI Tools"
|
||||
CLI[CLI Tools]
|
||||
LLMAPI[LLM API]
|
||||
CLI --> LLMAPI
|
||||
end
|
||||
|
||||
subgraph "Model Checkpoint"
|
||||
Checkpoint[Huggingface Models]
|
||||
Checkpoint --> CLI
|
||||
Checkpoint --> LLMAPI
|
||||
end
|
||||
|
||||
subgraph "TensorRT_Flow"
|
||||
trtllmExecutor[trtllm.Executor]
|
||||
Engine[TensorRT Engine]
|
||||
TRTGraph[TensorRT Graph]
|
||||
Plugins[TensorRT Plugins]
|
||||
cudaKernel[CUDA Kernel]
|
||||
Executor[Executor]
|
||||
LLMAPI --> trtllmExecutor
|
||||
trtllmExecutor --> |build|Engine
|
||||
trtllmExecutor --> |compile|TRTGraph
|
||||
trtllmExecutor --> |compile|Plugins
|
||||
Engine --> Executor
|
||||
Plugins --> Executor
|
||||
TRTGraph --> Executor
|
||||
Plugins --> cudaKernel
|
||||
end
|
||||
|
||||
subgraph "PyTorch_Flow"
|
||||
PyExecutor[PyExecutor]
|
||||
PyEngine[PyTorch Engine]
|
||||
CustomOps[Custom Ops]
|
||||
PyTorchOps[PyTorch Ops]
|
||||
KernelLibs[Kernel Libs]
|
||||
PyScheduler[Scheduler]
|
||||
PyDecoder[Decoder]
|
||||
CUDAKernel[CUDA Kernel]
|
||||
LLMAPI --> PyExecutor
|
||||
PyExecutor --> PyEngine
|
||||
PyEngine --> CustomOps
|
||||
PyEngine --> PyTorchOps
|
||||
PyEngine --> KernelLibs
|
||||
PyEngine --> PyScheduler
|
||||
PyEngine --> PyDecoder
|
||||
KernelLibs --> CUDAKernel
|
||||
CustomOps --> CUDAKernel
|
||||
end
|
||||
|
||||
subgraph "Shared_Component"
|
||||
Shared_Decoder[Decoder]
|
||||
Shared_Scheduler[Scheduler]
|
||||
Sampling[Sampling]
|
||||
BatchManager[Batch Manager]
|
||||
KVCache[KV Cache Manager]
|
||||
PyScheduler --> |Pybind|Shared_Scheduler
|
||||
PyDecoder --> |Pybind|Shared_Decoder
|
||||
Executor --> Shared_Decoder
|
||||
Shared_Decoder --> Sampling
|
||||
Executor --> Shared_Scheduler
|
||||
Shared_Scheduler --> |In-flight Batching| BatchManager
|
||||
BatchManager --> KVCache
|
||||
end
|
||||
|
||||
subgraph "Output_Results"
|
||||
Tokens[Generated Tokens]
|
||||
Stats[Performance Stats]
|
||||
Metrics[Accuracy Metrics]
|
||||
end
|
||||
|
||||
%% PyTorch_Flow ~~~ TensorRT_Flow
|
||||
|
||||
TensorRT_Flow --> Output_Results
|
||||
PyTorch_Flow --> Output_Results
|
||||
|
||||
%% Force Output_Results to be between PyTorch_Flow and TensorRT_Flow
|
||||
PyTorch_Flow ~~~ Output_Results
|
||||
|
||||
%% Model checkpoint format
|
||||
classDef checkpoint fill:#ff1,stroke:#333,stroke-width:2px;
|
||||
class Checkpoint checkpoint;
|
||||
|
||||
%% CLI tools format
|
||||
classDef cli fill:#f9f,stroke:#333,stroke-width:2px;
|
||||
class CLI cli;
|
||||
|
||||
%% TRT flow format
|
||||
classDef trt fill:#bbf,stroke:#333,stroke-width:2px;
|
||||
class trtllmExecutor,TRTGraph,Plugins,Engine,Executor,cudaKernel trt;
|
||||
|
||||
%% PyTorch flow format
|
||||
classDef pytorch fill:#8bf,stroke:#333,stroke-width:2px;
|
||||
class PyExecutor,PyEngine,CustomOps,PyTorchOps,KernelLibs,PyScheduler,PyDecoder,CUDAKernel pytorch;
|
||||
|
||||
%% Shared Component format
|
||||
classDef component fill:#fc8,stroke:#333,stroke-width:2px;
|
||||
class Shared_Decoder,Sampling,Shared_Scheduler,BatchManager,KVCache component;
|
||||
|
||||
%% APIs format
|
||||
classDef api fill:#bfb,stroke:#333,stroke-width:2px;
|
||||
class LLMAPI api;
|
||||
|
||||
%% Results format
|
||||
classDef result fill:#fbb,stroke:#333,stroke-width:2px;
|
||||
class Tokens,Stats,Metrics result;
|
||||
```
|
||||
Loading…
Reference in New Issue
Block a user