mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[Doc] Add Ascend NPU tab to the quickstart installation guide (#43550)
Signed-off-by: Aditya Singh <adisin650@gmail.com> Co-authored-by: Claude <noreply@anthropic.com> Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
This commit is contained in:
@@ -76,6 +76,15 @@ This guide will help you quickly get started with vLLM to perform:
|
||||
!!! note
|
||||
For more detailed instructions, including Docker, installing from source, and troubleshooting, please refer to the [vLLM on TPU documentation](https://docs.vllm.ai/projects/tpu/en/latest/).
|
||||
|
||||
=== "Ascend NPU"
|
||||
|
||||
If you are using Ascend NPUs, you can run vLLM through [vLLM Ascend](https://github.com/vllm-project/vllm-ascend), a community-maintained hardware plugin.
|
||||
|
||||
Follow the installation instructions in the [vLLM Ascend quick start](https://docs.vllm.ai/projects/ascend/en/latest/quick_start.html).
|
||||
|
||||
!!! note
|
||||
Ascend setup depends on your NPU hardware and your CANN (Compute Architecture for Neural Networks) version. For supported versions, Docker images, and troubleshooting, please refer to the [vLLM Ascend documentation](https://docs.vllm.ai/projects/ascend/en/latest/).
|
||||
|
||||
=== "Apple Silicon (Mac)"
|
||||
|
||||
If you are using Apple Silicon Macs, you can use vLLM-Metal for GPU-accelerated inference via Apple's Metal framework.
|
||||
|
||||
@@ -181,6 +181,8 @@ VALU = "VALU"
|
||||
# Walsh-Hadamard Transform
|
||||
wht = "wht"
|
||||
WHT = "WHT"
|
||||
# Huawei Compute Architecture for Neural Networks
|
||||
CANN = "CANN"
|
||||
|
||||
[tool.uv]
|
||||
no-build-isolation-package = ["torch"]
|
||||
|
||||
Reference in New Issue
Block a user