From 0c96dd64fb6a8005b1ba5e2ef335fe4f75d9eb90 Mon Sep 17 00:00:00 2001 From: Turner Jabbour Date: Thu, 4 Jun 2026 15:30:57 +0100 Subject: [PATCH] [ROCm] Bump fastsafetensors to v0.3.2 from PyPI, remove git source build (#43625) Signed-off-by: Turner Jabbour --- requirements/cuda.txt | 2 +- requirements/rocm.txt | 5 ++++- requirements/test/cuda.in | 2 +- requirements/test/cuda.txt | 2 +- requirements/test/nightly-torch.txt | 2 +- requirements/test/rocm.in | 2 +- requirements/test/rocm.txt | 6 ++++-- setup.py | 2 +- 8 files changed, 14 insertions(+), 9 deletions(-) diff --git a/requirements/cuda.txt b/requirements/cuda.txt index b0e16d11c75..479a949fd0d 100644 --- a/requirements/cuda.txt +++ b/requirements/cuda.txt @@ -18,7 +18,7 @@ tilelang==0.1.9 nvidia-cudnn-frontend>=1.13.0,<1.19.0 # Required for faster safetensors model loading -fastsafetensors >= 0.2.2 +fastsafetensors >= 0.3.2 # QuACK and Cutlass DSL for FA4 (cute-DSL implementation) nvidia-cutlass-dsl[cu13]==4.5.2 diff --git a/requirements/rocm.txt b/requirements/rocm.txt index 0520f4ca1e9..4ca70738303 100644 --- a/requirements/rocm.txt +++ b/requirements/rocm.txt @@ -19,7 +19,10 @@ setuptools-rust>=1.9.0 runai-model-streamer[s3,gcs,azure]==0.15.7 conch-triton-kernels==1.2.1 timm>=1.0.17 -# amd-quark: required for Quark quantization on ROCm +# amd-quark: required for Quark quantization on ROCm # To be consistent with test_quark.py amd-quark>=0.8.99 tilelang==0.1.10 + +# Required for faster safetensors model loading +fastsafetensors >= 0.3.2 diff --git a/requirements/test/cuda.in b/requirements/test/cuda.in index 6c786491603..344a58ec1bb 100644 --- a/requirements/test/cuda.in +++ b/requirements/test/cuda.in @@ -57,7 +57,7 @@ arctic-inference == 0.1.1; platform_machine == "x86_64" # Required for suffix de numba == 0.65.0 # Required for N-gram speculative decoding numpy runai-model-streamer[s3,gcs,azure]==0.15.7 -fastsafetensors>=0.2.2; platform_machine == "x86_64" # 0.2.2 contains important fixes for multi-GPU mem usage +fastsafetensors>=0.3.2 instanttensor>=0.1.5; platform_machine == "x86_64" pydantic>=2.12 # 2.11 leads to error on python 3.13 decord==0.6.0; platform_machine == "x86_64" diff --git a/requirements/test/cuda.txt b/requirements/test/cuda.txt index 245a86f93be..7d847d10577 100644 --- a/requirements/test/cuda.txt +++ b/requirements/test/cuda.txt @@ -191,7 +191,7 @@ fastparquet==2024.11.0 # via genai-perf fastrlock==0.8.2 # via cupy-cuda12x -fastsafetensors==0.2.2 +fastsafetensors==0.3.2 # via # -c requirements/cuda.txt # -r requirements/test/cuda.in diff --git a/requirements/test/nightly-torch.txt b/requirements/test/nightly-torch.txt index 9c70aa8b90e..89fd4ea9b43 100644 --- a/requirements/test/nightly-torch.txt +++ b/requirements/test/nightly-torch.txt @@ -43,6 +43,6 @@ tritonclient>=2.51.0 numba == 0.65.0 # Required for N-gram speculative decoding numpy runai-model-streamer[s3,gcs,azure]==0.15.7 -fastsafetensors>=0.2.2 +fastsafetensors>=0.3.2 instanttensor>=0.1.5 pydantic>=2.12 # 2.11 leads to error on python 3.13 diff --git a/requirements/test/rocm.in b/requirements/test/rocm.in index 97e0658fb10..0a615831774 100644 --- a/requirements/test/rocm.in +++ b/requirements/test/rocm.in @@ -56,7 +56,7 @@ arctic-inference==0.1.1 # Required for suffix decoding test numba==0.65.0 # Required for N-gram speculative decoding numpy runai-model-streamer[s3,gcs,azure]==0.15.7 -fastsafetensors @ git+https://github.com/foundation-model-stack/fastsafetensors.git@0.2.2 # PyPI only ships CUDA wheels +fastsafetensors>=0.3.2 instanttensor>=0.1.5 pydantic>=2.12 # 2.11 leads to error on python 3.13 decord==0.6.0 diff --git a/requirements/test/rocm.txt b/requirements/test/rocm.txt index c39f268709b..5a8e70946ad 100644 --- a/requirements/test/rocm.txt +++ b/requirements/test/rocm.txt @@ -240,8 +240,10 @@ fastar==0.10.0 # via fastapi-cloud-cli fastparquet==2026.3.0 # via genai-perf -fastsafetensors @ git+https://github.com/foundation-model-stack/fastsafetensors.git@65d80088fca7a8f567fba30415fbcc80f7d2259c - # via -r requirements/test/rocm.in +fastsafetensors==0.3.2 + # via + # -c requirements/rocm.txt + # -r requirements/test/rocm.in filelock==3.25.2 # via # -c requirements/common.txt diff --git a/setup.py b/setup.py index 07374807bee..b674d55a14a 100644 --- a/setup.py +++ b/setup.py @@ -1168,7 +1168,7 @@ setup( "zen": ["zentorch==2.11.0.0"], "bench": ["pandas", "matplotlib", "seaborn", "datasets", "scipy", "plotly"], "tensorizer": ["tensorizer==2.10.1"], - "fastsafetensors": ["fastsafetensors >= 0.2.2"], + "fastsafetensors": ["fastsafetensors >= 0.3.2"], "instanttensor": ["instanttensor >= 0.1.5"], "runai": ["runai-model-streamer[s3,gcs,azure] >= 0.15.7"], "audio": [