[ROCm] Bump fastsafetensors to v0.3.2 from PyPI, remove git source build (#43625)

Signed-off-by: Turner Jabbour <doubleujabbour@gmail.com>
2026-06-06 00:16:14 +00:00 · 2026-06-04 15:30:57 +01:00
parent 68f5e565c9
commit 0c96dd64fb
8 changed files with 14 additions and 9 deletions
@@ -18,7 +18,7 @@ tilelang==0.1.9
 nvidia-cudnn-frontend>=1.13.0,<1.19.0

 # Required for faster safetensors model loading
-fastsafetensors >= 0.2.2
+fastsafetensors >= 0.3.2

 # QuACK and Cutlass DSL for FA4 (cute-DSL implementation)
 nvidia-cutlass-dsl[cu13]==4.5.2
@@ -19,7 +19,10 @@ setuptools-rust>=1.9.0
 runai-model-streamer[s3,gcs,azure]==0.15.7
 conch-triton-kernels==1.2.1
 timm>=1.0.17
-# amd-quark: required for Quark quantization on ROCm 
+# amd-quark: required for Quark quantization on ROCm
 # To be consistent with test_quark.py
 amd-quark>=0.8.99
 tilelang==0.1.10
+
+# Required for faster safetensors model loading
+fastsafetensors >= 0.3.2
@@ -57,7 +57,7 @@ arctic-inference == 0.1.1; platform_machine == "x86_64" # Required for suffix de
 numba == 0.65.0 # Required for N-gram speculative decoding
 numpy
 runai-model-streamer[s3,gcs,azure]==0.15.7
-fastsafetensors>=0.2.2; platform_machine == "x86_64" # 0.2.2 contains important fixes for multi-GPU mem usage
+fastsafetensors>=0.3.2
 instanttensor>=0.1.5; platform_machine == "x86_64"
 pydantic>=2.12 # 2.11 leads to error on python 3.13
 decord==0.6.0; platform_machine == "x86_64"
@@ -191,7 +191,7 @@ fastparquet==2024.11.0
    # via genai-perf
 fastrlock==0.8.2
    # via cupy-cuda12x
-fastsafetensors==0.2.2
+fastsafetensors==0.3.2
    # via
    #   -c requirements/cuda.txt
    #   -r requirements/test/cuda.in
@@ -43,6 +43,6 @@ tritonclient>=2.51.0
 numba == 0.65.0 # Required for N-gram speculative decoding
 numpy
 runai-model-streamer[s3,gcs,azure]==0.15.7
-fastsafetensors>=0.2.2
+fastsafetensors>=0.3.2
 instanttensor>=0.1.5
 pydantic>=2.12 # 2.11 leads to error on python 3.13
@@ -56,7 +56,7 @@ arctic-inference==0.1.1 # Required for suffix decoding test
 numba==0.65.0 # Required for N-gram speculative decoding
 numpy
 runai-model-streamer[s3,gcs,azure]==0.15.7
-fastsafetensors @ git+https://github.com/foundation-model-stack/fastsafetensors.git@0.2.2 # PyPI only ships CUDA wheels
+fastsafetensors>=0.3.2
 instanttensor>=0.1.5
 pydantic>=2.12 # 2.11 leads to error on python 3.13
 decord==0.6.0
@@ -240,8 +240,10 @@ fastar==0.10.0
    # via fastapi-cloud-cli
 fastparquet==2026.3.0
    # via genai-perf
-fastsafetensors @ git+https://github.com/foundation-model-stack/fastsafetensors.git@65d80088fca7a8f567fba30415fbcc80f7d2259c
-    # via -r requirements/test/rocm.in
+fastsafetensors==0.3.2
+    # via
+    #   -c requirements/rocm.txt
+    #   -r requirements/test/rocm.in
 filelock==3.25.2
    # via
    #   -c requirements/common.txt
@@ -1168,7 +1168,7 @@ setup(
        "zen": ["zentorch==2.11.0.0"],
        "bench": ["pandas", "matplotlib", "seaborn", "datasets", "scipy", "plotly"],
        "tensorizer": ["tensorizer==2.10.1"],
-        "fastsafetensors": ["fastsafetensors >= 0.2.2"],
+        "fastsafetensors": ["fastsafetensors >= 0.3.2"],
        "instanttensor": ["instanttensor >= 0.1.5"],
        "runai": ["runai-model-streamer[s3,gcs,azure] >= 0.15.7"],
        "audio": [