diff --git a/examples/auto_deploy/nano_v3.yaml b/examples/auto_deploy/nano_v3.yaml
index 8844bc228e..4f91b403e0 100644
--- a/examples/auto_deploy/nano_v3.yaml
+++ b/examples/auto_deploy/nano_v3.yaml
@@ -45,3 +45,5 @@ transforms:
   fuse_mamba_a_log:
     stage: post_load_fusion
     enabled: true
+  insert_cached_ssm_attention:
+    backend: flashinfer_ssm
diff --git a/examples/auto_deploy/super_v3.yaml b/examples/auto_deploy/super_v3.yaml
index 56e7f292da..13b536a630 100644
--- a/examples/auto_deploy/super_v3.yaml
+++ b/examples/auto_deploy/super_v3.yaml
@@ -44,3 +44,5 @@ transforms:
   fuse_mamba_a_log:
     stage: post_load_fusion
     enabled: true
+  insert_cached_ssm_attention:
+    backend: flashinfer_ssm