# File: TensorRT-LLM examples/auto_deploy/flux_transforms.yaml
# Provenance: commit 8d7cda2318 — "[None][chore] Update the Flux autodeploy example (#8434)"
# Signed-off-by: ajrasane <131806219+ajrasane@users.noreply.github.com>
# Co-authored-by: Frida Hou <201670829+Fridah-nv@users.noreply.github.com>
# Date: 2025-11-18 14:16:04 -08:00
# Configuration for Flux model transforms (export, optimizer, compile)
# Usage: python build_and_run_flux.py --config flux_transforms.yaml
# Export configuration
# NOTE(review): `clone` and `strict` must be nested under `export:` — at column 0
# they would parse as top-level keys and leave `export` as null.
export:
  clone: false   # presumably: export in place rather than cloning the model first — TODO confirm
  strict: false  # presumably: non-strict torch.export tracing mode — TODO confirm
# TODO: Integrate these transforms into the optimizer
# Optimizer configuration - FP8/FP4 quantization and fusion
# (indentation restored so this stanza parses correctly if uncommented)
# optimizer:
#   quantize_fp8_from_graph:
#     stage: "pattern_matcher"
#   quantize_nvfp4_from_graph:
#     stage: "pattern_matcher"
#   fuse_fp8_gemms:
#     stage: "post_load_fusion"
#   fuse_fp4_gemms:
#     stage: "post_load_fusion"
#   fuse_fp8_linear:
#     stage: "post_load_fusion"
#     backend: "torch"
#   fuse_nvfp4_linear:
#     stage: "post_load_fusion"
#     backend: "trtllm"
# Compilation configuration
# NOTE(review): `backend` and `cuda_graph_batch_sizes` must be nested under
# `compile:` — unindented they would parse as top-level keys and leave
# `compile` as null.
compile:
  backend: "torch-opt"
  # null presumably defers CUDA-graph batch-size selection to the runtime
  # defaults — TODO confirm against the consumer's schema.
  cuda_graph_batch_sizes: null