# Configuration for Flux model transforms (export, optimizer, compile)
# Usage: python build_and_run_flux.py --config flux_transforms.yaml

# Export configuration
export:
  clone: false
  strict: false
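# Note on the two flags above (an assumption, not stated in this file):
# "strict" likely maps to torch.export's strict-tracing mode, and "clone"
# to whether the model is deep-copied before export; confirm against the
# export transform in TensorRT-LLM before relying on either.
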
# TODO: Integrate these transforms into the optimizer
# Optimizer configuration - FP8/FP4 quantization and fusion
# optimizer:
#   quantize_fp8_from_graph:
#     stage: "pattern_matcher"
#   quantize_nvfp4_from_graph:
#     stage: "pattern_matcher"
#   fuse_fp8_gemms:
#     stage: "post_load_fusion"
#   fuse_fp4_gemms:
#     stage: "post_load_fusion"
#   fuse_fp8_linear:
#     stage: "post_load_fusion"
#     backend: "torch"
#   fuse_nvfp4_linear:
#     stage: "post_load_fusion"
#     backend: "trtllm"
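# Note (assumption, not confirmed by this file): the stage names above
# appear to name phases of the optimizer pipeline, with "pattern_matcher"
# transforms rewriting the exported graph and "post_load_fusion" transforms
# running after checkpoint weights are loaded.
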
# Compilation configuration
compile:
  backend: "torch-opt"
  cuda_graph_batch_sizes: null
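  # Illustrative example (not from the source): to capture CUDA graphs for
  # fixed batch sizes instead of leaving capture disabled, replace null
  # with a list, e.g.:
  # cuda_graph_batch_sizes: [1, 2, 4, 8]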