TensorRT-LLMs/tests/scripts/allreduce_perf/allreduce_perf_viz.py
Yukun He d272f1a9bc
[TRTLLM-8821][feat] Apply AutoTuner to AllReduce Op for strategy tuning. (#8531)
Signed-off-by: Yukun He <23156053+hyukn@users.noreply.github.com>
2026-01-05 15:44:37 +08:00

612 lines
22 KiB
Python

import os
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.colors import ListedColormap
from tensorrt_llm._utils import get_sm_version
def visualize_2d_heatmap(df, fusion_op='NONE', save_path=None):
    """Plot one latency heatmap per AllReduce strategy for a single fusion op.

    Rows whose fusion column equals ``fusion_op`` are selected and one subplot
    per distinct ``strategy`` value is drawn side by side. All subplots share
    one linear color scale (global min/max of the time column) so that they
    can be compared directly.

    Args:
        df: DataFrame providing the columns ``strategy``, ``num_tokens``,
            ``hidden_size``, a fusion column (``fusion``, else ``fusion_op``)
            and a time column (``time (us)``, else ``time_ms``).
        fusion_op: Value of the fusion column to visualize.
        save_path: Optional path to save the plot. If None, displays the plot.

    Creates a 2D heatmap where:
        - x-axis: num_tokens
        - y-axis: hidden_size
        - colors: mean time for each (hidden_size, num_tokens) cell

    Returns:
        None. A message is printed and nothing is drawn when ``df`` is
        None/empty or contains no row for ``fusion_op``.
    """
    if df is None or df.empty:
        print("DataFrame is empty or None")
        return

    fusion_col = 'fusion' if 'fusion' in df.columns else 'fusion_op'
    df_filtered = df[df[fusion_col] == fusion_op].copy()
    if df_filtered.empty:
        print(f"No data found for fusion == '{fusion_op}'")
        return

    # Determine column names (adapt to different data formats)
    num_tokens_col = 'num_tokens'
    hidden_size_col = 'hidden_size'
    time_col = 'time (us)' if 'time (us)' in df_filtered.columns else 'time_ms'
    # The unit shown in labels follows the column that is actually plotted,
    # so the `time_ms` fallback is no longer mislabelled as microseconds.
    time_unit = 'μs' if time_col == 'time (us)' else 'ms'

    strategies = df_filtered['strategy'].unique()

    # Set seaborn style for better aesthetics
    sns.set_style("whitegrid")
    plt.style.use('default')  # Reset to default style

    # One column of the figure per strategy; squeeze=False always returns a
    # 2D array of axes, so the single-strategy case needs no special handling.
    n_strategies = len(strategies)
    fig, axes = plt.subplots(1,
                             n_strategies,
                             figsize=(8 * n_strategies, 6),
                             squeeze=False)
    fig.suptitle(f'AllReduce Performance Heatmaps (Fusion: {fusion_op})',
                 fontsize=16,
                 fontweight='bold',
                 y=1.02)

    # Global min and max give a consistent color scale across all strategies
    global_time_min = df_filtered[time_col].min()
    global_time_max = df_filtered[time_col].max()

    for ax, strategy in zip(axes[0], strategies):
        strategy_data = df_filtered[df_filtered['strategy'] == strategy]
        if strategy_data.empty:
            # Happens e.g. for a NaN strategy label, which never compares equal
            ax.set_title(f'{strategy} - No Data')
            continue

        # hidden_size is the index (y-axis), num_tokens the columns (x-axis);
        # duplicate measurements of one cell are averaged.
        pivot_data = strategy_data.pivot_table(index=hidden_size_col,
                                               columns=num_tokens_col,
                                               values=time_col,
                                               aggfunc='mean')
        # Larger hidden_size at the top, num_tokens ascending left to right
        pivot_data = pivot_data.sort_index(ascending=False)
        pivot_data = pivot_data.reindex(sorted(pivot_data.columns), axis=1)

        sns.heatmap(
            pivot_data,
            ax=ax,
            cmap='Spectral_r',
            cbar=False,  # A shared colorbar is added after the loop
            fmt='.1f',
            square=True,  # Make cells square instead of rectangular
            linewidths=0.5,
            linecolor='white',
            vmin=global_time_min,
            vmax=global_time_max)

        ax.set_xlabel(f'{num_tokens_col.title()}',
                      fontweight='bold',
                      fontsize=12)
        ax.set_ylabel('Hidden Size', fontweight='bold', fontsize=12)
        ax.set_title(f'{strategy}', fontweight='bold', fontsize=14)

        # Rotate x-axis labels for better readability
        ax.tick_params(axis='x', rotation=45)
        ax.tick_params(axis='y', rotation=0)

        # Show integer tick labels ("1024" rather than "1024.0")
        x_labels = [
            f'{int(float(label.get_text()))}' for label in ax.get_xticklabels()
        ]
        y_labels = [
            f'{int(float(label.get_text()))}' for label in ax.get_yticklabels()
        ]
        ax.set_xticklabels(x_labels)
        ax.set_yticklabels(y_labels)

    # Add a single shared colorbar for all subplots
    fig.subplots_adjust(right=0.87)  # Make room for colorbar
    # A standalone mappable carries the shared linear normalization
    sm = plt.cm.ScalarMappable(cmap='Spectral_r',
                               norm=plt.Normalize(vmin=global_time_min,
                                                  vmax=global_time_max))
    sm.set_array([])
    cbar_ax = fig.add_axes([0.89, 0.15, 0.01,
                            0.7])  # [left, bottom, width, height]
    cbar = fig.colorbar(sm, cax=cbar_ax)
    cbar.set_label(f'Time ({time_unit})', fontweight='bold', fontsize=12)

    fig.tight_layout()
    fig.subplots_adjust(right=0.87)  # Ensure colorbar space is maintained

    # Save or show the plot
    if save_path:
        # Create directory if it doesn't exist
        Path(save_path).parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"2D heatmap saved to: {save_path}")
        plt.close(fig)  # Close exactly this figure to free memory
    else:
        plt.show()

    # Print some statistics
    print("\n2D Heatmap Statistics:")
    print(f"Strategies: {list(strategies)}")
    print(
        f"{num_tokens_col.title()} values: {sorted(df_filtered[num_tokens_col].unique())}"
    )
    print(f"Hidden sizes: {sorted(df_filtered[hidden_size_col].unique())}")
    print(
        f"Global time range (colorbar): {global_time_min:.4f} - {global_time_max:.4f} {time_unit}"
    )
    print(f"Total data points: {len(df_filtered)}")
def visualize_2d_best_strategy(df, fusion_op='NONE', save_path=None):
    """Visualize the fastest strategy for each grid position as a 2D heatmap.

    Rows whose fusion column equals ``fusion_op`` are selected, rows with
    strategy ``'AUTO'`` are discarded, and for every
    (num_tokens, hidden_size) pair the strategy with the smallest time is
    kept. The result is drawn as a categorical heatmap.

    Args:
        df: DataFrame providing the columns ``strategy``, ``num_tokens``,
            ``hidden_size``, a fusion column (``fusion``, else ``fusion_op``)
            and a time column (``time (us)``, else ``time_ms``).
        fusion_op: Value of the fusion column to visualize.
        save_path: Optional path to save the plot. If None, displays the plot.

    Creates a heat map where:
        - x-axis: num_tokens
        - y-axis: hidden_size
        - colors: best strategy for each (num_tokens, hidden_size) combination

    Returns:
        DataFrame with columns ``num_tokens``, ``hidden_size``,
        ``best_strategy`` and ``best_time`` (one row per grid position), or
        None when there is nothing to plot.
    """
    if df is None or df.empty:
        print("DataFrame is empty or None")
        return

    fusion_col = 'fusion' if 'fusion' in df.columns else 'fusion_op'
    df_filtered = df[df[fusion_col] == fusion_op]
    # Filter out AUTO strategy. The index is reset so that the label-based
    # lookup of the per-group minimum below cannot hit duplicated labels.
    df_filtered = df_filtered[df_filtered['strategy'] != 'AUTO'].reset_index(
        drop=True)
    if df_filtered.empty:
        print(
            f"No data found for fusion == '{fusion_op}' after filtering out AUTO strategy"
        )
        return

    # Determine column names (adapt to different data formats)
    num_tokens_col = 'num_tokens'
    time_col = 'time (us)' if 'time (us)' in df_filtered.columns else 'time_ms'

    # Row label of the minimum-time entry of every (num_tokens, hidden_size)
    # group; groups come back sorted by key and ties keep the first row.
    group_keys = ['num_tokens', 'hidden_size']
    best_idx = df_filtered.groupby(group_keys)[time_col].idxmin()
    best_df = df_filtered.loc[best_idx, group_keys + ['strategy', time_col]]
    best_df = best_df.rename(columns={
        'strategy': 'best_strategy',
        time_col: 'best_time'
    }).reset_index(drop=True)

    # Get unique strategies and create a color mapping
    strategies = sorted(df_filtered['strategy'].unique())
    # Create a categorical color map with distinct colors
    strategy_colors = plt.cm.Set3(np.linspace(0, 1, len(strategies)))
    strategy_to_num = {strategy: i for i, strategy in enumerate(strategies)}

    # Encode strategy names as integers *before* pivoting. This replaces the
    # deprecated DataFrame.applymap; the helper column lives only on a
    # temporary copy so the returned best_df keeps its four columns.
    strategy_ids = best_df['best_strategy'].map(strategy_to_num)
    pivot_numeric = best_df.assign(strategy_id=strategy_ids).pivot_table(
        index='hidden_size',
        columns='num_tokens',
        values='strategy_id',
        aggfunc='first'  # Should be unique anyway
    )
    # Larger hidden_size at the top, num_tokens ascending left to right
    pivot_numeric = pivot_numeric.sort_index(ascending=False)
    pivot_numeric = pivot_numeric.reindex(sorted(pivot_numeric.columns),
                                          axis=1)

    # Set seaborn style for better aesthetics
    sns.set_style("whitegrid")
    plt.style.use('default')  # Reset to default style

    fig, ax = plt.subplots(figsize=(12, 8))
    cmap = ListedColormap(strategy_colors)
    # Half-integer limits center every integer strategy id in its color band
    color_min = -0.5
    color_max = len(strategies) - 0.5

    sns.heatmap(
        pivot_numeric,
        ax=ax,
        cmap=cmap,
        cbar=False,  # A custom colorbar with strategy names is added below
        fmt='',
        square=True,  # Make cells square instead of rectangular
        linewidths=1,
        linecolor='white',
        vmin=color_min,
        vmax=color_max)

    # Create custom colorbar with strategy labels
    mappable = plt.cm.ScalarMappable(cmap=cmap,
                                     norm=plt.Normalize(vmin=color_min,
                                                        vmax=color_max))
    cbar = plt.colorbar(mappable, ax=ax, ticks=range(len(strategies)))
    cbar.ax.set_yticklabels(strategies)
    cbar.set_label('Best Strategy', fontweight='bold', fontsize=12)

    # Customize labels and title
    ax.set_xlabel(f'{num_tokens_col.title()}', fontweight='bold', fontsize=14)
    ax.set_ylabel('Hidden Size', fontweight='bold', fontsize=14)
    ax.set_title(
        f'Best AllReduce Strategy for Each Mesh Grid Position\n(Fusion: {fusion_op})',
        fontweight='bold',
        fontsize=16,
        pad=20)

    # Rotate x-axis labels for better readability
    ax.tick_params(axis='x', rotation=45)
    ax.tick_params(axis='y', rotation=0)

    # Show integer tick labels ("1024" rather than "1024.0")
    x_labels = [
        f'{int(float(label.get_text()))}' for label in ax.get_xticklabels()
    ]
    y_labels = [
        f'{int(float(label.get_text()))}' for label in ax.get_yticklabels()
    ]
    ax.set_xticklabels(x_labels)
    ax.set_yticklabels(y_labels)

    fig.tight_layout()

    # Save or show the plot
    if save_path:
        # Create directory if it doesn't exist
        Path(save_path).parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Best strategy heatmap saved to: {save_path}")
        plt.close(fig)  # Close exactly this figure to free memory
    else:
        plt.show()

    # Print some statistics
    print("\nBest Strategy Heatmap Statistics:")
    print(f"Strategies found: {strategies}")
    print(
        f"{num_tokens_col.title()} values: {sorted(best_df['num_tokens'].unique())}"
    )
    print(f"Hidden sizes: {sorted(best_df['hidden_size'].unique())}")
    print(f"Total grid positions: {len(best_df)}")

    # Show strategy distribution
    print("\nStrategy distribution:")
    total_positions = len(best_df)
    for strategy, count in best_df['best_strategy'].value_counts().items():
        percentage = (count / total_positions) * 100
        print(f" {strategy}: {count} positions ({percentage:.1f}%)")

    return best_df
def visualize_strategy_difference_heatmaps(df,
                                           fusion_op='NONE',
                                           save_path=None):
    """Plot, per strategy, how much slower it is than the best strategy.

    Rows whose fusion column equals ``fusion_op`` are selected. For every
    (num_tokens, hidden_size) pair the smallest time over all strategies is
    taken as the reference, and each row's slowdown is expressed as
    ``(time - best_time) / best_time * 100``.

    Args:
        df: DataFrame providing the columns ``strategy``, ``num_tokens``,
            ``hidden_size``, a fusion column (``fusion``, else ``fusion_op``)
            and a time column (``time (us)``, else ``time_ms``).
        fusion_op: Value of the fusion column to visualize.
        save_path: Optional file path to save the plot; ``.png`` is appended
            when the path has no extension. If None, displays the plots.

    Creates difference heatmaps where:
        - x-axis: num_tokens
        - y-axis: hidden_size
        - colors: relative slowdown versus the best strategy, in percent
        - one heatmap per strategy, all sharing one color scale

    Returns:
        None. A message is printed and nothing is drawn when there is no
        usable data.
    """
    if df is None or df.empty:
        print("DataFrame is empty or None")
        return

    fusion_col = 'fusion' if 'fusion' in df.columns else 'fusion_op'
    df_filtered = df[df[fusion_col] == fusion_op].copy()
    if df_filtered.empty:
        print(f"No data found for fusion == '{fusion_op}'")
        return

    # Determine column names
    num_tokens_col = 'num_tokens'
    time_col = 'time (us)' if 'time (us)' in df_filtered.columns else 'time_ms'
    diff_col = 'diff (%)'
    group_keys = ['num_tokens', 'hidden_size']

    # Best (minimum) time of each (num_tokens, hidden_size) group, broadcast
    # back onto every row of that group.
    best_time = df_filtered.groupby(group_keys)[time_col].transform('min')
    # A non-positive reference would turn the ratio into inf/NaN; mask it so
    # that the affected rows are dropped instead of poisoning the color scale.
    best_time = best_time.where(best_time > 0)

    diff_df = df_filtered[group_keys + ['strategy']].copy()
    diff_df[diff_col] = (df_filtered[time_col] - best_time) / best_time * 100
    diff_df = diff_df.dropna(subset=[diff_col])

    # Get unique strategies
    strategies = sorted(df_filtered['strategy'].unique())

    # Set seaborn style for better aesthetics
    sns.set_style("whitegrid")
    plt.style.use('default')

    # Ensure we have data to visualize
    if diff_df.empty:
        print("No valid data found for difference calculation")
        return

    # The range always contains 0, the value of the best strategy itself
    max_diff = max(0.0, float(diff_df[diff_col].max()))
    min_diff = min(0.0, float(diff_df[diff_col].min()))

    # Check if we have multiple strategies for meaningful comparison
    if len(strategies) == 1:
        print(
            f"Warning: Only one strategy ({strategies[0]}) found. All differences will be zero."
        )
    print(
        f"Generating difference heatmaps for {len(strategies)} strategies with {len(diff_df)} data points..."
    )

    # One column of the figure per strategy; squeeze=False always returns a
    # 2D array of axes, so the single-strategy case needs no special handling.
    n_strategies = len(strategies)
    fig, axes = plt.subplots(1,
                             n_strategies,
                             figsize=(8 * n_strategies, 6),
                             squeeze=False)
    fig.suptitle(
        f'Strategy Performance Difference from Best Strategy (Fusion: {fusion_op})',
        fontsize=16,
        fontweight='bold',
        y=1.02)

    # Use a colormap where 0 (best strategy) appears white
    cmap = 'Reds'  # White at 0 -> Red for worse performance
    # Keep a minimal color range even when every difference is zero
    color_max = max(max_diff, 0.1)

    for ax, strategy in zip(axes[0], strategies):
        strategy_df = diff_df[diff_df['strategy'] == strategy]
        if strategy_df.empty:
            ax.set_title(f'{strategy} - No Data')
            continue

        # hidden_size on the y-axis, num_tokens on the x-axis; duplicate
        # measurements of one cell are averaged.
        pivot_data = strategy_df.pivot_table(index='hidden_size',
                                             columns='num_tokens',
                                             values=diff_col,
                                             aggfunc='mean')
        # Larger hidden_size at the top, num_tokens ascending left to right
        pivot_data = pivot_data.sort_index(ascending=False)
        pivot_data = pivot_data.reindex(sorted(pivot_data.columns), axis=1)

        # Shrink the annotation font as the grid gets denser
        total_cells = pivot_data.shape[0] * pivot_data.shape[1]
        if total_cells <= 25:
            annot_fontsize = 8
        elif total_cells <= 64:
            annot_fontsize = 6
        else:
            annot_fontsize = 4

        sns.heatmap(
            pivot_data,
            ax=ax,
            cmap=cmap,
            cbar=False,  # A shared colorbar is added after the loop
            fmt='.1f',
            square=True,
            linewidths=0.5,
            linecolor='white',
            vmin=0,  # Force 0 (best strategy) to map to white
            vmax=color_max,
            annot=True,  # Show values in cells
            annot_kws={
                'fontsize': annot_fontsize,
                'weight': 'bold'
            })

        # Customize labels and title
        ax.set_xlabel(f'{num_tokens_col.title()}',
                      fontweight='bold',
                      fontsize=12)
        ax.set_ylabel('Hidden Size', fontweight='bold', fontsize=12)
        ax.set_title(f'{strategy}\n(Difference from Best %)',
                     fontweight='bold',
                     fontsize=12)

        # Rotate x-axis labels for better readability
        ax.tick_params(axis='x', rotation=45)
        ax.tick_params(axis='y', rotation=0)

        # Show integer tick labels ("1024" rather than "1024.0")
        x_labels = [
            f'{int(float(label.get_text()))}' for label in ax.get_xticklabels()
        ]
        y_labels = [
            f'{int(float(label.get_text()))}' for label in ax.get_yticklabels()
        ]
        ax.set_xticklabels(x_labels)
        ax.set_yticklabels(y_labels)

    # Add a single shared colorbar for all subplots
    fig.subplots_adjust(right=0.87)  # Make room for colorbar
    # A standalone mappable carries the shared normalization (0 maps to white)
    sm = plt.cm.ScalarMappable(cmap=cmap,
                               norm=plt.Normalize(vmin=0, vmax=color_max))
    sm.set_array([])
    cbar_ax = fig.add_axes([0.89, 0.15, 0.01,
                            0.7])  # [left, bottom, width, height]
    cbar = fig.colorbar(sm, cax=cbar_ax)
    cbar.set_label('Time Difference (%)\n', fontweight='bold', fontsize=12)

    fig.tight_layout()
    fig.subplots_adjust(right=0.87)  # Ensure colorbar space is maintained

    # Save or show the plot
    if save_path:
        save_file_path = Path(save_path)
        if not save_file_path.suffix:
            # If no extension provided, add .png
            save_file_path = save_file_path.with_suffix('.png')
        # Create parent directory if it doesn't exist
        save_file_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(save_file_path, dpi=300, bbox_inches='tight')
        print(f"Strategy difference heatmaps saved to: {save_file_path}")
        plt.close(fig)  # Close exactly this figure to free memory
    else:
        plt.show()

    # Print statistics (all values are percentages, not absolute times)
    print("\nStrategy Difference Heatmap Statistics:")
    print(f"Strategies analyzed: {strategies}")
    print(f"Difference range: {min_diff:.2f} to {max_diff:.2f} %")
    print("Note: Positive values indicate slower than best strategy")

    # Show per-strategy statistics
    for strategy in strategies:
        strategy_diffs = diff_df.loc[diff_df['strategy'] == strategy, diff_col]
        if strategy_diffs.empty:
            continue
        avg_diff = strategy_diffs.mean()
        max_diff_strategy = strategy_diffs.max()
        print(
            f" {strategy}: avg diff = {avg_diff:.2f} %, max diff = {max_diff_strategy:.2f} %"
        )
def main(argv=None):
    """Render every AllReduce benchmark visualization for a data directory.

    For each tensor-parallel size in ``[2, 4, 8]`` the file
    ``<data_dir>/benchmark.tp<tp_size>.sm<sm_version>.csv`` is loaded; files
    that do not exist are reported and skipped. For every fusion op three
    figures are written to ``<data_dir>/viz/<fusion_op>/``: the per-strategy
    latency heatmap, the best-strategy map and the difference-from-best
    heatmaps.

    Args:
        argv: Optional list of command-line arguments. When None, argparse
            falls back to ``sys.argv[1:]``, which is the behavior of the
            plain ``main()`` call.

    Returns:
        None.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default=None)
    args = parser.parse_args(argv)

    if args.data_dir is None:
        print("Please provide a data directory")
        return

    fusion_op_list = [
        "NONE",
        "RESIDUAL_RMS_NORM",
        "RESIDUAL_RMS_NORM_QUANT_FP8",
        "RESIDUAL_RMS_NORM_QUANT_NVFP4",
    ]
    tp_size_list = [2, 4, 8]

    data_dir = Path(args.data_dir)
    viz_root = data_dir / "viz"
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    viz_root.mkdir(parents=True, exist_ok=True)

    # Queried once; the value is only used to build the benchmark file names
    sm_version = get_sm_version()

    for tp_size in tp_size_list:
        case_name = f"benchmark.tp{tp_size}.sm{sm_version}"
        fname = data_dir / (case_name + ".csv")
        if not fname.exists():
            print(f"File {fname} does not exist")
            continue

        df = pd.read_csv(fname)
        # Summarize the loaded file once, not once per fusion op
        print(f"\n=== TP Size: {tp_size} ===")
        print(df.head())
        print(f"Data shape: {df.shape}")

        for fusion_op in fusion_op_list:
            fusion_dir = viz_root / fusion_op
            fusion_dir.mkdir(parents=True, exist_ok=True)

            # Per-strategy latency heatmaps
            viz_path_heatmap = str(fusion_dir / f"{case_name}_heatmap.png")
            visualize_2d_heatmap(df, fusion_op, save_path=viz_path_heatmap)

            # Best strategy for each (num_tokens, hidden_size) position
            viz_path_best_strategy = str(fusion_dir /
                                         f"{case_name}_best_strategy.png")
            visualize_2d_best_strategy(df,
                                       fusion_op,
                                       save_path=viz_path_best_strategy)

            # Relative slowdown of every strategy versus the best one
            viz_path_diff = str(
                fusion_dir / f"{case_name}_strategy_difference_heatmap.png")
            visualize_strategy_difference_heatmaps(df,
                                                   fusion_op,
                                                   save_path=viz_path_diff)


if __name__ == "__main__":
    main()