[https://nvbugs/5766952][fix] Fix AIPerf stress-test issues: resolve the artifacts path relative to the script and parse the AIPerf result layout. (#10666)

Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com>
This commit is contained in:
dominicshanshan 2026-01-15 09:54:34 +08:00 committed by GitHub
parent 5f9fc50233
commit 0f2d61b8c6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 23 additions and 11 deletions

View File

@ -806,7 +806,7 @@ def create_aiperf_command(model_name,
str(concurrency),
"--warmup-request-count",
str(warmup_request_count),
"--verbose",
# "--verbose",
]
@ -1145,15 +1145,21 @@ def run_accuracy_test(model_path: str,
return False, None
def extract_stress_test_metrics(artifacts_dir="./artifacts",
current_model=None):
def extract_stress_test_metrics(artifacts_dir=None, current_model=None):
"""
Extract stress test metrics from the artifacts directory
Args:
artifacts_dir (str): Path to the artifacts directory
artifacts_dir (str): Path to the artifacts directory. If None, defaults to
the 'artifacts' directory at the defs level (parent of stress_test)
current_model (str, optional): If provided, only analyze artifacts for this model
"""
# Set default artifacts_dir relative to this script's location
# The artifacts are at defs/artifacts/, one level up from stress_test/
if artifacts_dir is None:
script_dir = os.path.dirname(os.path.abspath(__file__))
artifacts_dir = os.path.join(script_dir, "..", "artifacts")
# Find all profile_export_aiperf.json files in the artifacts directory
json_files = glob(os.path.join(artifacts_dir,
"**/profile_export_aiperf.json"),
@ -1211,17 +1217,25 @@ def extract_stress_test_metrics(artifacts_dir="./artifacts",
{}).get("avg", 0)
tokThroughput = results.get("output_token_throughput",
{}).get("avg", 0)
conCurrency = results.get("input_config", {}).get(
"perf_analyzer", {}).get("stimulus",
{}).get("concurrency", 0)
conCurrency = results.get("input_config",
{}).get("loadgen",
{}).get("concurrency", 0)
if conCurrency == 0:
conCurrency = results.get("input_config", {}).get(
"perf_analyzer", {}).get("stimulus",
{}).get("concurrency", 0)
# Try to determine model name from directory structure first
if first_dir in model_name_map:
modelName = model_name_map[first_dir]
else:
# Fall back to model name from JSON if we can't extract from directory
modelName = results.get("input_config",
{}).get("model", ["unknown"])
modelName = results.get("input_config", {}).get(
"endpoint", {}).get("model_names", None)
if modelName is None:
modelName = results.get("input_config",
{}).get("model_names",
["unknown"])
modelName = modelName[0] if isinstance(modelName,
list) else modelName

View File

@ -133,7 +133,6 @@ perf/test_perf.py::test_perf[bart_large_cnn-bench-float16-input_output_len:128,2
perf/test_perf.py::test_perf[mamba_130m-bench-float16-input_output_len:128,128] SKIP (https://nvbugspro.nvidia.com/bug/5295411)
perf/test_perf.py::test_perf[bert_large-bench-float16-maxbs:32-input_len:128+512] SKIP (https://nvbugspro.nvidia.com/bug/5295411)
perf/test_perf.py::test_perf[roberta_base-bench-float16-maxbs:32-input_len:128+512] SKIP (https://nvbugspro.nvidia.com/bug/5295411)
stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-MAX_UTILIZATION-pytorch-stress-test] SKIP (https://nvbugs/5328495)
examples/test_mistral.py::test_llm_mistral_v1_1gpu[mistral-7b-v0.1-float16-max_attention_window_size_4096-summarization_long] SKIP (https://nvbugs/5324976)
examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_py_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1] SKIP (https://nvbugs/5333849)
examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5333818)
@ -302,7 +301,6 @@ cpp/test_e2e.py::test_model[-redrafter-86] SKIP (https://nvbugs/5761642)
unittest/_torch/thop/parallel/test_fp8_block_scale_gemm.py::test_deep_gemm_in_subprocess[env2] SKIP (https://nvbugs/5766853)
test_e2e.py::test_openai_responses SKIP (https://nvbugs/5804146)
triton_server/test_triton.py::test_gpt_gather_logits[gpt-gather-logits] SKIP (https://nvbugs/5766960)
stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-GUARANTEED_NO_EVICT-pytorch-stress-test] SKIP (https://nvbugs/5766952)
full:sm89/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=1-ctx_pp=2] SKIP (https://nvbugs/5596337)
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ngram SKIP (https://nvbugs/5768068)
test_e2e.py::test_eagle3_output_consistency_4gpus[Qwen3/saved_models_Qwen3-235B-A22B_fp8_hf-Qwen3/qwen3-235B-eagle3] SKIP (https://nvbugs/5685010)