diff --git a/tests/integration/defs/stress_test/stress_test.py b/tests/integration/defs/stress_test/stress_test.py index ade771cc5b..f81f0ab2bd 100644 --- a/tests/integration/defs/stress_test/stress_test.py +++ b/tests/integration/defs/stress_test/stress_test.py @@ -806,7 +806,7 @@ def create_aiperf_command(model_name, str(concurrency), "--warmup-request-count", str(warmup_request_count), - "--verbose", + # "--verbose", ] @@ -1145,15 +1145,21 @@ def run_accuracy_test(model_path: str, return False, None -def extract_stress_test_metrics(artifacts_dir="./artifacts", - current_model=None): +def extract_stress_test_metrics(artifacts_dir=None, current_model=None): """ Extract stress test metrics from the artifacts directory Args: - artifacts_dir (str): Path to the artifacts directory + artifacts_dir (str): Path to the artifacts directory. If None, defaults to + the 'artifacts' directory at the defs level (parent of stress_test) current_model (str, optional): If provided, only analyze artifacts for this model """ + # Set default artifacts_dir relative to this script's location + # The artifacts are at defs/artifacts/, one level up from stress_test/ + if artifacts_dir is None: + script_dir = os.path.dirname(os.path.abspath(__file__)) + artifacts_dir = os.path.join(script_dir, "..", "artifacts") + # Find all profile_export_aiperf.json files in the artifacts directory json_files = glob(os.path.join(artifacts_dir, "**/profile_export_aiperf.json"), @@ -1211,17 +1217,25 @@ def extract_stress_test_metrics(artifacts_dir="./artifacts", {}).get("avg", 0) tokThroughput = results.get("output_token_throughput", {}).get("avg", 0) - conCurrency = results.get("input_config", {}).get( - "perf_analyzer", {}).get("stimulus", - {}).get("concurrency", 0) + conCurrency = results.get("input_config", + {}).get("loadgen", + {}).get("concurrency", 0) + if conCurrency == 0: + conCurrency = results.get("input_config", {}).get( + "perf_analyzer", {}).get("stimulus", + {}).get("concurrency", 0) # Try to determine model name from directory structure first if first_dir in model_name_map: modelName = model_name_map[first_dir] else: # Fall back to model name from JSON if we can't extract from directory - modelName = results.get("input_config", - {}).get("model", ["unknown"]) + modelName = results.get("input_config", {}).get( + "endpoint", {}).get("model_names", None) + if modelName is None: + modelName = results.get("input_config", + {}).get("model_names", + ["unknown"]) modelName = modelName[0] if isinstance(modelName, list) else modelName diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index 73d357b205..c951848315 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -133,7 +133,6 @@ perf/test_perf.py::test_perf[bart_large_cnn-bench-float16-input_output_len:128,2 perf/test_perf.py::test_perf[mamba_130m-bench-float16-input_output_len:128,128] SKIP (https://nvbugspro.nvidia.com/bug/5295411) perf/test_perf.py::test_perf[bert_large-bench-float16-maxbs:32-input_len:128+512] SKIP (https://nvbugspro.nvidia.com/bug/5295411) perf/test_perf.py::test_perf[roberta_base-bench-float16-maxbs:32-input_len:128+512] SKIP (https://nvbugspro.nvidia.com/bug/5295411) -stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-MAX_UTILIZATION-pytorch-stress-test] SKIP (https://nvbugs/5328495) examples/test_mistral.py::test_llm_mistral_v1_1gpu[mistral-7b-v0.1-float16-max_attention_window_size_4096-summarization_long] SKIP (https://nvbugs/5324976) examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_py_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1] SKIP (https://nvbugs/5333849) examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5333818) @@ -302,7 +301,6 @@ cpp/test_e2e.py::test_model[-redrafter-86] SKIP (https://nvbugs/5761642) unittest/_torch/thop/parallel/test_fp8_block_scale_gemm.py::test_deep_gemm_in_subprocess[env2] SKIP (https://nvbugs/5766853) test_e2e.py::test_openai_responses SKIP (https://nvbugs/5804146) triton_server/test_triton.py::test_gpt_gather_logits[gpt-gather-logits] SKIP (https://nvbugs/5766960) -stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-GUARANTEED_NO_EVICT-pytorch-stress-test] SKIP (https://nvbugs/5766952) full:sm89/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=1-ctx_pp=2] SKIP (https://nvbugs/5596337) accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ngram SKIP (https://nvbugs/5768068) test_e2e.py::test_eagle3_output_consistency_4gpus[Qwen3/saved_models_Qwen3-235B-A22B_fp8_hf-Qwen3/qwen3-235B-eagle3] SKIP (https://nvbugs/5685010)