mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-04 02:02:01 +08:00
[https://nvbugs/5766952][fix] Fix AIPerf issue. (#10666)
Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com>
This commit is contained in:
parent
5f9fc50233
commit
0f2d61b8c6
@ -806,7 +806,7 @@ def create_aiperf_command(model_name,
|
||||
str(concurrency),
|
||||
"--warmup-request-count",
|
||||
str(warmup_request_count),
|
||||
"--verbose",
|
||||
# "--verbose",
|
||||
]
|
||||
|
||||
|
||||
@ -1145,15 +1145,21 @@ def run_accuracy_test(model_path: str,
|
||||
return False, None
|
||||
|
||||
|
||||
def extract_stress_test_metrics(artifacts_dir="./artifacts",
|
||||
current_model=None):
|
||||
def extract_stress_test_metrics(artifacts_dir=None, current_model=None):
|
||||
"""
|
||||
Extract stress test metrics from the artifacts directory
|
||||
|
||||
Args:
|
||||
artifacts_dir (str): Path to the artifacts directory
|
||||
artifacts_dir (str): Path to the artifacts directory. If None, defaults to
|
||||
the 'artifacts' directory at the defs level (parent of stress_test)
|
||||
current_model (str, optional): If provided, only analyze artifacts for this model
|
||||
"""
|
||||
# Set default artifacts_dir relative to this script's location
|
||||
# The artifacts are at defs/artifacts/, one level up from stress_test/
|
||||
if artifacts_dir is None:
|
||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
artifacts_dir = os.path.join(script_dir, "..", "artifacts")
|
||||
|
||||
# Find all profile_export_aiperf.json files in the artifacts directory
|
||||
json_files = glob(os.path.join(artifacts_dir,
|
||||
"**/profile_export_aiperf.json"),
|
||||
@ -1211,17 +1217,25 @@ def extract_stress_test_metrics(artifacts_dir="./artifacts",
|
||||
{}).get("avg", 0)
|
||||
tokThroughput = results.get("output_token_throughput",
|
||||
{}).get("avg", 0)
|
||||
conCurrency = results.get("input_config", {}).get(
|
||||
"perf_analyzer", {}).get("stimulus",
|
||||
{}).get("concurrency", 0)
|
||||
conCurrency = results.get("input_config",
|
||||
{}).get("loadgen",
|
||||
{}).get("concurrency", 0)
|
||||
if conCurrency == 0:
|
||||
conCurrency = results.get("input_config", {}).get(
|
||||
"perf_analyzer", {}).get("stimulus",
|
||||
{}).get("concurrency", 0)
|
||||
|
||||
# Try to determine model name from directory structure first
|
||||
if first_dir in model_name_map:
|
||||
modelName = model_name_map[first_dir]
|
||||
else:
|
||||
# Fall back to model name from JSON if we can't extract from directory
|
||||
modelName = results.get("input_config",
|
||||
{}).get("model", ["unknown"])
|
||||
modelName = results.get("input_config", {}).get(
|
||||
"endpoint", {}).get("model_names", None)
|
||||
if modelName is None:
|
||||
modelName = results.get("input_config",
|
||||
{}).get("model_names",
|
||||
["unknown"])
|
||||
modelName = modelName[0] if isinstance(modelName,
|
||||
list) else modelName
|
||||
|
||||
|
||||
@ -133,7 +133,6 @@ perf/test_perf.py::test_perf[bart_large_cnn-bench-float16-input_output_len:128,2
|
||||
perf/test_perf.py::test_perf[mamba_130m-bench-float16-input_output_len:128,128] SKIP (https://nvbugspro.nvidia.com/bug/5295411)
|
||||
perf/test_perf.py::test_perf[bert_large-bench-float16-maxbs:32-input_len:128+512] SKIP (https://nvbugspro.nvidia.com/bug/5295411)
|
||||
perf/test_perf.py::test_perf[roberta_base-bench-float16-maxbs:32-input_len:128+512] SKIP (https://nvbugspro.nvidia.com/bug/5295411)
|
||||
stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-MAX_UTILIZATION-pytorch-stress-test] SKIP (https://nvbugs/5328495)
|
||||
examples/test_mistral.py::test_llm_mistral_v1_1gpu[mistral-7b-v0.1-float16-max_attention_window_size_4096-summarization_long] SKIP (https://nvbugs/5324976)
|
||||
examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_py_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1] SKIP (https://nvbugs/5333849)
|
||||
examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5333818)
|
||||
@ -302,7 +301,6 @@ cpp/test_e2e.py::test_model[-redrafter-86] SKIP (https://nvbugs/5761642)
|
||||
unittest/_torch/thop/parallel/test_fp8_block_scale_gemm.py::test_deep_gemm_in_subprocess[env2] SKIP (https://nvbugs/5766853)
|
||||
test_e2e.py::test_openai_responses SKIP (https://nvbugs/5804146)
|
||||
triton_server/test_triton.py::test_gpt_gather_logits[gpt-gather-logits] SKIP (https://nvbugs/5766960)
|
||||
stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-GUARANTEED_NO_EVICT-pytorch-stress-test] SKIP (https://nvbugs/5766952)
|
||||
full:sm89/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=1-ctx_pp=2] SKIP (https://nvbugs/5596337)
|
||||
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ngram SKIP (https://nvbugs/5768068)
|
||||
test_e2e.py::test_eagle3_output_consistency_4gpus[Qwen3/saved_models_Qwen3-235B-A22B_fp8_hf-Qwen3/qwen3-235B-eagle3] SKIP (https://nvbugs/5685010)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user