mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
* add gsm8k Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * fix gsm8k Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * add gpqa Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * conditional import lm_eval Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * gpqa in lm_eval Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * system prompt Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * shuffle Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * update AA prompt and regex Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * revert AA prompt and regex Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * integration to tests Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * fix Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * add DS-R1 Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * fix and clean Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * fix Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * update tests Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * update Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * clean up Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * free_gpu_memory_fraction=0.8 Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * fix Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> --------- Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com>
66 lines
2.1 KiB
Python
66 lines
2.1 KiB
Python
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
import argparse
|
|
import re
|
|
|
|
import pandas as pd
|
|
|
|
# Every metric is reported in the log as "<label>: <decimal number>".
# The patterns use a look-behind on the label so that the whole match is
# just the number, which callers can pass straight to float().
_DECIMAL_NUMBER = r"\d+\.\d+"

# (metric name, label that precedes the value in the log)
_METRIC_LABELS = (
    ("rouge1", "rouge1"),
    ("mmlu", "MMLU weighted average accuracy"),
    ("gsm8k", "gsm8k average accuracy"),
    ("gpqa_diamond", "gpqa_diamond_cot_zeroshot_aa average accuracy"),
    ("perplexity", "Per-token perplexity"),
    ("passkey", "passkey accuracy"),
)

# Maps a metric name to the regex that extracts its value from a log line.
metric_regex = {
    name: rf"(?<={label}: ){_DECIMAL_NUMBER}"
    for name, label in _METRIC_LABELS
}
|
|
|
|
def parse_log(log, regexes=None):
    """Extract per-test-case metric values from a test log.

    The log must contain a line with ``items in this shard: `` followed by a
    ``", "``-separated list of test case names. A line that starts with the
    next expected test case name marks the beginning of that test case's
    section; test cases are therefore only recognised in the listed order.
    Within a section, the first match of each metric regex is recorded and
    later matches of the same metric are ignored. Lines before the first
    test case section are skipped.

    Args:
        log: The full log text.
        regexes: Mapping of metric name to a regex whose whole match is the
            numeric value. Defaults to the module-level ``metric_regex``.

    Returns:
        A ``(test_cases, data)`` tuple: the list of test case names and a
        parallel list of ``{metric: float}`` dicts (empty when no metric was
        found for a test case).

    Raises:
        ValueError: If the log has no ``items in this shard: `` line.
    """
    if regexes is None:
        regexes = metric_regex
    # Compile once up front instead of looking patterns up per line/metric.
    patterns = {metric: re.compile(regex) for metric, regex in regexes.items()}

    shard = re.search(r"(?<=items in this shard: ).+", log)
    if shard is None:
        raise ValueError(
            "Cannot find the test case list: the log has no "
            "'items in this shard: ' line.")
    # "." also matches "\r", so strip it; otherwise the last test case name
    # of a CRLF log would never match its section header line.
    test_cases = shard.group().strip().split(", ")
    data = [{} for _ in test_cases]

    i = -1
    for line in log.split("\n"):
        if i + 1 < len(test_cases) and line.startswith(test_cases[i + 1]):
            # Advance to next test case
            i += 1
            continue
        if i < 0:
            # Still before the first test case section.
            continue

        entry = data[i]
        for metric, pattern in patterns.items():
            if metric in entry:
                # Keep the first value reported for this test case.
                continue
            matched = pattern.search(line)
            if matched:
                entry[metric] = float(matched.group())

    return test_cases, data


def main(argv=None):
    """Parse a log file, print the metric table and optionally save it as CSV.

    Args:
        argv: Command-line arguments; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--log_file", type=str, required=True)
    parser.add_argument("--output_file", type=str, default=None)
    args = parser.parse_args(argv)

    with open(args.log_file) as f:
        log = f.read()

    test_cases, data = parse_log(log)

    # One row per test case, one column per metric found anywhere in the log.
    df = pd.DataFrame(data, index=test_cases)
    print(df)

    if args.output_file:
        df.to_csv(args.output_file)


if __name__ == "__main__":
    main()
|