diff --git a/tests/integration/defs/perf/test_perf.py b/tests/integration/defs/perf/test_perf.py index a0db924269..44145728ae 100644 --- a/tests/integration/defs/perf/test_perf.py +++ b/tests/integration/defs/perf/test_perf.py @@ -342,6 +342,11 @@ BENCH_PERF_METRIC_LOG_QUERIES = { PerfMetricType.KV_CACHE_SIZE: re.compile(r".*(?:Allocated ([\d\.]+) GiB for max tokens in paged KV cache|" r"Final KV cache size after resize: ([\d\.]+) GiB).*"), + PerfMetricType.PER_USER_OUTPUT_THROUGHPUT: + re.compile( + r"Per User Output Throughput \[w\/ ctx\] \(tps\/user\):\s+([\d\.]+)"), + PerfMetricType.PER_GPU_OUTPUT_THROUGHPUT: + re.compile(r"Per GPU Output Throughput \(tps\/gpu\):\s+([\d\.]+)"), } AGGR_SERVER_PERF_METRIC_LOG_QUERIES = { @@ -457,6 +462,8 @@ PERF_METRIC_STRING = { PerfMetricType.ENGINE_SIZE: "engine_size", PerfMetricType.CONTEXT_GPU_MEMORY: "context_gpu_memory", PerfMetricType.KV_CACHE_SIZE: "kv_cache_size", + PerfMetricType.PER_USER_OUTPUT_THROUGHPUT: "per_user_output_throughput", + PerfMetricType.PER_GPU_OUTPUT_THROUGHPUT: "per_gpu_output_throughput", } BUILDER_METRICS = [ diff --git a/tests/integration/defs/perf/utils.py b/tests/integration/defs/perf/utils.py index 386138c090..2bae54aaa1 100644 --- a/tests/integration/defs/perf/utils.py +++ b/tests/integration/defs/perf/utils.py @@ -114,6 +114,8 @@ class PerfMetricType(str, Enum): KV_CACHE_SIZE = "KV_CACHE_SIZE" DISAGG_SERVER_E2EL = "DISAGG_SERVER_E2EL" DISAGG_SERVER_TTFT = "DISAGG_SERVER_TTFT" + PER_USER_OUTPUT_THROUGHPUT = "PER_USER_OUTPUT_THROUGHPUT" + PER_GPU_OUTPUT_THROUGHPUT = "PER_GPU_OUTPUT_THROUGHPUT" @contextlib.contextmanager