test: add INTEGRATION_TEST env var to speed up integration test (#3618)

add INTEGRATION_TEST env var

Signed-off-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com>
2026-01-13 22:18:36 +08:00 · 2025-05-08 10:44:50 +08:00 · 2025-05-08 10:44:50 +08:00 · d7c51c953b
commit d7c51c953b
parent 81cc60a0fd
2 changed files with 24 additions and 6 deletions
--- a/examples/summarize.py
+++ b/examples/summarize.py
@@ -42,6 +42,16 @@ from prompt_lookup.run_dtm_pld import run_dtm_pld


 def main(args):
+    is_integration_test = os.getenv('INTEGRATION_TEST', '0') == '1'
+    if is_integration_test:
+        logger.info(
+            "Running in integration test mode - will only run one batch and skip accuracy checks"
+        )
+        logger.info(
+            "Setting max_ite=1 and check_accuracy=False for integration test")
+        args.max_ite = 1
+        args.check_accuracy = False
+
    runtime_rank = tensorrt_llm.mpi_rank()
    logger.set_level(args.log_level)

--- a/tests/integration/defs/accuracy/accuracy_core.py
+++ b/tests/integration/defs/accuracy/accuracy_core.py
@@ -151,13 +151,21 @@ class AccuracyTask:
            raise ValueError(
                f"Not recognized speculative_config: {llm.args.speculative_config}."
            )
+        is_integration_test = os.getenv('INTEGRATION_TEST', '0') == '1'

-        num_samples, threshold = self.get_num_samples_and_threshold(
-            dtype=llm.args.dtype,
-            quant_algo=llm.args.quant_config.quant_algo,
-            kv_cache_quant_algo=llm.args.quant_config.kv_cache_quant_algo,
-            spec_dec_algo=spec_dec_algo,
-            extra_acc_spec=extra_acc_spec)
+        if is_integration_test:
+            num_samples = 1
+            logger.info(
+                "Running in INTEGRATION_TEST mode: using only 1 sample and skipping accuracy verification"
+            )
+            threshold = 0
+        else:
+            num_samples, threshold = self.get_num_samples_and_threshold(
+                dtype=llm.args.dtype,
+                quant_algo=llm.args.quant_config.quant_algo,
+                kv_cache_quant_algo=llm.args.quant_config.kv_cache_quant_algo,
+                spec_dec_algo=spec_dec_algo,
+                extra_acc_spec=extra_acc_spec)

        sampling_params = SamplingParams(
            max_tokens=self.MAX_OUTPUT_LEN,