test: add INTEGRATION_TEST env var to speed up integration test (#3618)

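Add the INTEGRATION_TEST env var. When it is set to '1', main() forces max_ite=1 and check_accuracy=False, and AccuracyTask uses num_samples=1 with threshold=0 instead of calling get_num_samples_and_threshold().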

Signed-off-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com>
This commit is contained in:
Ivy Zhang 2025-05-08 10:44:50 +08:00 committed by GitHub
parent 81cc60a0fd
commit d7c51c953b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 24 additions and 6 deletions

View File

@ -42,6 +42,16 @@ from prompt_lookup.run_dtm_pld import run_dtm_pld
def main(args):
is_integration_test = os.getenv('INTEGRATION_TEST', '0') == '1'
if is_integration_test:
logger.info(
"Running in integration test mode - will only run one batch and skip accuracy checks"
)
logger.info(
"Setting max_ite=1 and check_accuracy=False for integration test")
args.max_ite = 1
args.check_accuracy = False
runtime_rank = tensorrt_llm.mpi_rank()
logger.set_level(args.log_level)

View File

@ -151,13 +151,21 @@ class AccuracyTask:
raise ValueError(
f"Not recognized speculative_config: {llm.args.speculative_config}."
)
is_integration_test = os.getenv('INTEGRATION_TEST', '0') == '1'
num_samples, threshold = self.get_num_samples_and_threshold(
dtype=llm.args.dtype,
quant_algo=llm.args.quant_config.quant_algo,
kv_cache_quant_algo=llm.args.quant_config.kv_cache_quant_algo,
spec_dec_algo=spec_dec_algo,
extra_acc_spec=extra_acc_spec)
if is_integration_test:
num_samples = 1
logger.info(
"Running in INTEGRATION_TEST mode: using only 1 sample and skipping accuracy verification"
)
threshold = 0
else:
num_samples, threshold = self.get_num_samples_and_threshold(
dtype=llm.args.dtype,
quant_algo=llm.args.quant_config.quant_algo,
kv_cache_quant_algo=llm.args.quant_config.kv_cache_quant_algo,
spec_dec_algo=spec_dec_algo,
extra_acc_spec=extra_acc_spec)
sampling_params = SamplingParams(
max_tokens=self.MAX_OUTPUT_LEN,