mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
test: add INTEGRATION_TEST env var to speed up integration test (#3618)
add INTEGRATION_TEST env var

Signed-off-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com>
This commit is contained in:
parent
81cc60a0fd
commit
d7c51c953b
@@ -42,6 +42,16 @@ from prompt_lookup.run_dtm_pld import run_dtm_pld
|
||||
|
||||
|
||||
def main(args):
|
||||
is_integration_test = os.getenv('INTEGRATION_TEST', '0') == '1'
|
||||
if is_integration_test:
|
||||
logger.info(
|
||||
"Running in integration test mode - will only run one batch and skip accuracy checks"
|
||||
)
|
||||
logger.info(
|
||||
"Setting max_ite=1 and check_accuracy=False for integration test")
|
||||
args.max_ite = 1
|
||||
args.check_accuracy = False
|
||||
|
||||
runtime_rank = tensorrt_llm.mpi_rank()
|
||||
logger.set_level(args.log_level)
|
||||
|
||||
|
||||
@@ -151,13 +151,21 @@ class AccuracyTask:
|
||||
raise ValueError(
|
||||
f"Not recognized speculative_config: {llm.args.speculative_config}."
|
||||
)
|
||||
is_integration_test = os.getenv('INTEGRATION_TEST', '0') == '1'
|
||||
|
||||
num_samples, threshold = self.get_num_samples_and_threshold(
|
||||
dtype=llm.args.dtype,
|
||||
quant_algo=llm.args.quant_config.quant_algo,
|
||||
kv_cache_quant_algo=llm.args.quant_config.kv_cache_quant_algo,
|
||||
spec_dec_algo=spec_dec_algo,
|
||||
extra_acc_spec=extra_acc_spec)
|
||||
if is_integration_test:
|
||||
num_samples = 1
|
||||
logger.info(
|
||||
"Running in INTEGRATION_TEST mode: using only 1 sample and skipping accuracy verification"
|
||||
)
|
||||
threshold = 0
|
||||
else:
|
||||
num_samples, threshold = self.get_num_samples_and_threshold(
|
||||
dtype=llm.args.dtype,
|
||||
quant_algo=llm.args.quant_config.quant_algo,
|
||||
kv_cache_quant_algo=llm.args.quant_config.kv_cache_quant_algo,
|
||||
spec_dec_algo=spec_dec_algo,
|
||||
extra_acc_spec=extra_acc_spec)
|
||||
|
||||
sampling_params = SamplingParams(
|
||||
max_tokens=self.MAX_OUTPUT_LEN,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user