mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-18 16:55:08 +08:00
tests: update api change from decoder to sampler in test (#4479)
update

Signed-off-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com>
This commit is contained in:
parent
cc3f8e6431
commit
e977c75300
@@ -187,10 +187,11 @@ class TestLlama3_1_8BInstruct(LlmapiAccuracyTestHarness):
|
||||
task = GSM8K(self.MODEL_NAME)
|
||||
task.evaluate(llm)
|
||||
|
||||
@pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5292517")
|
||||
@skip_pre_hopper
|
||||
def test_fp8_llm_decoder(self):
|
||||
def test_fp8_llm_sampler(self):
|
||||
model_path = f"{llm_models_root()}/llama-3.1-model/Llama-3.1-8B-Instruct-FP8"
|
||||
pytorch_config = PyTorchConfig(enable_trtllm_decoder=True)
|
||||
pytorch_config = PyTorchConfig(enable_trtllm_sampler=True)
|
||||
llm = LLM(model_path, pytorch_backend_config=pytorch_config)
|
||||
assert llm.args.quant_config.quant_algo == QuantAlgo.FP8
|
||||
|
||||
|
||||
@@ -425,7 +425,7 @@ accuracy/test_llm_api.py::TestMixtral8x7B::test_tp2
|
||||
accuracy/test_llm_api.py::TestMixtral8x7B::test_smooth_quant_tp2pp2
|
||||
accuracy/test_llm_api.py::TestMixtral8x7BInstruct::test_awq_tp2
|
||||
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
|
||||
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_llm_decoder
|
||||
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_llm_sampler
|
||||
accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp4
|
||||
accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_nvfp4_tp4
|
||||
accuracy/test_cli_flow.py::TestLlama3_3_70BInstruct::test_fp8_prequantized_tp4
|
||||
|
||||
Loading…
Reference in New Issue
Block a user