mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[https://nvbugs/5481087][fix] fix bug of ci when we use mocker (#7332)
Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com>
This commit is contained in:
parent
2b286ae613
commit
16e9d1121c
@ -2822,8 +2822,6 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
|
||||
extra_evaluator_kwargs = {
|
||||
"fewshot_as_multiturn": True,
|
||||
"apply_chat_template": True,
|
||||
"scores_filter": "exact_match,flexible-extract",
|
||||
"MAX_OUTPUT_LEN": 8192
|
||||
}
|
||||
|
||||
MODEL_PATH = f"{llm_models_root()}/gpt_oss/gpt-oss-120b"
|
||||
@ -2837,7 +2835,9 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
|
||||
(True, True),
|
||||
])
|
||||
def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
|
||||
pytest.skip("https://nvbugs/5481087")
|
||||
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
|
||||
mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
|
||||
{"scores_filter": "exact_match,flexible-extract"})
|
||||
if moe_backend == "TRITON" and not IS_TRITON_KERNELS_AVAILABLE:
|
||||
pytest.skip("Triton kernels are not available")
|
||||
|
||||
@ -2855,7 +2855,6 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
|
||||
|
||||
with llm:
|
||||
model_name = "GPT-OSS/MXFP4"
|
||||
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
|
||||
task = GSM8K(model_name)
|
||||
task.evaluate(llm,
|
||||
extra_evaluator_kwargs=self.extra_evaluator_kwargs)
|
||||
@ -2875,7 +2874,9 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
|
||||
ids=["tp4", "ep4", "dp4"])
|
||||
def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
|
||||
attention_dp, cuda_graph, overlap_scheduler, mocker):
|
||||
pytest.skip("https://nvbugs/5481087")
|
||||
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
|
||||
mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
|
||||
{"scores_filter": "exact_match,flexible-extract"})
|
||||
if moe_backend == "TRITON":
|
||||
if not IS_TRITON_KERNELS_AVAILABLE:
|
||||
pytest.skip("Triton kernels are not available")
|
||||
@ -2896,7 +2897,6 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
|
||||
with llm:
|
||||
model_name = "GPT-OSS/MXFP4"
|
||||
task = GSM8K(model_name)
|
||||
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
|
||||
task.evaluate(llm,
|
||||
extra_evaluator_kwargs=self.extra_evaluator_kwargs)
|
||||
|
||||
@ -2908,6 +2908,9 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
|
||||
ids=["dp4"])
|
||||
def test_w4a16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
|
||||
overlap_scheduler, monkeypatch, mocker):
|
||||
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
|
||||
mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
|
||||
{"scores_filter": "exact_match,flexible-extract"})
|
||||
if not IS_TRITON_KERNELS_AVAILABLE:
|
||||
pytest.skip("Triton kernels are not available")
|
||||
monkeypatch.setenv("OVERRIDE_QUANT_ALGO", "W4A16_MXFP4")
|
||||
@ -2927,7 +2930,6 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
|
||||
with llm:
|
||||
model_name = "GPT-OSS/BF16"
|
||||
task = GSM8K(model_name)
|
||||
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
|
||||
task.evaluate(llm,
|
||||
extra_evaluator_kwargs=self.extra_evaluator_kwargs)
|
||||
|
||||
|
||||
@ -331,11 +331,8 @@ accuracy/test_cli_flow.py::TestPhi4MiniInstruct::test_tp2 SKIP (https://nvbugs/5
|
||||
accuracy/test_cli_flow.py::TestLongAlpaca7B::test_auto_dtype SKIP (https://nvbugs/5481075)
|
||||
accuracy/test_llm_api.py::TestPhi4MiniInstruct::test_fp8 SKIP (https://nvbugs/5465143)
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput] SKIP (https://nvbugs/5471106)
|
||||
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass] SKIP (https://nvbugs/5481080)
|
||||
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-cutlass] SKIP (https://nvbugs/5481080)
|
||||
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-cutlass] SKIP (https://nvbugs/5481080)
|
||||
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-cutlass] SKIP (https://nvbugs/5481080)
|
||||
accuracy/test_llm_api_pytorch.py::TestEXAONE4::test_auto_dtype SKIP (https://nvbugs/5481090)
|
||||
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass] SKIP (https://nvbugs/5481080)
|
||||
test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Maverick-17B-128E-Instruct-FP8-llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-False] SKIP (https://nvbugs/5481094)
|
||||
test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Maverick-17B-128E-Instruct-FP8-llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-True] SKIP (https://nvbugs/5481094)
|
||||
test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Scout-17B-16E-Instruct-FP8-llama4-models/Llama-4-Scout-17B-16E-Instruct-FP8-True] SKIP (https://nvbugs/5481094)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user