mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
[https://nvbugs/5745152][fix] Fix some GPTOSS test setups (#10085)
Signed-off-by: Dongfeng Yu <dongfengy@nvidia.com>
This commit is contained in:
parent
4a5ef84dc2
commit
bfc591994c
@ -1091,11 +1091,13 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
|
||||
"max_attention_window": [128, 32768],
|
||||
"enable_block_reuse": block_reuse,
|
||||
"enable_partial_reuse": False,
|
||||
"free_gpu_memory_fraction": 0.5,
|
||||
}
|
||||
gen_server_config["kv_cache_config"] = {
|
||||
"max_attention_window": [128, 32768],
|
||||
"enable_block_reuse": block_reuse,
|
||||
"enable_partial_reuse": False,
|
||||
"free_gpu_memory_fraction": 0.5,
|
||||
}
|
||||
disaggregated_server_config = {
|
||||
"hostname": "localhost",
|
||||
|
||||
@ -4369,6 +4369,11 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
|
||||
"https://nvbugs/5636916: Remaining Hopper Eagle Accuracy Issue for only TP=4"
|
||||
)
|
||||
|
||||
if not one_model and overlap_scheduler:
|
||||
pytest.skip(
|
||||
"https://nvbugs/5745152: two_model + overlap_scheduler can sometimes time out."
|
||||
)
|
||||
|
||||
MAX_OUTPUT_LEN = 128179
|
||||
MAX_INPUT_LEN = 32768
|
||||
|
||||
|
||||
@ -320,8 +320,6 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_1NemotronNano8Bv1::test_fp8_prequan
|
||||
accuracy/test_llm_api_pytorch.py::TestNemotronH_47B_Base::test_auto_dtype[tp8ep4-cuda_graph=True] SKIP (https://nvbugs/5640697)
|
||||
accuracy/test_llm_api_pytorch.py::TestNemotronH_47B_Base::test_reasoning_fp8_prequantized[tp8ep8-cuda_graph=True] SKIP (https://nvbugs/5640697)
|
||||
accuracy/test_llm_api_pytorch.py::TestQwQ_32B::test_auto_dtype_tp4 SKIP (https://nvbugs/5640697)
|
||||
accuracy/test_disaggregated_serving.py::TestGPTOSS::test_auto_dtype[True] SKIP (https://nvbugs/5644632)
|
||||
accuracy/test_disaggregated_serving.py::TestGPTOSS::test_auto_dtype[False] SKIP (https://nvbugs/5644632)
|
||||
test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image-True] SKIP (https://nvbugs/5648560)
|
||||
test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image-False] SKIP (https://nvbugs/5648560)
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_trtllmgen_adp_lmtp] SKIP (https://nvbugs/5629136)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user