mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[https://nvbugs/5565549][fix] unwaive test_disaggregated_spec_dec_batch_slot_limit (#8500)
Signed-off-by: Bo Deng <deemod@nvidia.com>
This commit is contained in:
parent
6631791c60
commit
9e30f14da8
@ -419,8 +419,9 @@ def test_disaggregated_spec_dec_batch_slot_limit(model, spec_dec_model_path,
|
||||
max_batch_size=1))
|
||||
|
||||
kv_cache_configs = [
|
||||
KvCacheConfig(max_tokens=128, enable_block_reuse=False)
|
||||
for _ in range(2)
|
||||
KvCacheConfig(max_tokens=128,
|
||||
enable_block_reuse=False,
|
||||
free_gpu_memory_fraction=0.4) for _ in range(2)
|
||||
]
|
||||
cache_transceiver_configs = [
|
||||
CacheTransceiverConfig(backend="DEFAULT") for _ in range(2)
|
||||
|
||||
@ -334,7 +334,6 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True] SKIP (https://nvbugs/5444687)
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-pp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] SKIP (https://nvbugs/5565604)
|
||||
unittest/_torch/multi_gpu_modeling/test_llama3.py::test_llama_3_3 SKIP (https://nvbugs/5565559)
|
||||
disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_spec_dec_batch_slot_limit[False-False-EAGLE3-LLaMA3.1-Instruct-8B-Llama-3.1-8B-Instruct] SKIP (https://nvbugs/5565549)
|
||||
accuracy/test_llm_api_pytorch.py::TestGemma3_27BInstruct::test_fp8_prequantized SKIP (https://nvbugs/5565521)
|
||||
test_e2e.py::test_openai_chat_harmony SKIP (https://nvbugs/5575829)
|
||||
unittest/bindings/test_executor_bindings.py::test_request_perf_metrics_draft SKIP (https://nvbugs/5565590)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user