[https://nvbugs/5804923][none] unwaive test (#11005)
Signed-off-by: Perkz Zheng <67892460+PerkzZheng@users.noreply.github.com>
commit e0b11d6ea0
parent ca9537e17c
@@ -257,7 +257,6 @@ accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_chunked_p
 accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=2-overlap_scheduler=True] SKIP (https://nvbugs/5800591)
 full:RTXPro6000D/accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-ep4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5800672)
 examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1] SKIP (https://nvbugs/5802248)
-unittest/_torch/modeling/test_modeling_llama.py::TestLlama::test_llama_verification_with_kv_cache_relocation SKIP (https://nvbugs/5804923)
 accuracy/test_disaggregated_serving.py::TestGemma3_1BInstruct::test_auto_dtype[False] SKIP (https://nvbugs/5799901)
 accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype SKIP (https://nvbugs/5799901)
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3_guided_decoding_4gpus[one_model] SKIP (https://nvbugs/5596343)
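Each entry in this waive list pairs a full pytest node ID with a SKIP marker and the tracking bug given as the reason; the commit removes the entry for test_llama_verification_with_kv_cache_relocation, re-enabling that test. As a minimal sketch only, the snippet below parses a line in this format; the helper is illustrative and is not part of the repo's test tooling.

# Illustrative parser for a "TEST_ID SKIP (reason)" waive entry; not the repo's actual tooling.
import re

WAIVE_RE = re.compile(r"^(?P<test_id>\S+)\s+SKIP\s+\((?P<reason>[^)]+)\)\s*$")

def parse_waive_line(line: str):
    """Return (test_id, reason) for a waive entry, or None if the line does not match."""
    m = WAIVE_RE.match(line.strip())
    return (m.group("test_id"), m.group("reason")) if m else None

print(parse_waive_line(
    "unittest/_torch/modeling/test_modeling_llama.py::TestLlama::"
    "test_llama_verification_with_kv_cache_relocation SKIP (https://nvbugs/5804923)"))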
@@ -406,7 +406,7 @@ class TestLlama(unittest.TestCase):
 
         llama = LlamaForCausalLM(model_config).to(dtype).to(device)
         llama.load_weights(hf_llama.state_dict())
-        num_blocks = 1
+        num_blocks = 2
         tokens_per_block = 32
         head_dim = llama.config.hidden_size // llama.config.num_attention_heads
         num_layers = llama.config.num_hidden_layers
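For context, the sketch below shows how parameters like these typically size a paged KV-cache pool: doubling num_blocks doubles the number of cache pages available, which presumably gives the KV-cache relocation test a second block to move cached tokens into. The layout, dtype, and head counts here are assumptions for illustration and are not taken from the test itself.

# Illustrative only: how the parameters in the hunk above could size a paged KV-cache pool.
# The tensor layout and the placeholder values are assumptions, not the test's actual code.
import torch

num_blocks = 2          # bumped from 1 in this commit
tokens_per_block = 32
num_layers = 32         # placeholder; the test reads this from llama.config
num_kv_heads = 8        # placeholder; the real value comes from the model config
head_dim = 128          # hidden_size // num_attention_heads in the test

# One common paged-KV layout: [blocks, layers, 2 (K and V), kv_heads, tokens_per_block, head_dim].
kv_cache_pool = torch.zeros(
    num_blocks, num_layers, 2, num_kv_heads, tokens_per_block, head_dim,
    dtype=torch.float16)

max_cached_tokens = num_blocks * tokens_per_block  # 64 tokens with num_blocks=2
print(kv_cache_pool.shape, max_cached_tokens)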