Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-14 06:27:45 +08:00)
squash (#3642)
Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com>
This commit is contained in:
Parent: 715428cca9
Commit: e36092bd40
@@ -453,3 +453,4 @@ examples/test_medusa.py::test_codellama_medusa_1gpu[CodeLlama-7b-Instruct] SKIP
|
||||
examples/test_medusa.py::test_llama_medusa_1gpu[llama-v2-7b-hf] SKIP (https://nvbugs/5219534)
|
||||
examples/test_medusa.py::test_llama_medusa_1gpu[llama-3.2-1b] SKIP (https://nvbugs/5219534)
|
||||
examples/test_medusa.py::test_llama_medusa_1gpu[llama-3.1-8b] SKIP (https://nvbugs/5219535)
|
||||
examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-4-mini-instruct-fp8-bfloat16] SKIP (https://nvbugspro.nvidia.com/bug/5226339)
|
||||
|
||||
@@ -38,6 +38,7 @@ from utils.util import getSMVersion
|
||||
[torch.float16, torch.float32, torch.bfloat16],
|
||||
)
|
||||
def test_fp8_scaled_mm(output_dtype, m, k_n):
|
||||
pytest.skip("https://nvbugspro.nvidia.com/bug/5228279")
|
||||
# Skip specific problematic case
|
||||
if m == 228 and k_n == (28672, 8192):
|
||||
pytest.skip("Skipping problematic case with m=228, k=28672, n=8192")
|
||||
|
||||
@@ -312,6 +312,7 @@ def test_llm_multi_node_pytorch():
|
||||
run_command(command)
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5223608: timeout")
|
||||
@skip_single_gpu
|
||||
def test_llm_multi_node_with_postproc():
|
||||
# TODO[chunweiy]: reactivate this later
|
||||
|
||||
Loading…
Reference in New Issue
Block a user