mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
parent
3a0ef73414
commit
c720d7f779
@ -26,6 +26,7 @@ MAX_SEQ_LEN = 4096 + 1024
|
||||
ids=["anchor1024", "anchor4096"])
|
||||
def test_model(backend, model_name, quant, sp_size, sa_block_size,
|
||||
sa_anchor_size):
|
||||
pytest.skip("https://nvbugs/5391679")
|
||||
quant_configs = {
|
||||
"bf16":
|
||||
QuantConfig(),
|
||||
|
||||
Loading…
Reference in New Issue
Block a user