mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
Merge commit 'ed27a72bcf71f7ab0e7137f7999988c9de82386f' into feat/b300_cu13
Signed-off-by: Xiwen Yu <13230610+VALLIS-NERIA@users.noreply.github.com>
This commit is contained in:
commit
4cf9fed1e7
@ -380,7 +380,7 @@ def runLLMTestlistOnSlurm(pipeline, platform, testList, config=VANILLA_CONFIG, p
|
||||
// Wait 10 minutes to check status of the node again
|
||||
sleep(time: 10, unit: 'MINUTES')
|
||||
// Avoid the node being stuck in the held state.
|
||||
Utils.exec(pipeline, Utils.sshUserCmd(remote, "\"scontrol release ${slurmJobID} || true\""))
|
||||
Utils.exec(pipeline, script: Utils.sshUserCmd(remote, "\"scontrol release ${slurmJobID} || true\""), numRetries: 3)
|
||||
counter++
|
||||
}
|
||||
}
|
||||
|
||||
@ -208,10 +208,10 @@ Qwen3/Qwen3-30B-A3B:
|
||||
accuracy: 79.53
|
||||
- quant_algo: FP8
|
||||
kv_cache_quant_algo: FP8
|
||||
accuracy: 80.65
|
||||
accuracy: 79.53
|
||||
- quant_algo: NVFP4
|
||||
kv_cache_quant_algo: FP8
|
||||
accuracy: 80.65
|
||||
accuracy: 79.53
|
||||
- quant_algo: W4A8_MXFP4_FP8
|
||||
accuracy: 79.78
|
||||
- quant_algo: W4A8_MXFP4_MXFP8
|
||||
|
||||
Loading…
Reference in New Issue
Block a user