mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
fix: Waive twoshot to fix acc issue (#3066)
* waive twoshot to fix acc issue

Signed-off-by: Fanrong Li <23290157+lfr-0531@users.noreply.github.com>

---------

Signed-off-by: Fanrong Li <23290157+lfr-0531@users.noreply.github.com>
This commit is contained in:
parent
644a01cbbe
commit
ec03159e60
@ -483,9 +483,9 @@ class DeepseekV3DecoderLayer(DecoderLayer):
|
||||
**kwargs,
|
||||
) -> torch.Tensor:
|
||||
|
||||
# deepseek allreduce kernel is better when m < 512
|
||||
# The DeepSeek allreduce kernel is better when m < 512, but two-shot (128~512) has an accuracy bug, so it is waived.
|
||||
using_prev_fusion = self.deepseek_allreduce_disabled or hidden_states.size(
|
||||
0) >= 512
|
||||
0) > 128
|
||||
|
||||
# Self Attention
|
||||
hidden_states = self.self_attn(
|
||||
|
||||
Loading…
Reference in New Issue
Block a user