From ecd1ae1563df48d3e295f275a7014bd3cbfc0daf Mon Sep 17 00:00:00 2001
From: jingyaogong
Date: Fri, 5 Dec 2025 23:08:29 +0800
Subject: [PATCH] [fix] reduce aux_loss_alpha

---
 model/model_minimind.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/model/model_minimind.py b/model/model_minimind.py
index 4245000..ad62a68 100755
--- a/model/model_minimind.py
+++ b/model/model_minimind.py
@@ -34,7 +34,7 @@ class MiniMindConfig(PretrainedConfig):
             n_routed_experts: int = 4,
             n_shared_experts: int = 1,
             scoring_func: str = 'softmax',
-            aux_loss_alpha: float = 0.1,
+            aux_loss_alpha: float = 0.01,
             seq_aux: bool = True,
             norm_topk_prob: bool = True,
             **kwargs