[feat] update eval

2026-04-23 15:58:15 +08:00 · 2025-10-18 00:24:36 +08:00 · 2025-10-18 00:24:36 +08:00 · 3812a16d20
commit 3812a16d20
parent 0682134e21
1 changed files with 2 additions and 2 deletions
--- a/eval_model.py
+++ b/eval_model.py
@@ -29,7 +29,7 @@ def init_model(args):
            apply_lora(model)
            load_lora(model, f'./{args.out_dir}/lora/{args.lora_name}_{args.hidden_size}.pth')
    else:
-        transformers_model_path = './MiniMind/MiniMind2'
+        transformers_model_path = './MiniMind2'
        tokenizer = AutoTokenizer.from_pretrained(transformers_model_path)
        model = AutoModelForCausalLM.from_pretrained(transformers_model_path, trust_remote_code=True)
    print(f'MiniMind模型参数量: {sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e6:.2f}M(illion)')
@@ -112,7 +112,7 @@ def main():
    parser.add_argument('--num_hidden_layers', default=8, type=int)
    parser.add_argument('--max_seq_len', default=8192, type=int)
    parser.add_argument('--use_moe', default=False, type=bool)
-    parser.add_argument('--model_mode', default=5, type=int, help="0: 预训练模型，1: SFT-Chat模型，2: RLHF-Chat模型，3: Reason模型，4: RLAIF-Chat模型，6: Funcall-Chat模型")
+    parser.add_argument('--model_mode', default=1, type=int, help="0: 预训练模型，1: SFT-Chat模型，2: RLHF-Chat模型，3: Reason模型，4/5: RLAIF-Chat模型")
    # 启用长度外推，默认为4倍（注：仅解决位置编码外推问题，不代表模型真实具备长文本能力）
    parser.add_argument('--inference_rope_scaling', default=False, action='store_true')
    # 携带历史对话上下文条数history_cnt需要设为偶数，即【用户问题, 模型回答】为1组；设置为0时，即当前query不携带历史上文