[feat] update eval

This commit is contained in:
jingyaogong 2025-10-18 00:24:36 +08:00
parent 0682134e21
commit 3812a16d20

View File

@@ -29,7 +29,7 @@ def init_model(args):
apply_lora(model)
load_lora(model, f'./{args.out_dir}/lora/{args.lora_name}_{args.hidden_size}.pth')
else:
transformers_model_path = './MiniMind/MiniMind2'
transformers_model_path = './MiniMind2'
tokenizer = AutoTokenizer.from_pretrained(transformers_model_path)
model = AutoModelForCausalLM.from_pretrained(transformers_model_path, trust_remote_code=True)
print(f'MiniMind模型参数量: {sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e6:.2f}M(illion)')
@@ -112,7 +112,7 @@ def main():
parser.add_argument('--num_hidden_layers', default=8, type=int)
parser.add_argument('--max_seq_len', default=8192, type=int)
parser.add_argument('--use_moe', default=False, type=bool)
parser.add_argument('--model_mode', default=5, type=int, help="0: 预训练模型1: SFT-Chat模型2: RLHF-Chat模型3: Reason模型4: RLAIF-Chat模型6: Funcall-Chat模型")
parser.add_argument('--model_mode', default=1, type=int, help="0: 预训练模型1: SFT-Chat模型2: RLHF-Chat模型3: Reason模型4/5: RLAIF-Chat模型")
# 启用长度外推默认为4倍仅解决位置编码外推问题不代表模型真实具备长文本能力
parser.add_argument('--inference_rope_scaling', default=False, action='store_true')
# 携带历史对话上下文条数history_cnt需要设为偶数即【用户问题, 模型回答】为1组设置为0时即当前query不携带历史上文