mirror of
https://github.com/jingyaogong/minimind.git
synced 2026-04-23 15:58:15 +08:00
[feat] update eval
This commit is contained in:
parent
0682134e21
commit
3812a16d20
@ -29,7 +29,7 @@ def init_model(args):
|
||||
apply_lora(model)
|
||||
load_lora(model, f'./{args.out_dir}/lora/{args.lora_name}_{args.hidden_size}.pth')
|
||||
else:
|
||||
transformers_model_path = './MiniMind/MiniMind2'
|
||||
transformers_model_path = './MiniMind2'
|
||||
tokenizer = AutoTokenizer.from_pretrained(transformers_model_path)
|
||||
model = AutoModelForCausalLM.from_pretrained(transformers_model_path, trust_remote_code=True)
|
||||
print(f'MiniMind模型参数量: {sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e6:.2f}M(illion)')
|
||||
@ -112,7 +112,7 @@ def main():
|
||||
parser.add_argument('--num_hidden_layers', default=8, type=int)
|
||||
parser.add_argument('--max_seq_len', default=8192, type=int)
|
||||
parser.add_argument('--use_moe', default=False, type=bool)
|
||||
parser.add_argument('--model_mode', default=5, type=int, help="0: 预训练模型,1: SFT-Chat模型,2: RLHF-Chat模型,3: Reason模型,4: RLAIF-Chat模型,6: Funcall-Chat模型")
|
||||
parser.add_argument('--model_mode', default=1, type=int, help="0: 预训练模型,1: SFT-Chat模型,2: RLHF-Chat模型,3: Reason模型,4/5: RLAIF-Chat模型")
|
||||
# 启用长度外推,默认为4倍(注:仅解决位置编码外推问题,不代表模型真实具备长文本能力)
|
||||
parser.add_argument('--inference_rope_scaling', default=False, action='store_true')
|
||||
# 携带历史对话上下文条数history_cnt需要设为偶数,即【用户问题, 模型回答】为1组;设置为0时,即当前query不携带历史上文
|
||||
|
||||
Loading…
Reference in New Issue
Block a user