diff --git a/eval_model.py b/eval_model.py index 079bbb0..b872efa 100755 --- a/eval_model.py +++ b/eval_model.py @@ -29,7 +29,7 @@ def init_model(args): apply_lora(model) load_lora(model, f'./{args.out_dir}/lora/{args.lora_name}_{args.hidden_size}.pth') else: - transformers_model_path = './MiniMind/MiniMind2' + transformers_model_path = './MiniMind2' tokenizer = AutoTokenizer.from_pretrained(transformers_model_path) model = AutoModelForCausalLM.from_pretrained(transformers_model_path, trust_remote_code=True) print(f'MiniMind模型参数量: {sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e6:.2f}M(illion)') @@ -112,7 +112,7 @@ def main(): parser.add_argument('--num_hidden_layers', default=8, type=int) parser.add_argument('--max_seq_len', default=8192, type=int) parser.add_argument('--use_moe', default=False, type=bool) - parser.add_argument('--model_mode', default=5, type=int, help="0: 预训练模型,1: SFT-Chat模型,2: RLHF-Chat模型,3: Reason模型,4: RLAIF-Chat模型,6: Funcall-Chat模型") + parser.add_argument('--model_mode', default=1, type=int, help="0: 预训练模型,1: SFT-Chat模型,2: RLHF-Chat模型,3: Reason模型,4/5: RLAIF-Chat模型") # 启用长度外推,默认为4倍(注:仅解决位置编码外推问题,不代表模型真实具备长文本能力) parser.add_argument('--inference_rope_scaling', default=False, action='store_true') # 携带历史对话上下文条数history_cnt需要设为偶数,即【用户问题, 模型回答】为1组;设置为0时,即当前query不携带历史上文