diff --git a/eval_model.py b/eval_llm.py
old mode 100644
new mode 100755
similarity index 76%
rename from eval_model.py
rename to eval_llm.py
index f0dad41..a41b331
--- a/eval_model.py
+++ b/eval_llm.py
@@ -13,13 +13,14 @@ def init_model(args):
tokenizer = AutoTokenizer.from_pretrained('./model/')
if args.load == 0:
moe_path = '_moe' if args.use_moe else ''
- modes = {0: 'pretrain', 1: 'full_sft', 2: 'rlhf', 3: 'reason', 4: 'grpo'}
+ modes = {0: 'pretrain', 1: 'full_sft', 2: 'rlhf', 3: 'reason', 4: 'ppo_actor', 5: 'grpo'}
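+        # e.g. model_mode=5 with hidden_size=512 resolves to './{out_dir}/grpo_512.pth' ('_moe' is inserted before '.pth' when use_moe is set)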
ckp = f'./{args.out_dir}/{modes[args.model_mode]}_{args.hidden_size}{moe_path}.pth'
model = MiniMindForCausalLM(MiniMindConfig(
hidden_size=args.hidden_size,
num_hidden_layers=args.num_hidden_layers,
- use_moe=args.use_moe
+ use_moe=args.use_moe,
+ inference_rope_scaling=args.inference_rope_scaling
))
model.load_state_dict(torch.load(ckp, map_location=args.device), strict=True)
@@ -28,7 +29,7 @@ def init_model(args):
apply_lora(model)
load_lora(model, f'./{args.out_dir}/lora/{args.lora_name}_{args.hidden_size}.pth')
else:
- transformers_model_path = './MiniMind2'
+ transformers_model_path = './MiniMind2-MoE'
tokenizer = AutoTokenizer.from_pretrained(transformers_model_path)
model = AutoModelForCausalLM.from_pretrained(transformers_model_path, trust_remote_code=True)
    print(f'MiniMind model parameter count: {sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e6:.2f}M(illion)')
@@ -48,8 +49,8 @@ def get_prompt_datas(args):
'杭州市的美食有'
]
else:
+        # General conversation questions for the non-LoRA model
if args.lora_name == 'None':
-            # General conversation questions
prompt_datas = [
'请介绍一下自己。',
'你更擅长哪一个学科?',
@@ -62,7 +63,7 @@ def get_prompt_datas(args):
'Introduce the history of the United States, please.'
]
else:
-            # Domain-specific questions
+            # Domain-specific questions for the LoRA fine-tuned model
lora_prompt_datas = {
'lora_identity': [
"你是ChatGPT吧。",
@@ -111,13 +112,14 @@ def main():
parser.add_argument('--num_hidden_layers', default=8, type=int)
parser.add_argument('--max_seq_len', default=8192, type=int)
    parser.add_argument('--use_moe', default=False, action='store_true')
-    # Number of history turns to carry as context
-    # history_cnt must be even, i.e. one [user question, model answer] pair forms a group; 0 means the current query carries no history
-    # Without length-extrapolation fine-tuning, quality degrades noticeably on longer chat_template contexts, so set this with care
+    parser.add_argument('--model_mode', default=5, type=int, help="0: pretrained model, 1: SFT-Chat model, 2: RLHF-Chat model, 3: Reason model, 4: PPO-Actor(RLAIF)-Chat model, 5: GRPO-Chat model")
+    # Enable length extrapolation, 4x by default (note: this only extends the positional encoding; it does not mean the model truly handles long texts)
+ parser.add_argument('--inference_rope_scaling', default=False, action='store_true')
+    # Number of history turns to carry; history_cnt must be even, i.e. one [user question, model answer] pair forms a group; 0 means the current query carries no history
+    # Without multi-turn fine-tuning, quality degrades noticeably over long multi-turn contexts, so set this with care
parser.add_argument('--history_cnt', default=0, type=int)
-    parser.add_argument('--load', default=0, type=int, help="0: native torch weights, 1: load via transformers")
-    parser.add_argument('--model_mode', default=1, type=int,
-                        help="0: pretrained model, 1: SFT-Chat model, 2: RLHF-Chat model, 3: Reason model, 4: RLAIF-Chat model")
+    # When load is 1, the hidden_size, num_hidden_layers, max_seq_len args above are ignored; the loaded transformers model's config.json takes precedence
+    parser.add_argument('--load', default=1, type=int, help="0: native torch weights, 1: load via transformers")
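+    # example: python eval_llm.py --load 0 --model_mode 1 --history_cnt 2  (native full_sft weights, one history pair)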
args = parser.parse_args()
model, tokenizer = init_model(args)
@@ -128,18 +130,27 @@ def main():
messages = []
for idx, prompt in enumerate(prompts if test_mode == 0 else iter(lambda: input('👶: '), '')):
- setup_seed(random.randint(0, 2048))
-        # setup_seed(2025)  # swap in a [fixed] seed if you want identical output every run
+ # setup_seed(random.randint(0, 2048))
+        setup_seed(2026)  # fixed seed for reproducible output; use the random line above for varied output
if test_mode == 0: print(f'👶: {prompt}')
messages = messages[-args.history_cnt:] if args.history_cnt else []
messages.append({"role": "user", "content": prompt})
- new_prompt = tokenizer.apply_chat_template(
- messages,
- tokenize=False,
- add_generation_prompt=True
- ) if args.model_mode != 0 else (tokenizer.bos_token + prompt)
+        # 1. Pretrain: plain next-token continuation model
+ if args.model_mode == 0:
+ new_prompt = tokenizer.bos_token + prompt
+        # 2. SFT/RL: chat models
+ else:
+ template_args = {
+ "conversation": messages,
+ "tokenize": False,
+ "add_generation_prompt": True
+ }
+            # Only applicable to the Reason model; non-thinking models must not receive this argument
+            if args.model_mode == 3:
+                template_args["enable_thinking"] = True  # False disables thinking
+ new_prompt = tokenizer.apply_chat_template(**template_args)
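+            # new_prompt now holds the rendered chat string (assistant generation prefix appended); it is tokenized below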
inputs = tokenizer(
new_prompt,
diff --git a/others/HF-Space/app.py b/others/HF-Space/app.py
new file mode 100644
index 0000000..e056546
--- /dev/null
+++ b/others/HF-Space/app.py
@@ -0,0 +1,320 @@
+import random
+import re
+from threading import Thread
+
+import torch
+import numpy as np
+import streamlit as st
+
+st.set_page_config(page_title="MiniMind", initial_sidebar_state="collapsed")
+
+st.markdown("""
+
+""", unsafe_allow_html=True)
+
+system_prompt = []
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+
+def process_assistant_content(content):
+ if model_source == "API" and 'R1' not in api_model_name:
+ return content
+ if model_source != "API" and 'R1' not in MODEL_PATHS[selected_model][1]:
+ return content
+
+    # NOTE: the <think>/</think> literals and the <details> wrappers below are a
+    # minimal reconstruction; the original inline styles are elided.
+    if '<think>' in content and '</think>' in content:
+        content = re.sub(r'(<think>)(.*?)(</think>)',
+                         r'<details><summary>推理内容(展开)</summary>\2</details>',
+                         content,
+                         flags=re.DOTALL)
+
+    if '<think>' in content and '</think>' not in content:
+        content = re.sub(r'<think>(.*?)$',
+                         r'<details open><summary>推理中...</summary>\1</details>',
+                         content,
+                         flags=re.DOTALL)
+
+    if '<think>' not in content and '</think>' in content:
+        content = re.sub(r'(.*?)</think>',
+                         r'<details><summary>推理内容(展开)</summary>\1</details>',
+                         content,
+                         flags=re.DOTALL)
+
+    return content
+
+
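+# st.cache_resource keeps one model/tokenizer instance alive across Streamlit reruns,
+# so the weights below are loaded only once per process.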
+@st.cache_resource
+def load_model_tokenizer(model_path):
+ model = AutoModelForCausalLM.from_pretrained(
+ model_path,
+ trust_remote_code=True
+ )
+ tokenizer = AutoTokenizer.from_pretrained(
+ model_path,
+ trust_remote_code=True
+ )
+ model = model.eval().to(device)
+ return model, tokenizer
+
+
+def clear_chat_messages():
+ del st.session_state.messages
+ del st.session_state.chat_messages
+
+
+def init_chat_messages():
+ if "messages" in st.session_state:
+ for i, message in enumerate(st.session_state.messages):
+ if message["role"] == "assistant":
+ with st.chat_message("assistant", avatar=image_url):
+ st.markdown(process_assistant_content(message["content"]), unsafe_allow_html=True)
+ if st.button("🗑", key=f"delete_{i}"):
+ st.session_state.messages.pop(i)
+ st.session_state.messages.pop(i - 1)
+ st.session_state.chat_messages.pop(i)
+ st.session_state.chat_messages.pop(i - 1)
+ st.rerun()
+ else:
+                # user message bubble; minimal markup, original inline styles elided
+                # (the "user-message" class name is a placeholder)
+                st.markdown(f'<div class="user-message">{message["content"]}</div>',
+                            unsafe_allow_html=True)
+
+ else:
+ st.session_state.messages = []
+ st.session_state.chat_messages = []
+
+ return st.session_state.messages
+
+
+def regenerate_answer(index):
+ st.session_state.messages.pop()
+ st.session_state.chat_messages.pop()
+ st.rerun()
+
+
+def delete_conversation(index):
+ st.session_state.messages.pop(index)
+ st.session_state.messages.pop(index - 1)
+ st.session_state.chat_messages.pop(index)
+ st.session_state.chat_messages.pop(index - 1)
+ st.rerun()
+
+
+st.sidebar.title("模型设定调整")
+
+# st.sidebar.text("Due to training-data bias, multi-turn chat degrades\nmore readily than single-turn as context memory grows")
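+# step=2 keeps the history window an even number of messages (one user/assistant pair per step)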
+st.session_state.history_chat_num = st.sidebar.slider("Number of Historical Dialogues", 0, 6, 0, step=2)
+# st.session_state.history_chat_num = 0
+st.session_state.max_new_tokens = st.sidebar.slider("Max New Tokens", 256, 8192, 8192, step=1)
+st.session_state.temperature = st.sidebar.slider("Temperature", 0.6, 1.2, 0.85, step=0.01)
+
+model_source = st.sidebar.radio("选择模型来源", ["本地模型", "API"], index=0)
+
+if model_source == "API":
+ api_url = st.sidebar.text_input("API URL", value="http://127.0.0.1:8000/v1")
+ api_model_id = st.sidebar.text_input("Model ID", value="minimind")
+ api_model_name = st.sidebar.text_input("Model Name", value="MiniMind2")
+ api_key = st.sidebar.text_input("API Key", value="none", type="password")
+ slogan = f"Hi, I'm {api_model_name}"
+else:
+ MODEL_PATHS = {
+ "MiniMind2 (0.1B)": ["./MiniMind2", "MiniMind2"],
+ "MiniMind2-MoE (0.15B)": ["./MiniMind2-MoE", "MiniMind2-MoE"],
+ "MiniMind2-Small (0.02B)": ["./MiniMind2-Small", "MiniMind2-Small"]
+ }
+
+    selected_model = st.sidebar.selectbox('Models', list(MODEL_PATHS.keys()), index=0)  # MiniMind2 selected by default
+ model_path = MODEL_PATHS[selected_model][0]
+ slogan = f"Hi, I'm {MODEL_PATHS[selected_model][1]}"
+
+image_url = "https://www.modelscope.cn/api/v1/studio/gongjy/MiniMind/repo?Revision=master&FilePath=images%2Flogo2.png&View=true"
+
+st.markdown(
+    # header: logo + slogan + disclaimer (original inline-styled layout elided; minimal equivalent)
+    f'<div style="text-align: center;">'
+    f'<img src="{image_url}" width="45">'
+    f'<span style="font-size: 26px;">{slogan}</span><br>'
+    '<span style="color: #bbb; font-style: italic;">内容完全由AI生成,请务必仔细甄别<br>'
+    'Content AI-generated, please discern with care</span>'
+    '</div>',
+    unsafe_allow_html=True
+)
+
+
+def setup_seed(seed):
+ random.seed(seed)
+ np.random.seed(seed)
+ torch.manual_seed(seed)
+ torch.cuda.manual_seed(seed)
+ torch.cuda.manual_seed_all(seed)
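+    # deterministic cudnn kernels (autotuning off) trade speed for run-to-run reproducibility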
+ torch.backends.cudnn.deterministic = True
+ torch.backends.cudnn.benchmark = False
+
+
+def main():
+ if model_source == "本地模型":
+ model, tokenizer = load_model_tokenizer(model_path)
+ else:
+ model, tokenizer = None, None
+
+ if "messages" not in st.session_state:
+ st.session_state.messages = []
+ st.session_state.chat_messages = []
+
+ messages = st.session_state.messages
+
+ for i, message in enumerate(messages):
+ if message["role"] == "assistant":
+ with st.chat_message("assistant", avatar=image_url):
+ st.markdown(process_assistant_content(message["content"]), unsafe_allow_html=True)
+ if st.button("×", key=f"delete_{i}"):
+ st.session_state.messages = st.session_state.messages[:i - 1]
+ st.session_state.chat_messages = st.session_state.chat_messages[:i - 1]
+ st.rerun()
+ else:
+            # user message bubble; minimal markup, original inline styles elided
+            st.markdown(f'<div class="user-message">{message["content"]}</div>',
+                        unsafe_allow_html=True)
+
+ prompt = st.chat_input(key="input", placeholder="给 MiniMind 发送消息")
+
+ if hasattr(st.session_state, 'regenerate') and st.session_state.regenerate:
+ prompt = st.session_state.last_user_message
+ regenerate_index = st.session_state.regenerate_index
+ delattr(st.session_state, 'regenerate')
+ delattr(st.session_state, 'last_user_message')
+ delattr(st.session_state, 'regenerate_index')
+
+ if prompt:
+        # echo the new user message; minimal markup, original inline styles elided
+        st.markdown(f'<div class="user-message">{prompt}</div>',
+                    unsafe_allow_html=True)
+ messages.append({"role": "user", "content": prompt[-st.session_state.max_new_tokens:]})
+ st.session_state.chat_messages.append({"role": "user", "content": prompt[-st.session_state.max_new_tokens:]})
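+        # note: this slices characters, not tokens; it only bounds extremely long inputs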
+
+ with st.chat_message("assistant", avatar=image_url):
+ placeholder = st.empty()
+
+ if model_source == "API":
+ try:
+ from openai import OpenAI
+
+ client = OpenAI(
+ api_key=api_key,
+ base_url=api_url
+ )
+                history_num = st.session_state.history_chat_num + 1  # +1 to include the current user message
+ conversation_history = system_prompt + st.session_state.chat_messages[-history_num:]
+ answer = ""
+ response = client.chat.completions.create(
+ model=api_model_id,
+ messages=conversation_history,
+ stream=True,
+ temperature=st.session_state.temperature
+ )
+
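+                    # stream=True yields incremental deltas; concatenating them rebuilds the full reply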
+ for chunk in response:
+ content = chunk.choices[0].delta.content or ""
+ answer += content
+ placeholder.markdown(process_assistant_content(answer), unsafe_allow_html=True)
+
+ except Exception as e:
+ answer = f"API调用出错: {str(e)}"
+ placeholder.markdown(answer, unsafe_allow_html=True)
+ else:
+ random_seed = random.randint(0, 2 ** 32 - 1)
+ setup_seed(random_seed)
+
+ st.session_state.chat_messages = system_prompt + st.session_state.chat_messages[
+ -(st.session_state.history_chat_num + 1):]
+ new_prompt = tokenizer.apply_chat_template(
+ st.session_state.chat_messages,
+ tokenize=False,
+ add_generation_prompt=True
+ )
+
+ inputs = tokenizer(
+ new_prompt,
+ return_tensors="pt",
+ truncation=True
+ ).to(device)
+
+ streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+ generation_kwargs = {
+ "input_ids": inputs.input_ids,
+ "max_length": inputs.input_ids.shape[1] + st.session_state.max_new_tokens,
+ "num_return_sequences": 1,
+ "do_sample": True,
+ "attention_mask": inputs.attention_mask,
+ "pad_token_id": tokenizer.pad_token_id,
+ "eos_token_id": tokenizer.eos_token_id,
+ "temperature": st.session_state.temperature,
+ "top_p": 0.85,
+ "streamer": streamer,
+ }
+
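+                # generate() blocks, so it runs in a worker thread; the streamer yields decoded chunks to the loop below for live display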
+ Thread(target=model.generate, kwargs=generation_kwargs).start()
+
+ answer = ""
+ for new_text in streamer:
+ answer += new_text
+ placeholder.markdown(process_assistant_content(answer), unsafe_allow_html=True)
+
+ messages.append({"role": "assistant", "content": answer})
+ st.session_state.chat_messages.append({"role": "assistant", "content": answer})
+ with st.empty():
+ if st.button("×", key=f"delete_{len(messages) - 1}"):
+ st.session_state.messages = st.session_state.messages[:-2]
+ st.session_state.chat_messages = st.session_state.chat_messages[:-2]
+ st.rerun()
+
+
+if __name__ == "__main__":
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+
+ main()
\ No newline at end of file