from openai import OpenAI client = OpenAI( api_key="sk-123", base_url="http://localhost:11434/v1" ) stream = True conversation_history_origin = [] conversation_history = conversation_history_origin.copy() history_messages_num = 0 # 必须设置为偶数(Q+A),为0则不携带历史对话 while True: query = input('[Q]: ') conversation_history.append({"role": "user", "content": query}) response = client.chat.completions.create( model="minimind-local:latest", messages=conversation_history[-(history_messages_num or 1):], stream=stream, temperature=0.8, max_tokens=2048, top_p=0.8, extra_body={"chat_template_kwargs": {"open_thinking": True}, "reasoning_effort": "medium"} # 思考开关 ) if not stream: assistant_res = response.choices[0].message.content print('[A]: ', assistant_res) else: print('[A]: ', end='', flush=True) assistant_res = '' for chunk in response: delta = chunk.choices[0].delta r = getattr(delta, 'reasoning_content', None) or "" c = delta.content or "" if r: print(f'\033[90m{r}\033[0m', end="", flush=True) if c: print(c, end="", flush=True) assistant_res += c conversation_history.append({"role": "assistant", "content": assistant_res}) print('\n\n')