[update] fp16 inference

This commit is contained in:
jingyaogong
2026-03-27 16:29:46 +08:00
parent 88e675dc2c
commit 6b0b0c5e2f
4 changed files with 4 additions and 4 deletions
+1 -1
View File
@@ -64,7 +64,7 @@ def init_model(args):
else:
model = AutoModelForCausalLM.from_pretrained(args.load_from, trust_remote_code=True)
get_model_params(model, model.config)
-return model.eval().to(args.device), tokenizer
+return model.half().eval().to(args.device), tokenizer
def parse_tool_calls(text):
+1 -1
View File
@@ -44,7 +44,7 @@ def init_model(args):
else:
model = AutoModelForCausalLM.from_pretrained(args.load_from, trust_remote_code=True)
print(f'MiniMind模型参数量: {sum(p.numel() for p in model.parameters()) / 1e6:.2f} M(illion)')
-return model.eval().to(device), tokenizer
+return model.half().eval().to(device), tokenizer
class ChatRequest(BaseModel):
+1 -1
View File
@@ -205,7 +205,7 @@ def load_model_tokenizer(model_path):
model_path,
trust_remote_code=True
)
-model = model.eval().to(device)
+model = model.half().eval().to(device)
return model, tokenizer