mirror of
https://github.com/jingyaogong/minimind.git
synced 2026-06-06 00:04:50 +00:00
[update] fp16 inference
This commit is contained in:
@@ -64,7 +64,7 @@ def init_model(args):
|
||||
else:
|
||||
model = AutoModelForCausalLM.from_pretrained(args.load_from, trust_remote_code=True)
|
||||
get_model_params(model, model.config)
|
||||
- return model.eval().to(args.device), tokenizer
|
||||
+ return model.half().eval().to(args.device), tokenizer
|
||||
|
||||
|
||||
def parse_tool_calls(text):
|
||||
|
||||
@@ -44,7 +44,7 @@ def init_model(args):
|
||||
else:
|
||||
model = AutoModelForCausalLM.from_pretrained(args.load_from, trust_remote_code=True)
|
||||
print(f'MiniMind模型参数量: {sum(p.numel() for p in model.parameters()) / 1e6:.2f} M(illion)')
|
||||
- return model.eval().to(device), tokenizer
|
||||
+ return model.half().eval().to(device), tokenizer
|
||||
|
||||
|
||||
class ChatRequest(BaseModel):
|
||||
|
||||
+1
-1
@@ -205,7 +205,7 @@ def load_model_tokenizer(model_path):
|
||||
model_path,
|
||||
trust_remote_code=True
|
||||
)
|
||||
- model = model.eval().to(device)
|
||||
+ model = model.half().eval().to(device)
|
||||
return model, tokenizer
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user