diff --git a/scripts/convert_model.py b/scripts/convert_model.py index 9aa8d1e..60cb953 100644 --- a/scripts/convert_model.py +++ b/scripts/convert_model.py @@ -70,13 +70,9 @@ def convert_transformers2torch(transformers_path, torch_path): if __name__ == '__main__': - lm_config = MiniMindConfig(hidden_size=768, num_hidden_layers=16, max_seq_len=8192, use_moe=False) - + lm_config = MiniMindConfig(hidden_size=512, num_hidden_layers=8, max_seq_len=8192, use_moe=False) torch_path = f"../out/full_sft_{lm_config.hidden_size}{'_moe' if lm_config.use_moe else ''}.pth" - - transformers_path = '../MiniMind2' - + transformers_path = '../MiniMind2-Small' convert_torch2transformers_llama(torch_path, transformers_path) - - # # # convert transformers to torch model - # # convert_transformers2torch(transformers_path, torch_path) + # # convert transformers to torch model + # convert_transformers2torch(transformers_path, torch_path)