commit 3639ad0df1780774cf8b2c85d88c1e0d1f1d46fa Author: jingyaogong Date: Sun Oct 12 22:14:17 2025 +0800 init minimind-docs diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..d799408 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,18 @@ +# Read the Docs 配置文件 +version: 2 + +# 构建配置 +build: + os: ubuntu-22.04 + tools: + python: "3.11" + +# MkDocs 配置 +mkdocs: + configuration: mkdocs.yml + +# Python 依赖 +python: + install: + - requirements: requirements.txt + diff --git a/docs/images/1-wiki.png b/docs/images/1-wiki.png new file mode 100644 index 0000000..5eba0fc Binary files /dev/null and b/docs/images/1-wiki.png differ diff --git a/docs/images/2-wiki.png b/docs/images/2-wiki.png new file mode 100644 index 0000000..35a1d53 Binary files /dev/null and b/docs/images/2-wiki.png differ diff --git a/docs/images/3-wiki.png b/docs/images/3-wiki.png new file mode 100644 index 0000000..9570bbf Binary files /dev/null and b/docs/images/3-wiki.png differ diff --git a/docs/images/4-wiki.png b/docs/images/4-wiki.png new file mode 100644 index 0000000..931e240 Binary files /dev/null and b/docs/images/4-wiki.png differ diff --git a/docs/images/5-wiki.png b/docs/images/5-wiki.png new file mode 100644 index 0000000..78ca9af Binary files /dev/null and b/docs/images/5-wiki.png differ diff --git a/docs/images/LLM-structure-moe.png b/docs/images/LLM-structure-moe.png new file mode 100644 index 0000000..4588477 Binary files /dev/null and b/docs/images/LLM-structure-moe.png differ diff --git a/docs/images/LLM-structure.png b/docs/images/LLM-structure.png new file mode 100755 index 0000000..bbd93dd Binary files /dev/null and b/docs/images/LLM-structure.png differ diff --git a/docs/images/and_huggingface.png b/docs/images/and_huggingface.png new file mode 100644 index 0000000..c234f8a Binary files /dev/null and b/docs/images/and_huggingface.png differ diff --git a/docs/images/and_modelscope.png b/docs/images/and_modelscope.png new file mode 100644 index 
0000000..1e46da4 Binary files /dev/null and b/docs/images/and_modelscope.png differ diff --git a/docs/images/compare_radar.png b/docs/images/compare_radar.png new file mode 100644 index 0000000..345d9f6 Binary files /dev/null and b/docs/images/compare_radar.png differ diff --git a/docs/images/dataset.jpg b/docs/images/dataset.jpg new file mode 100644 index 0000000..7dfc366 Binary files /dev/null and b/docs/images/dataset.jpg differ diff --git a/docs/images/gpt3_config.png b/docs/images/gpt3_config.png new file mode 100644 index 0000000..121bc29 Binary files /dev/null and b/docs/images/gpt3_config.png differ diff --git a/docs/images/logo.png b/docs/images/logo.png new file mode 100644 index 0000000..14d8301 Binary files /dev/null and b/docs/images/logo.png differ diff --git a/docs/images/logo2.png b/docs/images/logo2.png new file mode 100644 index 0000000..9a0b3e2 Binary files /dev/null and b/docs/images/logo2.png differ diff --git a/docs/images/minimind2.gif b/docs/images/minimind2.gif new file mode 100644 index 0000000..43c9cd1 Binary files /dev/null and b/docs/images/minimind2.gif differ diff --git a/docs/images/pre_512_loss.png b/docs/images/pre_512_loss.png new file mode 100644 index 0000000..3da0be5 Binary files /dev/null and b/docs/images/pre_512_loss.png differ diff --git a/docs/images/pre_768_loss.png b/docs/images/pre_768_loss.png new file mode 100644 index 0000000..e00b23c Binary files /dev/null and b/docs/images/pre_768_loss.png differ diff --git a/docs/images/rope_ppl.png b/docs/images/rope_ppl.png new file mode 100644 index 0000000..223292e Binary files /dev/null and b/docs/images/rope_ppl.png differ diff --git a/docs/images/sft_512_loss.png b/docs/images/sft_512_loss.png new file mode 100644 index 0000000..40b86bc Binary files /dev/null and b/docs/images/sft_512_loss.png differ diff --git a/docs/images/sft_768_loss.png b/docs/images/sft_768_loss.png new file mode 100644 index 0000000..5ea6c97 Binary files /dev/null and b/docs/images/sft_768_loss.png 
differ diff --git a/docs/images/training_grpo.png b/docs/images/training_grpo.png new file mode 100644 index 0000000..54e925e Binary files /dev/null and b/docs/images/training_grpo.png differ diff --git a/docs/images/training_ppo.png b/docs/images/training_ppo.png new file mode 100644 index 0000000..dd0f275 Binary files /dev/null and b/docs/images/training_ppo.png differ diff --git a/docs/index.en.md b/docs/index.en.md new file mode 100644 index 0000000..f4a60b3 --- /dev/null +++ b/docs/index.en.md @@ -0,0 +1,53 @@ +# Welcome to MiniMind! + +
+ ![logo](images/logo.png) +
"Simplicity is the ultimate sophistication"
+
+ +## 📌 Introduction + +MiniMind is a super-small language model project trained completely from scratch, requiring **only $0.5 + 2 hours** to train a **26M** language model! + +- The **MiniMind** series is extremely lightweight; the smallest version is **1/7000** the size of GPT-3 +- The project open-sources the minimalist structure of large models, including: + - Mixture of Experts (MoE) + - Dataset cleaning + - Pretraining + - Supervised Fine-Tuning (SFT) + - LoRA fine-tuning + - Direct Preference Optimization (DPO) + - Model distillation +- All core algorithm code is reconstructed from scratch using native PyTorch, without relying on third-party abstract interfaces +- This is not only a full-stage open-source reproduction of large language models, but also a tutorial for getting started with LLMs + +!!! note "Training Cost" + "2 hours" is based on testing with a single NVIDIA 3090 card; "$0.5" refers to the GPU server rental cost + +## ✨ Key Features + +- **Ultra-low cost**: Single 3090, 2 hours, $0.5 to train a ChatBot from scratch +- **Complete pipeline**: Covers the full process: Tokenizer, pretraining, SFT, LoRA, DPO, and distillation +- **Education-friendly**: Clean code, suitable for learning LLM principles +- **Ecosystem compatible**: Supports `transformers`, `llama.cpp`, `vllm`, `ollama` and other mainstream frameworks + +## 📊 Model List + +| Model (Size) | Inference Memory (Approx.) 
| Release | +|------------|----------|---------| +| MiniMind2-small (26M) | 0.5 GB | 2025.04.26 | +| MiniMind2-MoE (145M) | 1.0 GB | 2025.04.26 | +| MiniMind2 (104M) | 1.0 GB | 2025.04.26 | + +## 🚀 Quick Navigation + +- [Quick Start](quickstart.en.md) - Environment setup, model download, quick testing +- [Model Training](training.en.md) - Pretraining, SFT, LoRA, DPO training process + +## 🔗 Related Links + +- **GitHub**: [https://github.com/jingyaogong/minimind](https://github.com/jingyaogong/minimind) +- **HuggingFace**: [MiniMind Collection](https://huggingface.co/collections/jingyaogong/minimind-66caf8d999f5c7fa64f399e5) +- **ModelScope**: [MiniMind Models](https://www.modelscope.cn/profile/gongjy) +- **Online Demo**: [ModelScope Studio](https://www.modelscope.cn/studios/gongjy/MiniMind) + diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..3fbeef1 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,53 @@ +# Welcome to MiniMind! + + +
+ ![logo](images/logo.png) +
+ +## 📌 项目简介 + +MiniMind 是一个完全从 0 开始训练的超小语言模型项目,**仅需 3 块钱成本 + 2 小时**即可训练出仅为 **26M** 的语言模型! + +- **MiniMind** 系列极其轻量,最小版本体积是 GPT-3 的 **1/7000** +- 项目开源了大模型的极简结构,包含: + - 混合专家模型(MoE) + - 数据集清洗 + - 预训练(Pretrain) + - 监督微调(SFT) + - LoRA 微调 + - 直接偏好优化(DPO) + - 模型蒸馏 +- 所有核心算法代码均从 0 使用 PyTorch 原生重构,不依赖第三方抽象接口 +- 这不仅是大语言模型的全阶段开源复现,也是一个入门 LLM 的教程 + +!!! note "训练成本" + "2小时" 基于 NVIDIA 3090 硬件设备(单卡)测试,"3块钱" 指 GPU 服务器租用成本 + +## ✨ 主要特点 + +- **超低成本**:单卡 3090,2 小时,3 块钱即可从 0 训练 ChatBot +- **完整流程**:涵盖 Tokenizer、预训练、SFT、LoRA、DPO、蒸馏全流程 +- **教育友好**:代码简洁,适合学习 LLM 原理 +- **生态兼容**:支持 `transformers`、`llama.cpp`、`vllm`、`ollama` 等主流框架 + +## 📊 模型列表 + +| 模型 (大小) | 推理占用 (约) | Release | +|------------|----------|---------| +| MiniMind2-small (26M) | 0.5 GB | 2025.04.26 | +| MiniMind2-MoE (145M) | 1.0 GB | 2025.04.26 | +| MiniMind2 (104M) | 1.0 GB | 2025.04.26 | + +## 🚀 快速导航 + +- [快速开始](quickstart.md) - 环境安装、模型下载、快速测试 +- [模型训练](training.md) - 预训练、SFT、LoRA、DPO 等训练流程 + +## 🔗 相关链接 + +- **GitHub**: [https://github.com/jingyaogong/minimind](https://github.com/jingyaogong/minimind) +- **HuggingFace**: [MiniMind Collection](https://huggingface.co/collections/jingyaogong/minimind-66caf8d999f5c7fa64f399e5) +- **ModelScope**: [MiniMind 模型](https://www.modelscope.cn/profile/gongjy) +- **在线体验**: [ModelScope 创空间](https://www.modelscope.cn/studios/gongjy/MiniMind) + diff --git a/docs/quickstart.en.md b/docs/quickstart.en.md new file mode 100644 index 0000000..c3e0ade --- /dev/null +++ b/docs/quickstart.en.md @@ -0,0 +1,114 @@ +# Quick Start + +This page will help you quickly get started with the MiniMind project. + +## 📋 Requirements + +- **Python**: 3.10+ +- **PyTorch**: 1.12+ +- **CUDA**: 12.2+ (optional, for GPU acceleration) +- **VRAM**: At least 8GB (24GB recommended) + +!!! tip "Hardware Configuration Reference" + - CPU: Intel i9-10980XE @ 3.00GHz + - RAM: 128 GB + - GPU: NVIDIA GeForce RTX 3090 (24GB) + +## 🚀 Testing Existing Models + +### 1. 
Clone the Project + +```bash +git clone https://github.com/jingyaogong/minimind.git +cd minimind +``` + +### 2. Install Dependencies + +```bash +pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple +``` + +!!! warning "Torch CUDA Check" + After installation, test if Torch can use CUDA: + ```python + import torch + print(torch.cuda.is_available()) + ``` + +### 3. Download Model + +Download pretrained models from HuggingFace or ModelScope: + +```bash +# From HuggingFace +git clone https://huggingface.co/jingyaogong/MiniMind2 + +# Or from ModelScope +git clone https://www.modelscope.cn/models/gongjy/MiniMind2.git +``` + +### 4. Command Line Q&A + +```bash +# load=0: load PyTorch model, load=1: load Transformers model +python eval_model.py --load 1 --model_mode 2 +``` + +### 5. Start WebUI (Optional) + +```bash +# Requires Python >= 3.10 +pip install streamlit +cd scripts +streamlit run web_demo.py +``` + +Visit `http://localhost:8501` to use the web interface. + +## 🔧 Third-party Inference Frameworks + +MiniMind supports multiple mainstream inference frameworks: + +### Ollama + +```bash +ollama run jingyaogong/minimind2 +``` + +### vLLM + +```bash +vllm serve ./MiniMind2/ --served-model-name "minimind" +``` + +### llama.cpp + +```bash +# Convert model +python convert_hf_to_gguf.py ./MiniMind2/ + +# Quantize model +./build/bin/llama-quantize ./MiniMind2/MiniMind2-109M-F16.gguf ./Q4-MiniMind2.gguf Q4_K_M + +# Inference +./build/bin/llama-cli -m ./Q4-MiniMind2.gguf --chat-template chatml +``` + +## 📝 Effect Testing + +```text +👶: Hello, please introduce yourself. +🤖️: Hello! I'm MiniMind, an AI assistant developed by Jingyao Gong. + I interact with users through natural language processing and algorithm training. + +👶: What is the highest mountain in the world? +🤖️: Mount Everest is the highest mountain in the world, located in the Himalayas, + with an elevation of 8,848.86 meters (29,031.7 feet). 
+``` + +## 🎯 Next Steps + +- Check [Model Training](training.en.md) to learn how to train your own model from scratch +- Read the source code to understand LLM implementation principles + diff --git a/docs/quickstart.md b/docs/quickstart.md new file mode 100644 index 0000000..a8bbf94 --- /dev/null +++ b/docs/quickstart.md @@ -0,0 +1,114 @@ +# 快速开始 + +本页面将帮助你快速上手 MiniMind 项目。 + +## 📋 环境要求 + +- **Python**: 3.10+ +- **PyTorch**: 1.12+ +- **CUDA**: 12.2+(可选,用于 GPU 加速) +- **显存**: 至少 8GB(推荐 24GB) + +!!! tip "硬件配置参考" + - CPU: Intel i9-10980XE @ 3.00GHz + - RAM: 128 GB + - GPU: NVIDIA GeForce RTX 3090 (24GB) + +## 🚀 测试已有模型 + +### 1. 克隆项目 + +```bash +git clone https://github.com/jingyaogong/minimind.git +cd minimind +``` + +### 2. 安装依赖 + +```bash +pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple +``` + +!!! warning "Torch CUDA 检查" + 安装后请测试 Torch 是否可用 CUDA: + ```python + import torch + print(torch.cuda.is_available()) + ``` + +### 3. 下载模型 + +从 HuggingFace 或 ModelScope 下载预训练模型: + +```bash +# 从 HuggingFace 下载 +git clone https://huggingface.co/jingyaogong/MiniMind2 + +# 或从 ModelScope 下载 +git clone https://www.modelscope.cn/models/gongjy/MiniMind2.git +``` + +### 4. 命令行问答 + +```bash +# load=0: 加载 PyTorch 模型, load=1: 加载 Transformers 模型 +python eval_model.py --load 1 --model_mode 2 +``` + +### 5. 
启动 WebUI(可选) + +```bash +# 需要 Python >= 3.10 +pip install streamlit +cd scripts +streamlit run web_demo.py +``` + +访问 `http://localhost:8501` 即可使用 Web 界面。 + +## 🔧 第三方推理框架 + +MiniMind 支持多种主流推理框架: + +### Ollama + +```bash +ollama run jingyaogong/minimind2 +``` + +### vLLM + +```bash +vllm serve ./MiniMind2/ --served-model-name "minimind" +``` + +### llama.cpp + +```bash +# 转换模型 +python convert_hf_to_gguf.py ./MiniMind2/ + +# 量化模型 +./build/bin/llama-quantize ./MiniMind2/MiniMind2-109M-F16.gguf ./Q4-MiniMind2.gguf Q4_K_M + +# 推理 +./build/bin/llama-cli -m ./Q4-MiniMind2.gguf --chat-template chatml +``` + +## 📝 效果测试 + +```text +👶: 你好,请介绍一下自己。 +🤖️: 你好!我是 MiniMind,一个由 Jingyao Gong 开发的人工智能助手。 + 我通过自然语言处理和算法训练来与用户进行交互。 + +👶: 世界上最高的山峰是什么? +🤖️: 珠穆朗玛峰是世界上最高的山峰,位于喜马拉雅山脉, + 海拔 8,848.86 米(29,031.7 英尺)。 +``` + +## 🎯 下一步 + +- 查看 [模型训练](training.md) 了解如何从 0 开始训练自己的模型 +- 阅读源码了解 LLM 的实现原理 + diff --git a/docs/training.en.md b/docs/training.en.md new file mode 100644 index 0000000..dd6624d --- /dev/null +++ b/docs/training.en.md @@ -0,0 +1,186 @@ +# Model Training + +This page introduces how to train MiniMind language models from scratch. + +## 📊 Data Preparation + +### 1. Download Dataset + +Download datasets from [ModelScope](https://www.modelscope.cn/datasets/gongjy/minimind_dataset/files) or [HuggingFace](https://huggingface.co/datasets/jingyaogong/minimind_dataset). + +Create `./dataset` directory and place data files: + +```bash +./dataset/ +├── pretrain_hq.jsonl (1.6GB, ✨Recommended) +├── sft_mini_512.jsonl (1.2GB, ✨Recommended) +├── sft_512.jsonl (7.5GB) +├── sft_1024.jsonl (5.6GB) +├── sft_2048.jsonl (9GB) +├── dpo.jsonl (909MB) +├── r1_mix_1024.jsonl (340MB) +└── lora_*.jsonl +``` + +!!! tip "Recommended Combination" + Fastest reproduction: `pretrain_hq.jsonl` + `sft_mini_512.jsonl` + + **Single 3090 only needs 2 hours + $0.5!** + +### 2. Data Format + +**Pretrain Data** (`pretrain_hq.jsonl`): +```json +{"text": "How to overcome procrastination? 
Overcoming procrastination is not easy..."} +``` + +**SFT Data** (`sft_*.jsonl`): +```json +{ + "conversations": [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hello!"} + ] +} +``` + +## 🎯 Training Pipeline + +All training scripts are located in the `./trainer` directory. + +### 1. Pretraining + +The pretraining stage lets the model learn basic knowledge, the goal is to **learn word continuation**. + +```bash +cd trainer +python train_pretrain.py + +# Multi-GPU training +torchrun --nproc_per_node 2 train_pretrain.py +``` + +Output weights: `./out/pretrain_*.pth` + +!!! info "Training Duration" + - MiniMind2-Small (26M): ~1.1h (single 3090) + - MiniMind2 (104M): ~3.9h (single 3090) + +### 2. Supervised Fine-Tuning (SFT) + +The SFT stage teaches the model conversation patterns and adapts to chat templates. + +```bash +python train_full_sft.py + +# Multi-GPU training +torchrun --nproc_per_node 2 train_full_sft.py +``` + +Output weights: `./out/full_sft_*.pth` + +!!! info "Training Duration" + - MiniMind2-Small: ~1h (using sft_mini_512) + - MiniMind2: ~3.3h (using sft_mini_512) + +### 3. LoRA Fine-tuning (Optional) + +LoRA is a parameter-efficient fine-tuning method, suitable for domain adaptation. + +```bash +python train_lora.py +``` + +**Use Cases**: +- Medical Q&A: use `lora_medical.jsonl` +- Self-awareness: use `lora_identity.jsonl` + +Output weights: `./out/lora/lora_*.pth` + +### 4. DPO Reinforcement Learning (Optional) + +DPO is used to optimize model response quality to better align with human preferences. + +```bash +python train_dpo.py +``` + +Output weights: `./out/rlhf_*.pth` + +### 5. Reasoning Model Distillation (Optional) + +Distill reasoning capabilities from DeepSeek-R1. 
+ +```bash +python train_distill_reason.py +``` + +Output weights: `./out/reason_*.pth` + +## 📈 Model Architecture + +MiniMind uses Transformer Decoder-Only architecture (similar to Llama3): + +![structure](images/LLM-structure.png) + +### Model Parameter Configuration + +| Model Name | params | d_model | n_layers | kv_heads | q_heads | +|------------|--------|---------|----------|----------|---------| +| MiniMind2-Small | 26M | 512 | 8 | 2 | 8 | +| MiniMind2-MoE | 145M | 640 | 8 | 2 | 8 | +| MiniMind2 | 104M | 768 | 16 | 2 | 8 | + +## 🧪 Test Model + +```bash +# model_mode: 0=pretrain, 1=sft, 2=rlhf, 3=reason +python eval_model.py --model_mode 1 + +# Test LoRA model +python eval_model.py --lora_name 'lora_medical' --model_mode 2 +``` + +## 🔧 Multi-GPU Training + +### DDP Method + +```bash +torchrun --nproc_per_node N train_xxx.py +``` + +### DeepSpeed Method + +```bash +deepspeed --master_port 29500 --num_gpus=N train_xxx.py +``` + +### Wandb Monitoring + +```bash +# Login first +wandb login + +# Enable wandb +torchrun --nproc_per_node N train_xxx.py --use_wandb +``` + +## 💰 Training Cost + +Based on single NVIDIA 3090: + +| Dataset Combination | Duration | Cost | Effect | +|-----------|------|------|------| +| pretrain_hq + sft_mini_512 | 2.1h | ≈$0.35 | 😊😊 Basic chat | +| Full dataset (MiniMind2-Small) | 38h | ≈$6.50 | 😊😊😊😊😊😊 Complete capabilities | +| Full dataset (MiniMind2) | 122h | ≈$20.80 | 😊😊😊😊😊😊😊😊 Best performance | + +!!! success "Quick Reproduction" + Using `pretrain_hq` + `sft_mini_512`, single 3090 only needs **2 hours + $0.5** to train a ChatBot! 
+ +## 📝 Common Issues + +- **Out of memory**: Reduce `batch_size` or use DeepSpeed +- **Training not converging**: Adjust learning rate or check data quality +- **Multi-GPU training error**: Ensure all GPUs are visible and CUDA versions are consistent + diff --git a/docs/training.md b/docs/training.md new file mode 100644 index 0000000..c1c80d8 --- /dev/null +++ b/docs/training.md @@ -0,0 +1,186 @@ +# 模型训练 + +本页面介绍如何从 0 开始训练 MiniMind 语言模型。 + +## 📊 数据准备 + +### 1. 下载数据集 + +从 [ModelScope](https://www.modelscope.cn/datasets/gongjy/minimind_dataset/files) 或 [HuggingFace](https://huggingface.co/datasets/jingyaogong/minimind_dataset) 下载数据集。 + +创建 `./dataset` 目录并放入数据文件: + +```bash +./dataset/ +├── pretrain_hq.jsonl (1.6GB, ✨推荐) +├── sft_mini_512.jsonl (1.2GB, ✨推荐) +├── sft_512.jsonl (7.5GB) +├── sft_1024.jsonl (5.6GB) +├── sft_2048.jsonl (9GB) +├── dpo.jsonl (909MB) +├── r1_mix_1024.jsonl (340MB) +└── lora_*.jsonl +``` + +!!! tip "推荐组合" + 最快速度复现:`pretrain_hq.jsonl` + `sft_mini_512.jsonl` + + **单卡 3090 仅需 2 小时 + 3 块钱!** + +### 2. 数据格式 + +**预训练数据** (`pretrain_hq.jsonl`): +```json +{"text": "如何才能摆脱拖延症?治愈拖延症并不容易..."} +``` + +**SFT 数据** (`sft_*.jsonl`): +```json +{ + "conversations": [ + {"role": "user", "content": "你好"}, + {"role": "assistant", "content": "你好!"} + ] +} +``` + +## 🎯 训练流程 + +所有训练脚本位于 `./trainer` 目录。 + +### 1. 预训练(Pretrain) + +预训练阶段让模型学习基础知识,目标是**学会词语接龙**。 + +```bash +cd trainer +python train_pretrain.py + +# 多卡训练 +torchrun --nproc_per_node 2 train_pretrain.py +``` + +输出权重:`./out/pretrain_*.pth` + +!!! info "训练时长" + - MiniMind2-Small (26M): ~1.1h (单卡 3090) + - MiniMind2 (104M): ~3.9h (单卡 3090) + +### 2. 监督微调(SFT) + +SFT 阶段让模型学习对话方式,适应聊天模板。 + +```bash +python train_full_sft.py + +# 多卡训练 +torchrun --nproc_per_node 2 train_full_sft.py +``` + +输出权重:`./out/full_sft_*.pth` + +!!! info "训练时长" + - MiniMind2-Small: ~1h (使用 sft_mini_512) + - MiniMind2: ~3.3h (使用 sft_mini_512) + +### 3. 
LoRA 微调(可选) + +LoRA 是一种参数高效的微调方法,适合领域适配。 + +```bash +python train_lora.py +``` + +**应用场景**: +- 医疗问答:使用 `lora_medical.jsonl` +- 自我认知:使用 `lora_identity.jsonl` + +输出权重:`./out/lora/lora_*.pth` + +### 4. DPO 强化学习(可选) + +DPO 用于优化模型回复质量,使其更符合人类偏好。 + +```bash +python train_dpo.py +``` + +输出权重:`./out/rlhf_*.pth` + +### 5. 推理模型蒸馏(可选) + +蒸馏 DeepSeek-R1 的推理能力。 + +```bash +python train_distill_reason.py +``` + +输出权重:`./out/reason_*.pth` + +## 📈 模型结构 + +MiniMind 使用 Transformer Decoder-Only 结构(类似 Llama3): + +![structure](images/LLM-structure.png) + +### 模型参数配置 + +| Model Name | params | d_model | n_layers | kv_heads | q_heads | +|------------|--------|---------|----------|----------|---------| +| MiniMind2-Small | 26M | 512 | 8 | 2 | 8 | +| MiniMind2-MoE | 145M | 640 | 8 | 2 | 8 | +| MiniMind2 | 104M | 768 | 16 | 2 | 8 | + +## 🧪 测试模型 + +```bash +# model_mode: 0=pretrain, 1=sft, 2=rlhf, 3=reason +python eval_model.py --model_mode 1 + +# 测试 LoRA 模型 +python eval_model.py --lora_name 'lora_medical' --model_mode 2 +``` + +## 🔧 多卡训练 + +### DDP 方式 + +```bash +torchrun --nproc_per_node N train_xxx.py +``` + +### DeepSpeed 方式 + +```bash +deepspeed --master_port 29500 --num_gpus=N train_xxx.py +``` + +### Wandb 监控 + +```bash +# 需要先登录 +wandb login + +# 启用 wandb +torchrun --nproc_per_node N train_xxx.py --use_wandb +``` + +## 💰 训练成本 + +基于单卡 NVIDIA 3090: + +| 数据集组合 | 时长 | 成本 | 效果 | +|-----------|------|------|------| +| pretrain_hq + sft_mini_512 | 2.1h | ≈2.73¥ | 😊😊 基础对话 | +| 完整数据集 (MiniMind2-Small) | 38h | ≈49.61¥ | 😊😊😊😊😊😊 完整能力 | +| 完整数据集 (MiniMind2) | 122h | ≈158.6¥ | 😊😊😊😊😊😊😊😊 最强性能 | + +!!! success "极速复现" + 使用 `pretrain_hq` + `sft_mini_512`,单卡 3090 仅需 **2 小时 + 3 块钱**即可训练出能对话的 ChatBot! 
+ +## 📝 常见问题 + +- **显存不足**:减小 `batch_size` 或使用 DeepSpeed +- **训练不收敛**:调整学习率或检查数据质量 +- **多卡训练报错**:确保所有卡都可见且 CUDA 版本一致 + diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..94b2d03 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,89 @@ +site_name: MiniMind +site_description: MiniMind - 轻量级语言模型训练框架 / Lightweight Language Model Training Framework +site_author: jingyaogong +site_url: https://minimind.readthedocs.io/ + +# 多语言配置 +plugins: + - search: + lang: + - zh + - en + - i18n: + docs_structure: suffix + fallback_to_default: true + reconfigure_material: true + reconfigure_search: true + languages: + - locale: zh + default: true + name: 简体中文 + build: true + nav_translations: + 首页: 首页 + 快速开始: 快速开始 + 模型训练: 模型训练 + - locale: en + name: English + build: true + nav_translations: + 首页: Home + 快速开始: Quick Start + 模型训练: Model Training + +# 主题配置 +theme: + name: material + favicon: images/logo.png + icon: + logo: material/book-open-page-variant + palette: + # 浅色模式 + - scheme: default + primary: white + accent: blue + toggle: + icon: material/brightness-7 + name: 切换至深色模式 + # 深色模式 + - scheme: slate + primary: black + accent: blue + toggle: + icon: material/brightness-4 + name: 切换至浅色模式 + features: + - navigation.instant # 即时加载 + - navigation.tracking # 锚点跟踪 + - navigation.sections # 导航分组 + - navigation.expand # 默认展开导航 + - navigation.top # 返回顶部按钮 + - search.suggest # 搜索建议 + - search.highlight # 搜索高亮 + - content.code.copy # 代码复制按钮 + - toc.follow # 目录跟随 + - toc.integrate # 目录集成到左侧边栏 + language: zh + +# 导航结构 +nav: + - 首页: index.md + - 快速开始: quickstart.md + - 模型训练: training.md + +# Markdown 扩展 +markdown_extensions: + - toc: + permalink: true + - admonition + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.inlinehilite + - pymdownx.snippets + - pymdownx.superfences + - pymdownx.details + - pymdownx.tabbed: + alternate_style: true + - attr_list + - md_in_html +