commit 3639ad0df1780774cf8b2c85d88c1e0d1f1d46fa
Author: jingyaogong <gongjy.cs@qq.com>
Date:   Sun Oct 12 22:14:17 2025 +0800

    init minimind-docs

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 0000000..d799408
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,18 @@
+# Read the Docs configuration file
+version: 2
+
+# Build configuration
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.11"  # quoted so YAML keeps the version a string rather than a float
+
+# MkDocs configuration
+mkdocs:
+  configuration: mkdocs.yml  # path is relative to the repository root
+
+# Python dependencies
+python:
+  install:
+    - requirements: requirements.txt  # NOTE(review): file is not part of the visible diff - confirm it exists
+
diff --git a/docs/images/1-wiki.png b/docs/images/1-wiki.png
new file mode 100644
index 0000000..5eba0fc
Binary files /dev/null and b/docs/images/1-wiki.png differ
diff --git a/docs/images/2-wiki.png b/docs/images/2-wiki.png
new file mode 100644
index 0000000..35a1d53
Binary files /dev/null and b/docs/images/2-wiki.png differ
diff --git a/docs/images/3-wiki.png b/docs/images/3-wiki.png
new file mode 100644
index 0000000..9570bbf
Binary files /dev/null and b/docs/images/3-wiki.png differ
diff --git a/docs/images/4-wiki.png b/docs/images/4-wiki.png
new file mode 100644
index 0000000..931e240
Binary files /dev/null and b/docs/images/4-wiki.png differ
diff --git a/docs/images/5-wiki.png b/docs/images/5-wiki.png
new file mode 100644
index 0000000..78ca9af
Binary files /dev/null and b/docs/images/5-wiki.png differ
diff --git a/docs/images/LLM-structure-moe.png b/docs/images/LLM-structure-moe.png
new file mode 100644
index 0000000..4588477
Binary files /dev/null and b/docs/images/LLM-structure-moe.png differ
diff --git a/docs/images/LLM-structure.png b/docs/images/LLM-structure.png
new file mode 100755
index 0000000..bbd93dd
Binary files /dev/null and b/docs/images/LLM-structure.png differ
diff --git a/docs/images/and_huggingface.png b/docs/images/and_huggingface.png
new file mode 100644
index 0000000..c234f8a
Binary files /dev/null and b/docs/images/and_huggingface.png differ
diff --git a/docs/images/and_modelscope.png b/docs/images/and_modelscope.png
new file mode 100644
index 0000000..1e46da4
Binary files /dev/null and b/docs/images/and_modelscope.png differ
diff --git a/docs/images/compare_radar.png b/docs/images/compare_radar.png
new file mode 100644
index 0000000..345d9f6
Binary files /dev/null and b/docs/images/compare_radar.png differ
diff --git a/docs/images/dataset.jpg b/docs/images/dataset.jpg
new file mode 100644
index 0000000..7dfc366
Binary files /dev/null and b/docs/images/dataset.jpg differ
diff --git a/docs/images/gpt3_config.png b/docs/images/gpt3_config.png
new file mode 100644
index 0000000..121bc29
Binary files /dev/null and b/docs/images/gpt3_config.png differ
diff --git a/docs/images/logo.png b/docs/images/logo.png
new file mode 100644
index 0000000..14d8301
Binary files /dev/null and b/docs/images/logo.png differ
diff --git a/docs/images/logo2.png b/docs/images/logo2.png
new file mode 100644
index 0000000..9a0b3e2
Binary files /dev/null and b/docs/images/logo2.png differ
diff --git a/docs/images/minimind2.gif b/docs/images/minimind2.gif
new file mode 100644
index 0000000..43c9cd1
Binary files /dev/null and b/docs/images/minimind2.gif differ
diff --git a/docs/images/pre_512_loss.png b/docs/images/pre_512_loss.png
new file mode 100644
index 0000000..3da0be5
Binary files /dev/null and b/docs/images/pre_512_loss.png differ
diff --git a/docs/images/pre_768_loss.png b/docs/images/pre_768_loss.png
new file mode 100644
index 0000000..e00b23c
Binary files /dev/null and b/docs/images/pre_768_loss.png differ
diff --git a/docs/images/rope_ppl.png b/docs/images/rope_ppl.png
new file mode 100644
index 0000000..223292e
Binary files /dev/null and b/docs/images/rope_ppl.png differ
diff --git a/docs/images/sft_512_loss.png b/docs/images/sft_512_loss.png
new file mode 100644
index 0000000..40b86bc
Binary files /dev/null and b/docs/images/sft_512_loss.png differ
diff --git a/docs/images/sft_768_loss.png b/docs/images/sft_768_loss.png
new file mode 100644
index 0000000..5ea6c97
Binary files /dev/null and b/docs/images/sft_768_loss.png differ
diff --git a/docs/images/training_grpo.png b/docs/images/training_grpo.png
new file mode 100644
index 0000000..54e925e
Binary files /dev/null and b/docs/images/training_grpo.png differ
diff --git a/docs/images/training_ppo.png b/docs/images/training_ppo.png
new file mode 100644
index 0000000..dd0f275
Binary files /dev/null and b/docs/images/training_ppo.png differ
diff --git a/docs/index.en.md b/docs/index.en.md
new file mode 100644
index 0000000..f4a60b3
--- /dev/null
+++ b/docs/index.en.md
@@ -0,0 +1,53 @@
+# <strong>Welcome to MiniMind!</strong>
+
+<figure markdown>
+  ![logo](images/logo.png)
+  <figcaption><strong>"Simplicity is the ultimate sophistication"</strong></figcaption>
+</figure>
+
+## 📌 Introduction
+
+MiniMind is a super-small language model project trained completely from scratch, requiring **only $0.5 + 2 hours** to train a **26M** language model!
+
+- The **MiniMind** series is extremely lightweight; the smallest version is **1/7000** the size of GPT-3
+- The project open-sources the minimalist structure of large models, including:
+  - Mixture of Experts (MoE)
+  - Dataset cleaning
+  - Pretraining
+  - Supervised Fine-Tuning (SFT)
+  - LoRA fine-tuning
+  - Direct Preference Optimization (DPO)
+  - Model distillation
+- All core algorithm code is reconstructed from scratch using native PyTorch, without relying on third-party abstract interfaces
+- This is not only a full-stage open-source reproduction of large language models, but also a tutorial for getting started with LLMs
+
+!!! note "Training Cost"
+    "2 hours" was measured on a single NVIDIA 3090 GPU; "$0.5" refers to the GPU server rental cost
+
+## ✨ Key Features
+
+- **Ultra-low cost**: Single 3090, 2 hours, $0.5 to train a ChatBot from scratch
+- **Complete pipeline**: Covers the full process: tokenizer, pretraining, SFT, LoRA, DPO, and distillation
+- **Education-friendly**: Clean code, suitable for learning LLM principles
+- **Ecosystem compatible**: Supports `transformers`, `llama.cpp`, `vllm`, `ollama` and other mainstream frameworks
+
+## 📊 Model List
+
+| Model (Size) | Inference Memory (Approx.) | Release |
+|------------|----------|---------|
+| MiniMind2-small (26M) | 0.5 GB | 2025.04.26 |
+| MiniMind2-MoE (145M) | 1.0 GB | 2025.04.26 |
+| MiniMind2 (104M) | 1.0 GB | 2025.04.26 |
+
+## 🚀 Quick Navigation
+
+- [Quick Start](quickstart.en.md) - Environment setup, model download, quick testing
+- [Model Training](training.en.md) - Pretraining, SFT, LoRA, DPO training process
+
+## 🔗 Related Links
+
+- **GitHub**: [https://github.com/jingyaogong/minimind](https://github.com/jingyaogong/minimind)
+- **HuggingFace**: [MiniMind Collection](https://huggingface.co/collections/jingyaogong/minimind-66caf8d999f5c7fa64f399e5)
+- **ModelScope**: [MiniMind Models](https://www.modelscope.cn/profile/gongjy)
+- **Online Demo**: [ModelScope Studio](https://www.modelscope.cn/studios/gongjy/MiniMind)
+
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..3fbeef1
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,53 @@
+# <strong>Welcome to MiniMind!</strong>
+
+
+<figure markdown>
+  ![logo](images/logo.png)
+</figure>
+
+## 📌 项目简介
+
+MiniMind 是一个完全从 0 开始训练的超小语言模型项目，**仅需 3 块钱成本 + 2 小时**即可训练出仅为 **26M** 的语言模型！
+
+- **MiniMind** 系列极其轻量，最小版本体积是 GPT-3 的 **1/7000**
+- 项目开源了大模型的极简结构，包含：
+  - 混合专家模型（MoE）
+  - 数据集清洗
+  - 预训练（Pretrain）
+  - 监督微调（SFT）
+  - LoRA 微调
+  - 直接偏好优化（DPO）
+  - 模型蒸馏
+- 所有核心算法代码均从 0 使用 PyTorch 原生重构，不依赖第三方抽象接口
+- 这不仅是大语言模型的全阶段开源复现，也是一个入门 LLM 的教程
+
+!!! note "训练成本"
+    "2 小时" 基于 NVIDIA 3090 硬件设备（单卡）测试，"3 块钱" 指 GPU 服务器租用成本
+
+## ✨ 主要特点
+
+- **超低成本**：单卡 3090，2 小时，3 块钱即可从 0 训练 ChatBot
+- **完整流程**：涵盖 Tokenizer、预训练、SFT、LoRA、DPO、蒸馏全流程
+- **教育友好**：代码简洁，适合学习 LLM 原理
+- **生态兼容**：支持 `transformers`、`llama.cpp`、`vllm`、`ollama` 等主流框架
+
+## 📊 模型列表
+
+| 模型 (大小) | 推理占用 (约) | Release |
+|------------|----------|---------|
+| MiniMind2-small (26M) | 0.5 GB | 2025.04.26 |
+| MiniMind2-MoE (145M) | 1.0 GB | 2025.04.26 |
+| MiniMind2 (104M) | 1.0 GB | 2025.04.26 |
+
+## 🚀 快速导航
+
+- [快速开始](quickstart.md) - 环境安装、模型下载、快速测试
+- [模型训练](training.md) - 预训练、SFT、LoRA、DPO 等训练流程
+
+## 🔗 相关链接
+
+- **GitHub**: [https://github.com/jingyaogong/minimind](https://github.com/jingyaogong/minimind)
+- **HuggingFace**: [MiniMind Collection](https://huggingface.co/collections/jingyaogong/minimind-66caf8d999f5c7fa64f399e5)
+- **ModelScope**: [MiniMind 模型](https://www.modelscope.cn/profile/gongjy)
+- **在线体验**: [ModelScope 创空间](https://www.modelscope.cn/studios/gongjy/MiniMind)
+
diff --git a/docs/quickstart.en.md b/docs/quickstart.en.md
new file mode 100644
index 0000000..c3e0ade
--- /dev/null
+++ b/docs/quickstart.en.md
@@ -0,0 +1,114 @@
+# Quick Start
+
+This page will help you quickly get started with the MiniMind project.
+
+## 📋 Requirements
+
+- **Python**: 3.10+
+- **PyTorch**: 1.12+
+- **CUDA**: 12.2+ (optional, for GPU acceleration)
+- **VRAM**: At least 8GB (24GB recommended)
+
+!!! tip "Hardware Configuration Reference"
+    - CPU: Intel i9-10980XE @ 3.00GHz
+    - RAM: 128 GB
+    - GPU: NVIDIA GeForce RTX 3090 (24GB)
+
+## 🚀 Testing Existing Models
+
+### 1. Clone the Project
+
+```bash
+git clone https://github.com/jingyaogong/minimind.git
+cd minimind
+```
+
+### 2. Install Dependencies
+
+```bash
+pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
+```
+
+!!! warning "Torch CUDA Check"
+    After installation, test if Torch can use CUDA:
+    ```python
+    import torch
+    print(torch.cuda.is_available())
+    ```
+
+### 3. Download Model
+
+Download pretrained models from HuggingFace or ModelScope:
+
+```bash
+# From HuggingFace
+git clone https://huggingface.co/jingyaogong/MiniMind2
+
+# Or from ModelScope
+git clone https://www.modelscope.cn/models/gongjy/MiniMind2.git
+```
+
+### 4. Command Line Q&A
+
+```bash
+# load=0: load PyTorch model, load=1: load Transformers model
+python eval_model.py --load 1 --model_mode 2
+```
+
+### 5. Start WebUI (Optional)
+
+```bash
+# Requires Python >= 3.10
+pip install streamlit
+cd scripts
+streamlit run web_demo.py
+```
+
+Visit `http://localhost:8501` to use the web interface.
+
+## 🔧 Third-party Inference Frameworks
+
+MiniMind supports multiple mainstream inference frameworks:
+
+### Ollama
+
+```bash
+ollama run jingyaogong/minimind2
+```
+
+### vLLM
+
+```bash
+vllm serve ./MiniMind2/ --served-model-name "minimind"
+```
+
+### llama.cpp
+
+```bash
+# Convert model
+python convert_hf_to_gguf.py ./MiniMind2/
+
+# Quantize model
+./build/bin/llama-quantize ./MiniMind2/MiniMind2-109M-F16.gguf ./Q4-MiniMind2.gguf Q4_K_M
+
+# Inference
+./build/bin/llama-cli -m ./Q4-MiniMind2.gguf --chat-template chatml
+```
+
+## 📝 Effect Testing
+
+```text
+👶: Hello, please introduce yourself.
+🤖️: Hello! I'm MiniMind, an AI assistant developed by Jingyao Gong.
+    I interact with users through natural language processing and algorithm training.
+
+👶: What is the highest mountain in the world?
+🤖️: Mount Everest is the highest mountain in the world, located in the Himalayas,
+    with an elevation of 8,848.86 meters (29,031.7 feet).
+```
+
+## 🎯 Next Steps
+
+- Check [Model Training](training.en.md) to learn how to train your own model from scratch
+- Read the source code to understand LLM implementation principles
+
diff --git a/docs/quickstart.md b/docs/quickstart.md
new file mode 100644
index 0000000..a8bbf94
--- /dev/null
+++ b/docs/quickstart.md
@@ -0,0 +1,114 @@
+# 快速开始
+
+本页面将帮助你快速上手 MiniMind 项目。
+
+## 📋 环境要求
+
+- **Python**: 3.10+
+- **PyTorch**: 1.12+
+- **CUDA**: 12.2+（可选，用于 GPU 加速）
+- **显存**: 至少 8GB（推荐 24GB）
+
+!!! tip "硬件配置参考"
+    - CPU: Intel i9-10980XE @ 3.00GHz
+    - RAM: 128 GB
+    - GPU: NVIDIA GeForce RTX 3090 (24GB)
+
+## 🚀 测试已有模型
+
+### 1. 克隆项目
+
+```bash
+git clone https://github.com/jingyaogong/minimind.git
+cd minimind
+```
+
+### 2. 安装依赖
+
+```bash
+pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
+```
+
+!!! warning "Torch CUDA 检查"
+    安装后请测试 Torch 是否可用 CUDA：
+    ```python
+    import torch
+    print(torch.cuda.is_available())
+    ```
+
+### 3. 下载模型
+
+从 HuggingFace 或 ModelScope 下载预训练模型：
+
+```bash
+# 从 HuggingFace 下载
+git clone https://huggingface.co/jingyaogong/MiniMind2
+
+# 或从 ModelScope 下载
+git clone https://www.modelscope.cn/models/gongjy/MiniMind2.git
+```
+
+### 4. 命令行问答
+
+```bash
+# load=0: 加载 PyTorch 模型, load=1: 加载 Transformers 模型
+python eval_model.py --load 1 --model_mode 2
+```
+
+### 5. 启动 WebUI（可选）
+
+```bash
+# 需要 Python >= 3.10
+pip install streamlit
+cd scripts
+streamlit run web_demo.py
+```
+
+访问 `http://localhost:8501` 即可使用 Web 界面。
+
+## 🔧 第三方推理框架
+
+MiniMind 支持多种主流推理框架：
+
+### Ollama
+
+```bash
+ollama run jingyaogong/minimind2
+```
+
+### vLLM
+
+```bash
+vllm serve ./MiniMind2/ --served-model-name "minimind"
+```
+
+### llama.cpp
+
+```bash
+# 转换模型
+python convert_hf_to_gguf.py ./MiniMind2/
+
+# 量化模型
+./build/bin/llama-quantize ./MiniMind2/MiniMind2-109M-F16.gguf ./Q4-MiniMind2.gguf Q4_K_M
+
+# 推理
+./build/bin/llama-cli -m ./Q4-MiniMind2.gguf --chat-template chatml
+```
+
+## 📝 效果测试
+
+```text
+👶: 你好，请介绍一下自己。
+🤖️: 你好！我是 MiniMind，一个由 Jingyao Gong 开发的人工智能助手。
+    我通过自然语言处理和算法训练来与用户进行交互。
+
+👶: 世界上最高的山峰是什么？
+🤖️: 珠穆朗玛峰是世界上最高的山峰，位于喜马拉雅山脉，
+    海拔 8,848.86 米（29,031.7 英尺）。
+```
+
+## 🎯 下一步
+
+- 查看 [模型训练](training.md) 了解如何从 0 开始训练自己的模型
+- 阅读源码了解 LLM 的实现原理
+
diff --git a/docs/training.en.md b/docs/training.en.md
new file mode 100644
index 0000000..dd6624d
--- /dev/null
+++ b/docs/training.en.md
@@ -0,0 +1,186 @@
+# Model Training
+
+This page introduces how to train MiniMind language models from scratch.
+
+## 📊 Data Preparation
+
+### 1. Download Dataset
+
+Download datasets from [ModelScope](https://www.modelscope.cn/datasets/gongjy/minimind_dataset/files) or [HuggingFace](https://huggingface.co/datasets/jingyaogong/minimind_dataset).
+
+Create `./dataset` directory and place data files:
+
+```text
+./dataset/
+├── pretrain_hq.jsonl (1.6GB, ✨Recommended)
+├── sft_mini_512.jsonl (1.2GB, ✨Recommended)
+├── sft_512.jsonl (7.5GB)
+├── sft_1024.jsonl (5.6GB)
+├── sft_2048.jsonl (9GB)
+├── dpo.jsonl (909MB)
+├── r1_mix_1024.jsonl (340MB)
+└── lora_*.jsonl
+```
+
+!!! tip "Recommended Combination"
+    Fastest reproduction: `pretrain_hq.jsonl` + `sft_mini_512.jsonl`
+    
+    **A single 3090 needs only 2 hours + $0.5!**
+
+### 2. Data Format
+
+**Pretrain Data** (`pretrain_hq.jsonl`):
+```json
+{"text": "How to overcome procrastination? Overcoming procrastination is not easy..."}
+```
+
+**SFT Data** (`sft_*.jsonl`):
+```json
+{
+  "conversations": [
+    {"role": "user", "content": "Hello"},
+    {"role": "assistant", "content": "Hello!"}
+  ]
+}
+```
+
+## 🎯 Training Pipeline
+
+All training scripts are located in the `./trainer` directory.
+
+### 1. Pretraining
+
+The pretraining stage lets the model learn basic knowledge; the goal is to **learn word continuation**.
+
+```bash
+cd trainer
+python train_pretrain.py
+
+# Multi-GPU training
+torchrun --nproc_per_node 2 train_pretrain.py
+```
+
+Output weights: `./out/pretrain_*.pth`
+
+!!! info "Training Duration"
+    - MiniMind2-Small (26M): ~1.1h (single 3090)
+    - MiniMind2 (104M): ~3.9h (single 3090)
+
+### 2. Supervised Fine-Tuning (SFT)
+
+The SFT stage teaches the model conversation patterns and adapts it to chat templates.
+
+```bash
+python train_full_sft.py
+
+# Multi-GPU training
+torchrun --nproc_per_node 2 train_full_sft.py
+```
+
+Output weights: `./out/full_sft_*.pth`
+
+!!! info "Training Duration"
+    - MiniMind2-Small: ~1h (using sft_mini_512)
+    - MiniMind2: ~3.3h (using sft_mini_512)
+
+### 3. LoRA Fine-tuning (Optional)
+
+LoRA is a parameter-efficient fine-tuning method, suitable for domain adaptation.
+
+```bash
+python train_lora.py
+```
+
+**Use Cases**:
+- Medical Q&A: use `lora_medical.jsonl`
+- Self-awareness: use `lora_identity.jsonl`
+
+Output weights: `./out/lora/lora_*.pth`
+
+### 4. DPO Reinforcement Learning (Optional)
+
+DPO is used to optimize model response quality to better align with human preferences.
+
+```bash
+python train_dpo.py
+```
+
+Output weights: `./out/rlhf_*.pth`
+
+### 5. Reasoning Model Distillation (Optional)
+
+Distill reasoning capabilities from DeepSeek-R1.
+
+```bash
+python train_distill_reason.py
+```
+
+Output weights: `./out/reason_*.pth`
+
+## 📈 Model Architecture
+
+MiniMind uses a Transformer decoder-only architecture (similar to Llama3):
+
+![structure](images/LLM-structure.png)
+
+### Model Parameter Configuration
+
+| Model Name | params | d_model | n_layers | kv_heads | q_heads |
+|------------|--------|---------|----------|----------|---------|
+| MiniMind2-Small | 26M | 512 | 8 | 2 | 8 |
+| MiniMind2-MoE | 145M | 640 | 8 | 2 | 8 |
+| MiniMind2 | 104M | 768 | 16 | 2 | 8 |
+
+## 🧪 Test Model
+
+```bash
+# model_mode: 0=pretrain, 1=sft, 2=rlhf, 3=reason
+python eval_model.py --model_mode 1
+
+# Test LoRA model
+python eval_model.py --lora_name 'lora_medical' --model_mode 2
+```
+
+## 🔧 Multi-GPU Training
+
+### DDP Method
+
+```bash
+torchrun --nproc_per_node N train_xxx.py
+```
+
+### DeepSpeed Method
+
+```bash
+deepspeed --master_port 29500 --num_gpus=N train_xxx.py
+```
+
+### Wandb Monitoring
+
+```bash
+# Login first
+wandb login
+
+# Enable wandb
+torchrun --nproc_per_node N train_xxx.py --use_wandb
+```
+
+## 💰 Training Cost
+
+Based on single NVIDIA 3090:
+
+| Dataset Combination | Duration | Cost | Effect |
+|-----------|------|------|------|
+| pretrain_hq + sft_mini_512 | 2.1h | ≈$0.35 | 😊😊 Basic chat |
+| Full dataset (MiniMind2-Small) | 38h | ≈$6.50 | 😊😊😊😊😊😊 Complete capabilities |
+| Full dataset (MiniMind2) | 122h | ≈$20.80 | 😊😊😊😊😊😊😊😊 Best performance |
+
+!!! success "Quick Reproduction"
+    Using `pretrain_hq` + `sft_mini_512`, a single 3090 needs only **2 hours + $0.5** to train a ChatBot!
+
+## 📝 Common Issues
+
+- **Out of memory**: Reduce `batch_size` or use DeepSpeed
+- **Training not converging**: Adjust learning rate or check data quality
+- **Multi-GPU training error**: Ensure all GPUs are visible and CUDA versions are consistent
+
diff --git a/docs/training.md b/docs/training.md
new file mode 100644
index 0000000..c1c80d8
--- /dev/null
+++ b/docs/training.md
@@ -0,0 +1,186 @@
+# 模型训练
+
+本页面介绍如何从 0 开始训练 MiniMind 语言模型。
+
+## 📊 数据准备
+
+### 1. 下载数据集
+
+从 [ModelScope](https://www.modelscope.cn/datasets/gongjy/minimind_dataset/files) 或 [HuggingFace](https://huggingface.co/datasets/jingyaogong/minimind_dataset) 下载数据集。
+
+创建 `./dataset` 目录并放入数据文件：
+
+```text
+./dataset/
+├── pretrain_hq.jsonl (1.6GB, ✨推荐)
+├── sft_mini_512.jsonl (1.2GB, ✨推荐)
+├── sft_512.jsonl (7.5GB)
+├── sft_1024.jsonl (5.6GB)
+├── sft_2048.jsonl (9GB)
+├── dpo.jsonl (909MB)
+├── r1_mix_1024.jsonl (340MB)
+└── lora_*.jsonl
+```
+
+!!! tip "推荐组合"
+    最快速度复现：`pretrain_hq.jsonl` + `sft_mini_512.jsonl`
+    
+    **单卡 3090 仅需 2 小时 + 3 块钱！**
+
+### 2. 数据格式
+
+**预训练数据** (`pretrain_hq.jsonl`):
+```json
+{"text": "如何才能摆脱拖延症？治愈拖延症并不容易..."}
+```
+
+**SFT 数据** (`sft_*.jsonl`):
+```json
+{
+  "conversations": [
+    {"role": "user", "content": "你好"},
+    {"role": "assistant", "content": "你好！"}
+  ]
+}
+```
+
+## 🎯 训练流程
+
+所有训练脚本位于 `./trainer` 目录。
+
+### 1. 预训练（Pretrain）
+
+预训练阶段让模型学习基础知识，目标是**学会词语接龙**。
+
+```bash
+cd trainer
+python train_pretrain.py
+
+# 多卡训练
+torchrun --nproc_per_node 2 train_pretrain.py
+```
+
+输出权重：`./out/pretrain_*.pth`
+
+!!! info "训练时长"
+    - MiniMind2-Small (26M): ~1.1h (单卡 3090)
+    - MiniMind2 (104M): ~3.9h (单卡 3090)
+
+### 2. 监督微调（SFT）
+
+SFT 阶段让模型学习对话方式，适应聊天模板。
+
+```bash
+python train_full_sft.py
+
+# 多卡训练
+torchrun --nproc_per_node 2 train_full_sft.py
+```
+
+输出权重：`./out/full_sft_*.pth`
+
+!!! info "训练时长"
+    - MiniMind2-Small: ~1h (使用 sft_mini_512)
+    - MiniMind2: ~3.3h (使用 sft_mini_512)
+
+### 3. LoRA 微调（可选）
+
+LoRA 是一种参数高效的微调方法，适合领域适配。
+
+```bash
+python train_lora.py
+```
+
+**应用场景**：
+- 医疗问答：使用 `lora_medical.jsonl`
+- 自我认知：使用 `lora_identity.jsonl`
+
+输出权重：`./out/lora/lora_*.pth`
+
+### 4. DPO 强化学习（可选）
+
+DPO 用于优化模型回复质量，使其更符合人类偏好。
+
+```bash
+python train_dpo.py
+```
+
+输出权重：`./out/rlhf_*.pth`
+
+### 5. 推理模型蒸馏（可选）
+
+蒸馏 DeepSeek-R1 的推理能力。
+
+```bash
+python train_distill_reason.py
+```
+
+输出权重：`./out/reason_*.pth`
+
+## 📈 模型结构
+
+MiniMind 使用 Transformer Decoder-Only 结构（类似 Llama3）：
+
+![structure](images/LLM-structure.png)
+
+### 模型参数配置
+
+| Model Name | params | d_model | n_layers | kv_heads | q_heads |
+|------------|--------|---------|----------|----------|---------|
+| MiniMind2-Small | 26M | 512 | 8 | 2 | 8 |
+| MiniMind2-MoE | 145M | 640 | 8 | 2 | 8 |
+| MiniMind2 | 104M | 768 | 16 | 2 | 8 |
+
+## 🧪 测试模型
+
+```bash
+# model_mode: 0=pretrain, 1=sft, 2=rlhf, 3=reason
+python eval_model.py --model_mode 1
+
+# 测试 LoRA 模型
+python eval_model.py --lora_name 'lora_medical' --model_mode 2
+```
+
+## 🔧 多卡训练
+
+### DDP 方式
+
+```bash
+torchrun --nproc_per_node N train_xxx.py
+```
+
+### DeepSpeed 方式
+
+```bash
+deepspeed --master_port 29500 --num_gpus=N train_xxx.py
+```
+
+### Wandb 监控
+
+```bash
+# 需要先登录
+wandb login
+
+# 启用 wandb
+torchrun --nproc_per_node N train_xxx.py --use_wandb
+```
+
+## 💰 训练成本
+
+基于单卡 NVIDIA 3090：
+
+| 数据集组合 | 时长 | 成本 | 效果 |
+|-----------|------|------|------|
+| pretrain_hq + sft_mini_512 | 2.1h | ≈2.73￥ | 😊😊 基础对话 |
+| 完整数据集 (MiniMind2-Small) | 38h | ≈49.61￥ | 😊😊😊😊😊😊 完整能力 |
+| 完整数据集 (MiniMind2) | 122h | ≈158.6￥ | 😊😊😊😊😊😊😊😊 最强性能 |
+
+!!! success "极速复现"
+    使用 `pretrain_hq` + `sft_mini_512`，单卡 3090 仅需 **2 小时 + 3 块钱**即可训练出能对话的 ChatBot！
+
+## 📝 常见问题
+
+- **显存不足**：减小 `batch_size` 或使用 DeepSpeed
+- **训练不收敛**：调整学习率或检查数据质量
+- **多卡训练报错**：确保所有卡都可见且 CUDA 版本一致
+
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 0000000..94b2d03
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,89 @@
+site_name: MiniMind
+site_description: MiniMind - 轻量级语言模型训练框架 / Lightweight Language Model Training Framework
+site_author: jingyaogong
+site_url: https://minimind.readthedocs.io/  # published site address (hosted on Read the Docs)
+
+# Multi-language configuration
+plugins:
+  - search:
+      lang:
+        - zh
+        - en
+  - i18n:
+      docs_structure: suffix  # translations sit next to the default page, e.g. index.en.md beside index.md
+      fallback_to_default: true
+      reconfigure_material: true
+      reconfigure_search: true
+      languages:
+        - locale: zh
+          default: true  # the unsuffixed pages (index.md, quickstart.md, training.md) are the zh versions
+          name: 简体中文
+          build: true
+          nav_translations:  # keys are the titles declared under `nav:`; zh maps each to itself
+            首页: 首页
+            快速开始: 快速开始
+            模型训练: 模型训练
+        - locale: en
+          name: English
+          build: true
+          nav_translations:  # English labels for the same `nav:` titles
+            首页: Home
+            快速开始: Quick Start
+            模型训练: Model Training
+
+# Theme configuration
+theme:
+  name: material
+  favicon: images/logo.png
+  icon:
+    logo: material/book-open-page-variant
+  palette:
+    # Light mode
+    - scheme: default
+      primary: white
+      accent: blue
+      toggle:
+        icon: material/brightness-7
+        name: 切换至深色模式  # tooltip text: "Switch to dark mode"
+    # Dark mode
+    - scheme: slate
+      primary: black
+      accent: blue
+      toggle:
+        icon: material/brightness-4
+        name: 切换至浅色模式  # tooltip text: "Switch to light mode"
+  features:
+    # navigation.instant is left out: mkdocs-static-i18n's contextual language switcher is incompatible with it
+    - navigation.tracking     # anchor tracking
+    - navigation.sections     # group navigation into sections
+    - navigation.expand       # expand navigation by default
+    - navigation.top          # back-to-top button
+    - search.suggest          # search suggestions
+    - search.highlight        # highlight search matches
+    - content.code.copy       # copy button on code blocks
+    - toc.follow              # table of contents follows the active anchor
+    - toc.integrate           # integrate the table of contents into the left sidebar
+  language: zh
+
+# Navigation structure (titles are in the default zh locale; English labels come from nav_translations)
+nav:
+  - 首页: index.md  # Home
+  - 快速开始: quickstart.md  # Quick Start
+  - 模型训练: training.md  # Model Training
+
+# Markdown extensions
+markdown_extensions:
+  - toc:
+      permalink: true
+  - admonition  # the docs pages use "!!! note" / "!!! tip" / "!!! warning" call-outs
+  - pymdownx.highlight:
+      anchor_linenums: true
+  - pymdownx.inlinehilite
+  - pymdownx.snippets
+  - pymdownx.superfences  # presumably needed for the fenced code nested inside admonitions in quickstart - confirm
+  - pymdownx.details
+  - pymdownx.tabbed:
+      alternate_style: true
+  - attr_list
+  - md_in_html  # the index pages wrap the logo in <figure markdown>
+