From 6bedefcaca2223bdd09de97cbca10362e358c1d8 Mon Sep 17 00:00:00 2001
From: jingyaogong
Date: Sun, 26 Oct 2025 18:59:16 +0800
Subject: [PATCH] [feat] update docs

---
 docs/quickstart.md | 24 ++++++++++++------------
 docs/training.md   | 40 +++++++++++++++++++++++++++++++---------
 2 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/docs/quickstart.md b/docs/quickstart.md
index 266540f..0cb0131 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -64,16 +64,16 @@ git clone https://www.modelscope.cn/models/gongjy/MiniMind2.git
 ### 3. Command-Line Chat
 
 ```bash
-# load=0: load PyTorch model, load=1: load transformers model
-python eval_model.py --load 1 --model_mode 2
+# Use the transformers-format model
+python eval_llm.py --load_from ./MiniMind2
 ```
 
-**Model Modes**:
-- `model_mode 0`: Pretrain model (word continuation)
-- `model_mode 1`: SFT Chat model (conversation)
-- `model_mode 2`: RLHF model (refined responses, currently same as SFT for small models)
-- `model_mode 3`: Reasoning model (with thinking chains)
-- `model_mode 4/5`: RLAIF models (PPO/GRPO trained)
+**Weight Options** (`--weight` parameter):
+- `pretrain`: Pretrain model (word continuation)
+- `full_sft`: SFT chat model (conversation)
+- `dpo`: DPO model (preference optimization)
+- `reason`: Reasoning model (with thinking chains)
+- `ppo_actor`, `grpo`, `spo`: RLAIF models (reinforcement-learning trained)
 
 **Example Session**:
 ```text
@@ -103,7 +103,7 @@ Visit `http://localhost:8501` to use the interactive web interface.
 Extend context length beyond training with RoPE extrapolation:
 
 ```bash
-python eval_model.py --inference_rope_scaling True
+python eval_llm.py --weight full_sft --inference_rope_scaling
 ```
 
 This enables the YaRN algorithm to handle sequences longer than the 2K training context, which is useful for processing documents and long conversations.
@@ -227,10 +227,10 @@ A: 珠穆朗玛峰(Mount Everest)是世界上最高的山峰,位于喜马
 **Solution**:
 ```bash
 # Reduce batch size
-python eval_model.py --batch_size 1
+python eval_llm.py --batch_size 1
 
 # Or use CPU (slow but works)
-python eval_model.py --device cpu
+python eval_llm.py --device cpu
 ```
 
 ### Issue: Slow Inference
@@ -244,7 +244,7 @@ python eval_model.py --device cpu
 ### Issue: Model Responses Are Poor Quality
 
 **Possible Causes**:
-- Using pretrain model (`model_mode 0`) instead of SFT (`model_mode 1`)
+- Using the pretrain model (`--weight pretrain`) instead of SFT (`--weight full_sft`)
 - Model is undertrained - download the full checkpoint instead
 - Input prompt is too short - provide more context
 
diff --git a/docs/training.md b/docs/training.md
index 8fdf672..e4b1ba8 100644
--- a/docs/training.md
+++ b/docs/training.md
@@ -54,7 +54,7 @@ cd dataset
 ├── sft_512.jsonl (7.5GB, standard SFT)
 ├── sft_1024.jsonl (5.6GB, longer SFT)
 ├── sft_2048.jsonl (9GB, very long SFT)
-├── dpo.jsonl (909MB, DPO training)
+├── dpo.jsonl ✨ (55MB, DPO training - optimized and simplified)
 ├── r1_mix_1024.jsonl (340MB, reasoning distillation)
 ├── rlaif-mini.jsonl (1MB, RLAIF algorithms)
 ├── lora_identity.jsonl (22.8KB, identity LoRA)
@@ -112,6 +112,25 @@ All training scripts are in the `./trainer` directory.
 cd trainer
 ```
 
+!!! info "💡 Checkpoint Resume Training"
+    All training scripts automatically save checkpoints. Simply add the `--from_resume 1` flag to automatically detect, load, and resume from the latest checkpoint:
+
+    ```bash
+    python train_pretrain.py --from_resume 1
+    python train_full_sft.py --from_resume 1
+    python train_dpo.py --from_resume 1
+    # ... and all other training scripts
+    ```
+
+    **Checkpoint Resume Mechanism:**
+
+    - The training process automatically saves complete checkpoints in the `./checkpoints/` directory (model, optimizer, training progress, etc.)
+    - Checkpoint file naming: `*_resume.pth` (e.g., `full_sft_512_resume.pth`)
+    - Supports cross-GPU recovery (automatically adjusts the step count)
+    - Supports wandb training-log continuity (automatically resumes the same run)
+
+    > Useful for long training sessions or unstable environments: an interruption no longer costs you your progress
+
 ### Stage 1: Pretraining
 
 **Purpose**: Learn foundational knowledge (word continuation)
@@ -234,7 +253,7 @@ python train_dpo.py
 torchrun --nproc_per_node 2 train_dpo.py
 ```
 
-**Output**: `./out/rlhf_*.pth`
+**Output**: `./out/dpo_*.pth`
 
 **Key Features**:
 - Off-policy training (reuse data across epochs)
@@ -439,35 +458,38 @@ python train_xxx.py --use_wandb # Automatically uses SwanLab if available
 ### Evaluate Pretrain Model
 
 ```bash
-python eval_model.py --model_mode 0
+python eval_llm.py --weight pretrain
 ```
 
 ### Evaluate Chat Model
 
 ```bash
-python eval_model.py --model_mode 1
+python eval_llm.py --weight full_sft
 ```
 
 ### Evaluate with LoRA
 
 ```bash
-python eval_model.py --lora_name 'lora_medical' --model_mode 1
+python eval_llm.py --weight dpo --lora_weight lora_medical
 ```
 
 ### Evaluate Reasoning Model
 
 ```bash
-python eval_model.py --model_mode 3
+python eval_llm.py --weight reason
 ```
 
 ### Evaluate RLAIF Models
 
 ```bash
 # PPO model
-python eval_model.py --model_mode 4
+python eval_llm.py --weight ppo_actor
 
 # GRPO model
-python eval_model.py --model_mode 4
+python eval_llm.py --weight grpo
+
+# SPO model
+python eval_llm.py --weight spo
 ```
 
 ### RoPE Length Extrapolation
@@ -475,7 +497,7 @@ python eval_model.py --model_mode 4
 Test with extended context:
 
 ```bash
-python eval_model.py --model_mode 1 --inference_rope_scaling True
+python eval_llm.py --weight full_sft --inference_rope_scaling
 ```
 
 ## 📐 Model Architecture