mirror of
https://github.com/jingyaogong/minimind.git
synced 2026-04-25 08:48:16 +08:00
Merge b3069d4743 into 83e52f6a27
This commit is contained in:
commit
fe7fc29435
6
.gitignore
vendored
6
.gitignore
vendored
@ -1,4 +1,8 @@
|
||||
__pycache__
|
||||
model/__pycache__
|
||||
out
|
||||
website/
|
||||
docs-minimind/
|
||||
docs-minimind/
|
||||
logfile
|
||||
dataset
|
||||
checkpoints
|
||||
1
minimind_sdk/__init__.py
Normal file
1
minimind_sdk/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
from .client import MinimindClient
|
||||
65
minimind_sdk/client.py
Normal file
65
minimind_sdk/client.py
Normal file
@ -0,0 +1,65 @@
|
||||
import json
import urllib.error
import urllib.parse
import urllib.request
|
||||
|
||||
class MinimindClient:
    """Small HTTP client for the MiniMind trainer web service.

    All calls go through :meth:`_request`, which sends JSON bodies and
    converts ``urllib`` transport/HTTP errors into ``RuntimeError``.
    """

    def __init__(self, base_url, api_key=None, timeout=10):
        """Store connection settings.

        Args:
            base_url: Service root URL; trailing slashes are stripped.
            api_key: Optional bearer token; ``None`` is stored as ``''``.
            timeout: Timeout passed to ``urlopen`` for every request.
        """
        self.base_url = base_url.rstrip('/')
        self.api_key = api_key or ''
        self.timeout = timeout

    @staticmethod
    def _segment(value):
        """Percent-encode *value* so it is safe as one URL path segment."""
        # safe='' also encodes '/', so a value can never span path segments.
        return urllib.parse.quote(str(value), safe='')

    def _request(self, method, path, body=None, expect_text=False):
        """Send one request and return the decoded response.

        Args:
            method: HTTP method name.
            path: Path appended verbatim to ``base_url``.
            body: Optional object serialised as the JSON request body.
            expect_text: Return the body as text instead of parsing JSON.

        Returns:
            The parsed JSON value, or a ``str`` when ``expect_text`` is true.

        Raises:
            RuntimeError: On an HTTP error status or a transport failure;
                the original ``urllib`` exception is attached as the cause.
        """
        url = f"{self.base_url}{path}"
        headers = {
            'Content-Type': 'application/json',
            'Cache-Control': 'no-cache'
        }
        if self.api_key:
            headers['Authorization'] = f"Bearer {self.api_key}"
        data = None
        if body is not None:
            data = json.dumps(body).encode('utf-8')
        req = urllib.request.Request(url, data=data, headers=headers, method=method)
        try:
            with urllib.request.urlopen(req, timeout=self.timeout) as resp:
                raw = resp.read()
        except urllib.error.HTTPError as e:
            msg = e.read().decode('utf-8', errors='replace')
            raise RuntimeError(f"HTTP {e.code}: {msg}") from e
        except urllib.error.URLError as e:
            raise RuntimeError(str(e)) from e
        if expect_text:
            return raw.decode('utf-8', errors='replace')
        return json.loads(raw.decode('utf-8'))

    def register(self, name, email):
        """Register via ``/api/register`` and adopt the returned ``api_key`` if present."""
        res = self._request('POST', '/api/register', {'name': name, 'email': email})
        self.api_key = res.get('api_key', self.api_key)
        return res

    def start_training(self, train_type, **params):
        """POST ``train_type`` plus any extra keyword parameters to ``/train``."""
        payload = {'train_type': train_type}
        payload.update(params)
        return self._request('POST', '/train', payload)

    def get_processes(self):
        """GET ``/processes`` and return the parsed JSON."""
        return self._request('GET', '/processes', None)

    def get_logs(self, process_id):
        """GET ``/logs/<process_id>`` and return the body as text."""
        return self._request('GET', f"/logs/{self._segment(process_id)}", None, expect_text=True)

    def stop(self, process_id):
        """POST to ``/stop/<process_id>``."""
        return self._request('POST', f"/stop/{self._segment(process_id)}", None)

    def delete(self, process_id):
        """POST to ``/delete/<process_id>``."""
        return self._request('POST', f"/delete/{self._segment(process_id)}", None)

    def get_logfiles(self):
        """GET ``/logfiles`` and return the parsed JSON."""
        return self._request('GET', '/logfiles', None)

    def get_logfile_content(self, filename):
        """GET ``/logfile-content/<filename>`` and return the body as text."""
        return self._request('GET', f"/logfile-content/{self._segment(filename)}", None, expect_text=True)

    def delete_logfile(self, filename):
        """Send DELETE to ``/delete-logfile/<filename>``."""
        return self._request('DELETE', f"/delete-logfile/{self._segment(filename)}", None)
|
||||
81
trainer_web/dispatcher.py
Normal file
81
trainer_web/dispatcher.py
Normal file
@ -0,0 +1,81 @@
|
||||
import sys
|
||||
import os
|
||||
|
||||
def build_command(train_type, params, gpu_num, use_torchrun):
    """Build the argv list that launches a training script.

    Args:
        train_type: One of 'pretrain', 'sft', 'lora', 'dpo', 'ppo', 'grpo', 'spo'.
        params: Mapping of option name to value.
        gpu_num: Value passed to ``torchrun --nproc_per_node``.
        use_torchrun: Launch through ``torchrun`` instead of ``sys.executable``.

    Returns:
        A list of strings, or ``None`` when ``train_type`` is not recognised.
        Every element is a ``str`` so the list can be handed to ``subprocess``
        even when ``params`` holds numbers.
    """
    scripts = {
        'pretrain': '../trainer/train_pretrain.py',
        'sft': '../trainer/train_full_sft.py',
        'lora': '../trainer/train_lora.py',
        'dpo': '../trainer/train_dpo.py',
        'ppo': '../trainer/train_ppo.py',
        'grpo': '../trainer/train_grpo.py',
        'spo': '../trainer/train_spo.py',
    }
    # Forwarded whenever the key is present, even with an empty value.
    forwarded_if_present = {
        'pretrain': ('save_weight',),
        'sft': ('save_weight',),
        'lora': ('lora_name',),
    }
    # Forwarded only when the value is truthy; order fixes the argv order.
    forwarded_if_set = {
        'dpo': ('beta', 'accumulation_steps', 'grad_clip'),
        'ppo': ('clip_epsilon', 'vf_coef', 'kl_coef', 'reasoning',
                'update_old_actor_freq', 'reward_model_path'),
        'grpo': ('beta', 'num_generations', 'reasoning', 'reward_model_path'),
        'spo': ('beta', 'reasoning', 'reward_model_path'),
    }
    # Keys the generic pass-through must never emit: they are either handled
    # above for specific train types or are not script options at all.
    reserved = {
        'train_type', 'save_weight', 'lora_name', 'train_monitor', 'beta',
        'accumulation_steps', 'grad_clip', 'gpu_num', 'clip_epsilon',
        'vf_coef', 'kl_coef', 'reasoning', 'update_old_actor_freq',
        'reward_model_path', 'num_generations',
    }

    script_path = scripts.get(train_type)
    if script_path is None:
        return None

    if use_torchrun:
        cmd = ['torchrun', '--nproc_per_node', str(gpu_num), script_path]
    else:
        cmd = [sys.executable, script_path]

    for key in forwarded_if_present.get(train_type, ()):
        if key in params:
            cmd.extend([f'--{key}', str(params[key])])
    for key in forwarded_if_set.get(train_type, ()):
        if params.get(key):
            cmd.extend([f'--{key}', str(params[key])])

    # 'from_weight' is not forwarded for the RL-style trainers.
    drops_from_weight = train_type in ('ppo', 'grpo', 'spo')
    for key, value in params.items():
        if key in reserved or (drops_from_weight and key == 'from_weight'):
            continue
        cmd.extend([f'--{key}', str(value)])

    monitor = params.get('train_monitor')
    if monitor in ('wandb', 'swanlab'):
        cmd.append('--use_wandb')
    if monitor == 'wandb':
        cmd.extend(['--wandb_project', 'minimind_training'])

    return cmd
|
||||
91
trainer_web/start_web_ui.sh
Executable file
91
trainer_web/start_web_ui.sh
Executable file
@ -0,0 +1,91 @@
|
||||
#!/bin/bash
# Start the MiniMind trainer web UI in the background, record its PID,
# wait for it to report a port and answer /healthz, and clean up on failure.

# Resolve the directory that contains this script (also works on macOS)
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# Every path below is relative to this directory, so abort if we cannot enter it
cd "$SCRIPT_DIR" || exit 1

# Refuse to start when an instance is already running
if [ -f "train_web_ui.pid" ]; then
    pid=$(cat "train_web_ui.pid")
    if ps -p "$pid" > /dev/null 2>&1; then
        echo "Web UI 服务已经在运行 (PID: $pid)"
        exit 1
    else
        echo "删除旧的PID文件"
        rm "train_web_ui.pid"
    fi
fi

# Create the log directory
LOG_DIR="../logfile"
mkdir -p "$LOG_DIR"

# Timestamped log file
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
LOG_FILE="$LOG_DIR/web_ui_$TIMESTAMP.log"

echo "启动 MiniMind Web UI 服务..."
echo "日志文件: $LOG_FILE"

# Dependency pre-check
python - <<'PY'
import sys
missing = []
for m in ('flask', 'psutil'):
    try:
        __import__(m)
    except Exception as e:
        missing.append(f"{m}: {e.__class__.__name__} {e}")
if missing:
    print("依赖缺失或不可用:\n" + "\n".join(missing))
    sys.exit(1)
PY
if [ $? -ne 0 ]; then
    echo "启动失败:请先安装缺失依赖,例如 'pip install flask psutil'"
    exit 1
fi

# Launch the service with nohup
nohup python -u train_web_ui.py > "$LOG_FILE" 2>&1 &

# Save the PID
SERVER_PID=$!
echo "$SERVER_PID" > "train_web_ui.pid"

# Poll the log for the actual port (up to 10 seconds)
PORT=""
for i in {1..20}; do
    # Extract an address of the form http://0.0.0.0:12345, then take the port
    PORT=$(grep -Eo 'http://0\.0\.0\.0:[0-9]+' "$LOG_FILE" | tail -n1 | awk -F: '{print $NF}')
    if [ -n "$PORT" ]; then
        break
    fi
    # Stop waiting as soon as the server process has exited
    if ! ps -p "$SERVER_PID" > /dev/null 2>&1; then
        break
    fi
    sleep 0.5
done

# Health check: verify the port responds (up to 10 seconds)
if [ -n "$PORT" ]; then
    for i in {1..20}; do
        if curl -s "http://localhost:$PORT/healthz" | grep -Eq '"status"[[:space:]]*:[[:space:]]*"ok"'; then
            echo "服务已启动! PID: $(cat "train_web_ui.pid")"
            echo "访问地址: http://localhost:$PORT"
            echo "停止命令: kill $(cat "train_web_ui.pid") or bash trainer_web/stop_web_ui.sh"
            exit 0
        fi
        # No point retrying once the server process is gone
        if ! ps -p "$SERVER_PID" > /dev/null 2>&1; then
            break
        fi
        sleep 0.5
    done
fi

# Startup failed: print the log tail, stop the process, exit non-zero
echo "服务启动失败,请查看日志"
tail -n 50 "$LOG_FILE" || true

if [ -f "train_web_ui.pid" ]; then
    pid=$(cat "train_web_ui.pid")
    if ps -p "$pid" > /dev/null 2>&1; then
        kill "$pid" >/dev/null 2>&1 || true
    fi
    rm -f "train_web_ui.pid"
fi

exit 1
|
||||
1365
trainer_web/static/css/style.css
Normal file
1365
trainer_web/static/css/style.css
Normal file
File diff suppressed because it is too large
Load Diff
BIN
trainer_web/static/images/logo2.png
Normal file
BIN
trainer_web/static/images/logo2.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 615 KiB |
362
trainer_web/static/js/app.js
Normal file
362
trainer_web/static/js/app.js
Normal file
@ -0,0 +1,362 @@
|
||||
import { openTab as _openTab } from './ui/tabs.js';
|
||||
import { initTrainForm } from './train/form.js';
|
||||
import { startProcessPolling, stopProcessPolling, loadProcesses } from './processes/list.js';
|
||||
import { loadLogFiles } from './logfiles/list.js';
|
||||
import { refreshLog } from './processes/logs.js';
|
||||
|
||||
// Tracks whether the processes tab is the active one, so that a slow initial
// load cannot start polling after the user has already left the tab.
let processesTabActive = false;

// Tab lifecycle callbacks handed to the tab switcher.
const hooks = {
    onEnterProcesses: () => {
        // Load once immediately on entering the tab, then begin polling
        processesTabActive = true;
        loadProcesses().then(() => {
            if (processesTabActive) {
                startProcessPolling();
            }
        });
    },
    onLeaveProcesses: () => {
        processesTabActive = false;
        stopProcessPolling();
    },
    onEnterLogfiles: () => {
        loadLogFiles();
    },
};

window.openTab = (evt, tabName) => _openTab(evt, tabName, hooks);

// Folder picker: always opens the server-side file browser
window.selectFolder = (inputId) => {
    // Use the remote browser directly; no local file-system access is attempted
    openRemoteFileBrowser(inputId);
};

// Remote file browser state (supports selecting files and folders)
let currentFileBrowserTarget = null;   // id of the input that receives the result
let currentBrowsePath = './';          // path most recently requested
let selectedFilePath = null;           // path currently highlighted by the user
let currentSelectionMode = 'auto'; // 'file', 'folder', or 'auto'
|
||||
|
||||
/**
 * Open the server-side file browser for the input with id `inputId`.
 * Picks the selection mode from the id, shows the modal, clears any
 * previous selection and loads the starting directory.
 */
function openRemoteFileBrowser(inputId) {
    console.log('openRemoteFileBrowser called with:', inputId);
    currentFileBrowserTarget = inputId;

    // Selection mode depends on which input asked for a path
    const wantsFile = inputId === 'data_path';
    const wantsFolder = !wantsFile &&
        (inputId === 'save_dir' || inputId.includes('reward_model_path'));
    if (wantsFile) {
        currentSelectionMode = 'file'; // data path needs a file
        console.log('Mode set to: FILE selection');
    } else if (wantsFolder) {
        currentSelectionMode = 'folder'; // save dir / reward model path need a folder
        console.log('Mode set to: FOLDER selection');
    } else {
        currentSelectionMode = 'auto';
        console.log('Mode set to: AUTO selection');
    }

    const dialog = document.getElementById('file-browser-modal');
    if (!dialog) {
        console.error('Modal element not found!');
        return;
    }
    dialog.classList.remove('hidden');
    console.log('Modal opened successfully');

    // Reset selection state
    selectedFilePath = null;
    const pathField = document.getElementById('selected-path');
    if (pathField) {
        pathField.value = '';
        console.log('Selected path input cleared');
    }

    // Load shortcuts and the initial directory
    loadQuickPaths();
    browsePath('./');
}
|
||||
|
||||
/**
 * Hide the file-browser modal and reset all browser state.
 * Safe to call when the modal element is absent: the state is still reset.
 */
function closeFileBrowser() {
    const modal = document.getElementById('file-browser-modal');
    // The modal may be missing from the page; guard instead of throwing
    if (modal) {
        modal.classList.add('hidden');
    }
    currentFileBrowserTarget = null;
    currentBrowsePath = './';
    selectedFilePath = null;
    currentSelectionMode = 'auto';
}
|
||||
|
||||
/**
 * Copy the selected path into the target input and close the browser.
 * Alerts the user when nothing is selected or the target input is missing.
 */
function confirmFileSelection() {
    console.log('confirmFileSelection called');
    console.log('selectedFilePath:', selectedFilePath);
    console.log('currentFileBrowserTarget:', currentFileBrowserTarget);

    if (!(selectedFilePath && currentFileBrowserTarget)) {
        console.log('Missing selection or target');
        alert('请先选择文件或文件夹');
        return;
    }

    const targetElement = document.getElementById(currentFileBrowserTarget);
    console.log('targetElement:', targetElement);

    if (!targetElement) {
        console.error('Target element not found:', currentFileBrowserTarget);
        alert('错误:无法找到目标输入框');
        return;
    }

    targetElement.value = selectedFilePath;
    console.log('Value set successfully');
    closeFileBrowser();
}
|
||||
|
||||
/**
 * Browse to the parent directory. Prefers the parent path stored on
 * `window.currentParentPath`; otherwise derives one from the current path.
 */
function navigateToParent() {
    const storedParent = window.currentParentPath;
    if (storedParent) {
        // Use the parent path stored from the last listing
        browsePath(storedParent);
        return;
    }
    if (!currentBrowsePath || currentBrowsePath === './') {
        return;
    }
    // Fallback: cut the current path at its last '/'
    const cut = currentBrowsePath.lastIndexOf('/');
    const derived = cut === -1 ? './' : currentBrowsePath.substring(0, cut);
    browsePath(derived || './');
}
|
||||
|
||||
/**
 * Mark the directory currently being browsed as the selection.
 * The modal stays open so the user can keep browsing or confirm.
 */
function selectCurrentDirectory() {
    const here = currentBrowsePath;
    selectedFilePath = here;
    document.getElementById('selected-path').value = here;
}
|
||||
|
||||
/**
 * Fetch the shortcut paths from `/api/quick-paths` and render one button
 * per entry. Failures are logged as warnings and otherwise ignored.
 */
async function loadQuickPaths() {
    try {
        const response = await fetch('/api/quick-paths');
        const data = await response.json();

        const bar = document.getElementById('quick-paths');
        bar.innerHTML = '';

        if (!(data.paths && data.paths.length > 0)) {
            return;
        }
        for (const entry of data.paths) {
            const shortcut = document.createElement('button');
            shortcut.className = 'quick-path-btn';
            shortcut.textContent = entry.name;
            shortcut.title = entry.path;
            shortcut.onclick = () => browsePath(entry.path);
            bar.appendChild(shortcut);
        }
    } catch (error) {
        console.warn('加载快捷路径失败:', error);
    }
}
|
||||
|
||||
/**
 * Request the listing for `path` from `/api/browse` and render it.
 *
 * `currentBrowsePath` is only updated once the server has returned a
 * listing, so a failed request leaves the browser pointing at the last
 * directory that was actually shown.
 */
async function browsePath(path) {
    console.log('browsePath called with:', path);
    try {
        selectedFilePath = null; // reset the selected path
        document.getElementById('selected-path').value = ''; // clear the display

        // Refresh the help text for the current selection mode
        updateHelpText();

        const response = await fetch(`/api/browse?path=${encodeURIComponent(path)}`);
        const data = await response.json();

        if (data.error) {
            alert(`浏览失败: ${data.error}`);
            return;
        }

        // Commit the new location only after a successful listing
        currentBrowsePath = path;
        renderFileList(data);
        console.log('File list rendered successfully');
    } catch (error) {
        console.error('浏览路径失败:', error);
        alert('浏览路径失败,请检查网络连接');
    }
}
|
||||
|
||||
/**
 * Render a directory listing returned by the browse API.
 *
 * The path display and the stored parent path are updated before the
 * empty-directory check, so navigating "up" from an empty directory uses
 * that directory's own parent rather than a stale one.
 */
function renderFileList(data) {
    const fileList = document.getElementById('file-list');
    fileList.innerHTML = '';

    // Update the displayed path (relative path preferred for display)
    document.getElementById('current-path').textContent = data.relative_path || data.current_path;

    // Remember the parent path for navigateToParent
    window.currentParentPath = data.parent;

    if (!data.items || data.items.length === 0) {
        fileList.innerHTML = '<div style="padding: 2rem; text-align: center; color: var(--text-secondary);">此目录为空</div>';
        return;
    }

    // Directories first, then files
    const directories = data.items.filter(item => item.type === 'directory');
    const files = data.items.filter(item => item.type === 'file');

    directories.forEach(item => {
        fileList.appendChild(createFileItem(item, '📁'));
    });

    // Files are listed only in file or auto mode
    if (currentSelectionMode !== 'folder') {
        files.forEach(item => {
            fileList.appendChild(createFileItem(item, '📄'));
        });
    }
}
|
||||
|
||||
/**
 * Build the row element for one listing entry.
 *
 * All entry data is inserted with `textContent`, never as markup, so a
 * file name containing HTML cannot inject elements or scripts.
 *
 * @param item entry with `name`, `type` ('file' | 'directory') and, for files, `size`
 * @param icon text shown in the icon column
 * @returns the row `div`, with its click handler attached
 */
function createFileItem(item, icon) {
    const div = document.createElement('div');
    div.className = 'file-item';

    // Add a CSS class reflecting what a click can do in the current mode
    if (currentSelectionMode === 'file' && item.type === 'directory') {
        // File mode: folders are for navigation only
        div.classList.add('navigable');
    } else if (currentSelectionMode === 'folder' && item.type === 'file') {
        // Folder mode: files cannot be selected
        div.classList.add('disabled');
    }

    div.onclick = (event) => selectFileItem(item, event);

    const columns = [
        ['file-icon', icon],
        ['file-name', item.name],
        ['file-info', item.type === 'file' ? formatFileSize(item.size) : '文件夹'],
    ];
    for (const [className, text] of columns) {
        const span = document.createElement('span');
        span.className = className;
        span.textContent = text;
        div.appendChild(span);
    }

    return div;
}
|
||||
|
||||
/**
 * Handle a click on a listing row.
 * Directories are entered (file/auto mode) or selected (folder mode);
 * files are selected unless the browser is in folder mode.
 */
function selectFileItem(item, event) {
    console.log('selectFileItem called with:', item);
    console.log('currentSelectionMode:', currentSelectionMode);
    console.log('event:', event);

    const row = event && event.currentTarget;

    // Ignore clicks on disabled rows
    if (row && row.classList.contains('disabled')) {
        console.log('Clicked disabled item, ignoring');
        return;
    }

    // Record the item as the selection and highlight its row
    const markSelected = () => {
        selectedFilePath = item.path;
        document.getElementById('selected-path').value = item.path;
        document.querySelectorAll('.file-item').forEach(el => el.classList.remove('selected'));
        if (row) {
            row.classList.add('selected');
        }
    };

    if (item.type === 'directory') {
        if (currentSelectionMode === 'file') {
            console.log('File mode: navigating into directory');
            browsePath(item.path);
            return;
        }
        if (currentSelectionMode === 'folder') {
            console.log('Folder mode: selecting directory');
            markSelected();
            console.log('Directory selected:', selectedFilePath);
            return;
        }
        console.log('Auto mode: navigating into directory');
        browsePath(item.path);
        return;
    }

    if (currentSelectionMode === 'folder') {
        console.log('File clicked in folder mode, ignoring');
        return;
    }
    console.log('Selecting file:', item.path);
    markSelected();
    console.log('File selected:', selectedFilePath);
}
|
||||
|
||||
/**
 * Format a byte count as a short human-readable string, e.g. "1.5 KB".
 *
 * The unit index is clamped to the unit table, so values below one byte
 * are shown in B and values of 1 TiB or more are shown in TB instead of
 * indexing past the table. Missing, negative or non-finite input yields '0 B'.
 */
function formatFileSize(bytes) {
    if (!Number.isFinite(bytes) || bytes <= 0) return '0 B';
    const k = 1024;
    const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
    const raw = Math.floor(Math.log(bytes) / Math.log(k));
    const i = Math.min(Math.max(raw, 0), sizes.length - 1);
    return parseFloat((bytes / Math.pow(k, i)).toFixed(1)) + ' ' + sizes[i];
}
|
||||
|
||||
/**
 * Set the help line and modal title to match the current selection mode.
 * Does nothing when the help element is not on the page.
 */
function updateHelpText() {
    const helpText = document.querySelector('.file-browser-help');
    const modalTitle = document.getElementById('modal-title');

    if (!helpText) return;

    let text;
    let title;
    if (currentSelectionMode === 'file') {
        text = '💡 请选择文件:点击文件选择,点击文件夹进入目录,使用📍选择当前目录';
        title = '选择文件';
    } else if (currentSelectionMode === 'folder') {
        text = '💡 请选择文件夹:点击文件夹选择,点击文件无效,使用📍选择当前目录';
        title = '选择文件夹';
    } else {
        text = '💡 点击文件夹进入目录,点击文件选择文件,使用📍选择当前目录';
        title = '选择文件或文件夹';
    }

    helpText.textContent = text;
    if (modalTitle) {
        modalTitle.textContent = title;
    }
}
|
||||
|
||||
// 添加模态框键盘事件监听
|
||||
// Close the file browser when Escape is pressed
document.addEventListener('keydown', function(event) {
    if (event.key === 'Escape') {
        closeFileBrowser();
    }
});

// Close the file browser when the click lands on the modal element itself
// (event.target === modal) rather than on something inside it
document.addEventListener('DOMContentLoaded', function() {
    const modal = document.getElementById('file-browser-modal');
    if (modal) {
        modal.addEventListener('click', function(event) {
            if (event.target === modal) {
                closeFileBrowser();
            }
        });
    }
});

// Expose the file-browser functions on the global object
// NOTE(review): no module-level `selectFolder` binding is visible in this file;
// this line appears to read back the `window.selectFolder` assigned earlier,
// which would make it a self-assignment — confirm and consider removing.
window.selectFolder = selectFolder;
window.openRemoteFileBrowser = openRemoteFileBrowser;
window.closeFileBrowser = closeFileBrowser;
window.confirmFileSelection = confirmFileSelection;
window.navigateToParent = navigateToParent;
window.selectCurrentDirectory = selectCurrentDirectory;

// Expose the process-management functions on the global object
window.refreshProcesses = () => {
    // Refresh the process data immediately, then restart the polling timer
    return loadProcesses().then(() => {
        // Stop and start again so the next poll is a full interval away
        stopProcessPolling();
        startProcessPolling();
    });
};
window.refreshLogs = loadLogFiles;
window.refreshLog = refreshLog;

window.addEventListener('load', () => {
    initTrainForm();
    // Polling is no longer started here; it starts when the user switches
    // to the processes tab (see the tab hooks)
    // startProcessPolling(); // moved into the hook functions
    loadProcesses(); // still load the initial process data
});
|
||||
|
||||
194
trainer_web/static/js/logfiles/list.js
Normal file
194
trainer_web/static/js/logfiles/list.js
Normal file
@ -0,0 +1,194 @@
|
||||
import { getLogFiles, getLogFileContent, deleteLogFile as apiDeleteLogFile } from '../services/apiClient.js';
|
||||
import { el } from '../utils/dom.js';
|
||||
import { showNotification } from '../ui/notify.js';
|
||||
import { showConfirmDialog } from '../ui/dialog.js';
|
||||
|
||||
/**
 * Fetch the saved log files and render them grouped by training type.
 * Files are sorted newest first; the type is inferred from the file name.
 * Returns the promise from `getLogFiles()`.
 */
export function loadLogFiles() {
    // File-name marker -> training type, checked in this order
    const markers = [
        ['train_pretrain_', 'pretrain'],
        ['train_sft_', 'sft'],
        ['train_lora_', 'lora'],
        ['train_dpo_', 'dpo'],
        ['train_ppo_', 'ppo'],
        ['train_grpo_', 'grpo'],
        ['train_spo_', 'spo'],
    ];
    const order = ['pretrain', 'sft', 'lora', 'dpo', 'ppo', 'grpo', 'spo', '未知'];

    return getLogFiles().then((data) => {
        const list = document.getElementById('logfiles-list');
        list.innerHTML = '';
        if (data.length === 0) {
            list.innerHTML = '<p>暂无日志文件</p>';
            return;
        }

        data.sort((a, b) => new Date(b.modified_time) - new Date(a.modified_time));

        const groups = {};
        for (const file of data) {
            const hit = markers.find(([marker]) => file.filename.includes(marker));
            const type = hit ? hit[1] : '自定义训练';
            file.train_type = type;
            if (!groups[type]) groups[type] = [];
            groups[type].push(file);
        }

        // Known types in their fixed order, then any remaining groups
        const known = order.filter((t) => groups[t]);
        const others = Object.keys(groups).filter((t) => !order.includes(t));
        for (const t of [...known, ...others]) {
            list.appendChild(createTypeGroupWithToggle(t, groups[t]));
        }
    });
}
|
||||
|
||||
/**
 * Build one collapsible group: a header (title + toggle button) followed
 * by a content container holding one row per log file.
 */
function createTypeGroupWithToggle(trainType, files) {
    const header = el('div', { class: 'process-type-header' });
    header.dataset.expanded = 'true';
    header.onclick = () => toggleGroup(header);

    const heading = el('h3', { class: 'process-type-title', text: getTrainTypeDisplayName(trainType) });

    const arrow = el('button', { class: 'toggle-btn' });
    arrow.innerHTML = '▼';
    arrow.onclick = (e) => {
        // Keep the click from also triggering the header's own handler
        e.stopPropagation();
        toggleGroup(header);
    };

    header.appendChild(heading);
    header.appendChild(arrow);

    const body = el('div', { class: 'process-type-content' });
    for (const file of files) {
        addLogFileItemToGroup(body, file);
    }

    const wrapper = el('div', { class: 'process-type-group' });
    wrapper.appendChild(header);
    wrapper.appendChild(body);
    return wrapper;
}
|
||||
|
||||
/**
 * Collapse or expand the content element that follows `header`.
 * The expanded state is stored in `header.dataset.expanded`; the content's
 * inline `max-height` / `overflow` are changed and the toggle arrow updated.
 */
function toggleGroup(header) {
    const expanded = header.dataset.expanded === 'true';
    const content = header.nextElementSibling;
    const toggle = header.querySelector('.toggle-btn');
    if (expanded) {
        // Collapse: clamp the height to zero and hide the overflow
        header.dataset.expanded = 'false';
        content.style.maxHeight = '0';
        content.style.overflow = 'hidden';
        toggle.innerHTML = '▶';
    } else {
        content.style.overflow = 'hidden';
        // Temporarily lift the limit to measure the full content height
        content.style.maxHeight = 'none';
        const h = content.scrollHeight;
        content.style.maxHeight = '0';
        // Value is discarded; presumably read to force a layout pass so the
        // change from 0 to h is animated — TODO confirm against the CSS
        content.offsetHeight;
        header.dataset.expanded = 'true';
        content.style.maxHeight = h + 'px';
        // After 300 ms (presumably the CSS transition duration — confirm)
        // remove the fixed limit so the content can grow freely
        setTimeout(() => {
            content.style.maxHeight = 'none';
            content.style.overflow = 'visible';
        }, 300);
        toggle.innerHTML = '▼';
    }
}
|
||||
|
||||
/**
 * Map a training-type key to its display label.
 * Keys without a label are returned unchanged.
 */
function getTrainTypeDisplayName(trainType) {
    const label = {
        pretrain: '预训练 (Pretrain)',
        sft: '全参数监督微调 (SFT - Full)',
        lora: 'LoRA监督微调 (SFT - Lora)',
        dpo: '直接偏好优化 (RL - DPO)',
        ppo: 'PPO',
        grpo: 'GRPO',
        spo: 'SPO',
    }[trainType];
    if (label) {
        return label;
    }
    return trainType;
}
|
||||
|
||||
/**
 * Append one log-file row (name, timestamp, view/delete buttons and a
 * hidden log container) to `parent`.
 *
 * The markup is static; the file name and timestamp are filled in through
 * `textContent` / `dataset`, so they are never parsed as HTML. The
 * container id is derived with the same sanitisation `viewLogFile` uses
 * for its lookup, so the two agree for every file name.
 */
function addLogFileItemToGroup(parent, logfile) {
    const safeId = logfile.filename.replace(/[^a-zA-Z0-9_.-]/g, '_').replace(/\./g, '-');
    const item = el('div', { class: 'process-item' });
    item.innerHTML = `
        <div class="process-info">
            <div><strong></strong></div>
            <div>
                <span class="process-status status-completed">已保存</span>
                <span style="margin-left: 10px; color: #999; font-size: 0.9em;"></span>
            </div>
        </div>
        <div>
            <button class="btn-logs" data-view="">查看日志</button>
            <button class="btn-delete" data-del="">删除</button>
        </div>
        <div class="logs-container hidden"></div>
    `;
    item.querySelector('.process-info strong').textContent = logfile.filename;
    item.querySelector('.process-status + span').textContent = logfile.modified_time;
    item.querySelector('[data-view]').dataset.view = logfile.filename;
    item.querySelector('[data-del]').dataset.del = logfile.filename;
    item.querySelector('.logs-container').id = `log-content-${safeId}`;
    parent.appendChild(item);
    bindItemButtons(item, logfile);
}
|
||||
|
||||
/**
 * Attach click handlers to the view and delete buttons of a log-file row.
 * A button that is not present in the row is skipped.
 */
function bindItemButtons(item, logfile) {
    const viewBtn = item.querySelector('[data-view]');
    const delBtn = item.querySelector('[data-del]');
    if (viewBtn) {
        viewBtn.addEventListener('click', () => viewLogFile(logfile.filename, viewBtn));
    }
    if (delBtn) {
        delBtn.addEventListener('click', () => deleteLogFile(logfile.filename, delBtn));
    }
}
|
||||
|
||||
/**
 * Ask for confirmation, then delete a log file through the API.
 * On success the row is removed (and its group, if it becomes empty);
 * on failure the button is restored and an error notification is shown.
 */
function deleteLogFile(filename, button) {
    const runDelete = () => {
        const row = button.closest('.process-item');
        const body = row.closest('.process-type-content');
        const wrapper = body.closest('.process-type-group');
        const label = button.textContent;

        button.textContent = '删除中...';
        button.disabled = true;

        const onDeleted = (data) => {
            if (!data.success) {
                throw new Error(data.message || '删除失败');
            }
            row.remove();
            if (body.children.length === 0) {
                wrapper.remove();
            } else {
                // Re-measure an expanded group after a row disappeared
                const header = body.previousElementSibling;
                if (header && header.dataset.expanded === 'true') {
                    body.style.maxHeight = 'none';
                    const h = body.scrollHeight;
                    body.style.maxHeight = h + 'px';
                }
            }
            showNotification(`日志文件 "${filename}" 已成功删除`);
        };

        const onFailed = (e) => {
            showNotification(`删除失败: ${e.message}`, 'error');
            button.textContent = label;
            button.disabled = false;
        };

        apiDeleteLogFile(filename).then(onDeleted).catch(onFailed);
    };

    showConfirmDialog(`确定要删除日志文件 "${filename}" 吗?此操作无法恢复。`, runDelete);
}
|
||||
|
||||
/**
 * Toggle the inline log viewer of a row; the content is fetched each time
 * the viewer is opened.
 *
 * The container is looked up by its sanitised id and, if that misses,
 * by its `logs-container` class inside the same row, so a file name whose
 * id was generated differently no longer makes this function throw.
 */
function viewLogFile(filename, button) {
    const safe = filename.replace(/[^a-zA-Z0-9_.-]/g, '_').replace(/\./g, '-');
    const item = button.closest('.process-item');
    const container = item.querySelector(`#log-content-${safe}`) || item.querySelector('.logs-container');
    if (!container) return;
    const content = item.closest('.process-type-content');
    const header = content ? content.previousElementSibling : null;
    // Expand a collapsed group first so the viewer is visible
    if (content && header && header.dataset.expanded !== 'true') toggleGroup(header);
    if (container.classList.contains('hidden')) {
        container.classList.remove('hidden');
        container.textContent = '加载中...';
        getLogFileContent(filename)
            .then((logs) => {
                container.textContent = logs;
                container.scrollTop = 0;
                updateContentHeight(content, header);
            })
            .catch((e) => {
                container.textContent = `获取日志失败: ${e.message}`;
                updateContentHeight(content, header);
            });
    } else {
        container.classList.add('hidden');
        updateContentHeight(content, header);
    }
}
|
||||
|
||||
/**
 * Re-measure an expanded group's content after its size may have changed.
 * Does nothing unless both elements exist and the header is marked expanded.
 */
function updateContentHeight(content, header) {
    if (content && header && header.dataset.expanded === 'true') {
        const current = content.style.maxHeight;
        // Lift the limit so scrollHeight reports the full content height
        content.style.maxHeight = 'none';
        const h = content.scrollHeight;
        if (current === 'none' || parseInt(current) !== h) {
            content.style.maxHeight = h + 'px';
            // After 300 ms remove the fixed limit again, but only if the
            // group is still expanded at that point
            setTimeout(() => {
                if (header.dataset.expanded === 'true') content.style.maxHeight = 'none';
            }, 300);
        } else content.style.maxHeight = current; // unchanged: restore the previous value
    }
}
|
||||
|
||||
634
trainer_web/static/js/processes/list.js
Normal file
634
trainer_web/static/js/processes/list.js
Normal file
@ -0,0 +1,634 @@
|
||||
import { getProcesses, stopProcess as apiStop, deleteProcess as apiDelete } from '../services/apiClient.js';
|
||||
import { showNotification } from '../ui/notify.js';
|
||||
import { showConfirmDialog } from '../ui/dialog.js';
|
||||
import { el, clearChildren } from '../utils/dom.js';
|
||||
import { showLogs, refreshLog, clearLogTimerFor } from './logs.js';
|
||||
|
||||
// 计算训练进度信息
|
||||
/**
 * Produce a remaining-time label for a training run.
 *
 * First looks for a time expression in `logText`; if none is found, falls
 * back to the number of epochs left, and finally to a placeholder.
 * A missing or non-string `logText` is treated as empty text.
 *
 * NOTE(review): in every pattern the first captured number is treated as
 * hours, so text such as "预计剩余: 30分钟" is reported as 30 hours — confirm
 * the log formats actually emitted before relying on these patterns.
 *
 * @param current current epoch
 * @param total total epochs
 * @param logText recent log text to scan
 * @returns a label string
 */
function calculateRemainingTime(current, total, logText) {
    const text = typeof logText === 'string' ? logText : '';

    // Time expressions recognised in the log, tried in order
    const timePatterns = [
        /remaining[\s:=]\s*(\d+)[\s:]?(\d+)?[\s:]?(\d+)?/i, // remaining: 1:30:45 or remaining: 90
        /ETA[\s:=]\s*(\d+):(\d+):(\d+)/i, // ETA: 1:30:45
        /预计剩余[\s:=]\s*(\d+)[\s小时]*[\s:]?(\d+)?[\s分钟]*/i, // 预计剩余: 1小时30分钟
        /剩余时间[\s:=]\s*(\d+)[\s小时]*[\s:]?(\d+)?[\s分钟]*/i, // 剩余时间: 1小时30分钟
        /time left[\s:=]\s*(\d+)[\s:]?(\d+)?[\s:]?(\d+)?/i, // time left: 1:30:45
        /还需[\s:=]\s*(\d+)[\s小时]*[\s:]?(\d+)?[\s分钟]*/i // 还需: 1小时30分钟
    ];

    for (const pattern of timePatterns) {
        const match = text.match(pattern);
        if (!match) continue;

        const hours = parseInt(match[1], 10) || 0;
        const minutes = parseInt(match[2], 10) || 0;
        const seconds = parseInt(match[3], 10) || 0;

        if (hours > 0 || minutes > 0 || seconds > 0) {
            const parts = [];
            if (hours > 0) parts.push(`${hours}小时`);
            if (minutes > 0) parts.push(`${minutes}分钟`);
            // Seconds are shown only when there are no hours or minutes
            if (seconds > 0 && hours === 0 && minutes === 0) parts.push(`${seconds}秒`);
            return parts.join('');
        }
    }

    // No time expression found: report the number of epochs left
    if (current > 0 && current < total) {
        const remainingEpochs = total - current;
        return `约${remainingEpochs}个epoch`;
    }

    return '计算中...';
}
|
||||
|
||||
/**
 * Return the last match of `pattern` in `text`, or null when there is none.
 * A global copy of the pattern is used because String.prototype.matchAll
 * throws a TypeError for regular expressions without the `g` flag.
 */
function lastRegexMatch(text, pattern) {
  const flags = pattern.flags.includes('g') ? pattern.flags : `${pattern.flags}g`;
  let last = null;
  for (const match of text.matchAll(new RegExp(pattern.source, flags))) last = match;
  return last;
}

/**
 * Extract a `current/total` integer pair using the first pattern (in list
 * order) that matches; the most recent occurrence of that pattern is used.
 * Returns `[current, total]`, or null when no pattern matches.
 */
function extractCounterPair(text, patterns) {
  for (const pattern of patterns) {
    const match = lastRegexMatch(text, pattern);
    if (match) return [parseInt(match[1], 10), parseInt(match[2], 10)];
  }
  return null;
}

/**
 * Extract the most recent numeric metric whose value passes `isPlausible`.
 * Patterns are tried in order; a pattern whose latest value is implausible is
 * skipped. Returns the number, or null when nothing usable is found.
 */
function extractLatestMetric(text, patterns, isPlausible) {
  for (const pattern of patterns) {
    const match = lastRegexMatch(text, pattern);
    if (!match) continue;
    const value = parseFloat(match[1]);
    if (!isNaN(value) && isPlausible(value)) return value;
  }
  return null;
}

/**
 * Compute the progress summary shown for a training process.
 *
 * Sources, in priority order:
 *   1. not running          -> zeroed default progress;
 *   2. `process.progress`   -> structured values reported with the process;
 *   3. `process.logs`       -> best-effort parsing of the last 2000 characters.
 *
 * @param {object} process - Process record (`running`, optional `progress`, optional `logs`).
 * @returns {object} Progress info with `percentage`, `current`, `total`,
 *   `remaining`, `loss`, `epoch`, `lr` and, except for the default result,
 *   `step`, `currentStep`, `totalSteps`.
 */
function calculateProgress(process) {
  const defaultProgress = {
    percentage: 0,
    current: 0,
    total: 0,
    remaining: '计算中...',
    loss: null,
    epoch: null,
    lr: null
  };

  // Processes that are not running get the zeroed default.
  if (!process.running) return defaultProgress;

  // Prefer structured progress data when it is present.
  if (process.progress) {
    const reported = process.progress;
    return {
      percentage: reported.percentage || 0,
      current: reported.current_epoch || 0,
      total: reported.total_epochs || 0,
      remaining: reported.remaining_time || '计算中...',
      loss: reported.current_loss || null,
      epoch: reported.current_epoch ? `${reported.current_epoch}/${reported.total_epochs}` : null,
      lr: reported.current_lr || null,
      step: reported.current_step && reported.total_steps ?
        `${reported.current_step}/${reported.total_steps}` : null,
      currentStep: reported.current_step || 0,
      totalSteps: reported.total_steps || 0
    };
  }

  if (!process.logs) return defaultProgress;

  // Only the tail of the log is inspected.
  const logText = process.logs.slice(-2000);

  const epochPatterns = [
    /epoch\s+(\d+)\s*\/\s*(\d+)/i, // epoch 3/10
    /Epoch\s+(\d+)\s*of\s*(\d+)/i, // Epoch 3 of 10
    /\[(\d+)\/(\d+)\]/i, // [3/10]
    /epoch\s*[::]\s*(\d+)\s*\/\s*(\d+)/i, // epoch: 3/10
    /第\s*(\d+)\s*轮\s*\/\s*共\s*(\d+)\s*轮/i // 第3轮/共10轮
  ];

  const epochPair = extractCounterPair(logText, epochPatterns);
  // Without a usable epoch counter there is no progress to report.
  if (!epochPair || !(epochPair[1] > 0)) return defaultProgress;
  const [current, total] = epochPair;

  const stepPatterns = [
    /step\s+(\d+)\s*\/\s*(\d+)/i, // step 150/1000
    /Step\s+(\d+)\s*of\s*(\d+)/i, // Step 150 of 1000
    /\[(\d+)\/(\d+)\]/i, // [150/1000]
    /step\s*[::]\s*(\d+)\s*\/\s*(\d+)/i, // step: 150/1000
    /第\s*(\d+)\s*步\s*\/\s*共\s*(\d+)\s*步/i, // 第150步/共1000步
    /步数\s*(\d+)\s*\/\s*(\d+)/i, // 步数 150/1000
    /batch\s+(\d+)\s*\/\s*(\d+)/i, // batch 150/1000
    /Batch\s+(\d+)\s*of\s*(\d+)/i // Batch 150 of 1000
  ];

  const stepPair = extractCounterPair(logText, stepPatterns);
  const currentStep = stepPair ? stepPair[0] : 0;
  const totalSteps = stepPair ? stepPair[1] : 0;
  const stepInfo = stepPair ? `${currentStep}/${totalSteps}` : null;

  const lossPatterns = [
    /loss[\s:=]\s*([\d.]+(?:e[+-]?\d+)?)/i, // loss: 4.32 or loss = 4.32
    /training_loss[\s:=]\s*([\d.]+(?:e[+-]?\d+)?)/i, // training_loss: 4.32
    /train_loss[\s:=]\s*([\d.]+(?:e[+-]?\d+)?)/i, // train_loss: 4.32
    /Loss[\s:=]\s*([\d.]+(?:e[+-]?\d+)?)/i, // Loss: 4.32
    /训练损失[\s:=]\s*([\d.]+(?:e[+-]?\d+)?)/i, // 训练损失: 4.32
    /损失[\s:=]\s*([\d.]+(?:e[+-]?\d+)?)/i, // 损失: 4.32
    /\s+([\d.]+(?:e[+-]?\d+)?)\s*loss/i, // 4.32 loss
    /\s+([\d.]+(?:e[+-]?\d+)?)\s*训练损失/i, // 4.32 训练损失
    /(?:loss|损失|training_loss|train_loss)\s*=\s*([\d.]+(?:e[+-]?\d+)?)/i // loss = 4.32
  ];

  // Values outside (0, 100) are treated as implausible for a loss.
  const lossValue = extractLatestMetric(logText, lossPatterns, (v) => v > 0 && v < 100);
  const currentLoss = lossValue === null ? null : lossValue.toFixed(4);

  const lrPatterns = [
    /lr[\s:=]\s*([\d.e+-]+)/i, // lr: 1e-4
    /learning_rate[\s:=]\s*([\d.e+-]+)/i, // learning_rate: 1e-4
    /LR[\s:=]\s*([\d.e+-]+)/i, // LR: 1e-4
    /学习率[\s:=]\s*([\d.e+-]+)/i // 学习率: 1e-4
  ];

  // Values outside (0, 1) are treated as implausible for a learning rate.
  const lrValue = extractLatestMetric(logText, lrPatterns, (v) => v > 0 && v < 1);
  const currentLr = lrValue === null ? null : lrValue.toExponential(2);

  // Epoch-only percentage; refined below when step information is available.
  let finalPercentage = Math.round((current / total) * 100);
  if (totalSteps > 0 && currentStep > 0) {
    const epochPercentage = (current / total) * 100;
    const stepPercentageInEpoch = (currentStep / totalSteps) * 100;
    // Each epoch accounts for 1/total of the overall progress.
    // NOTE(review): if logged epochs are 1-based this over-counts by one epoch
    // (clamped to 100 below) — confirm the log format before changing it.
    const stepContribution = stepPercentageInEpoch / total;
    finalPercentage = Math.min(100, Math.max(0, Math.round(epochPercentage + stepContribution)));
  }

  return {
    percentage: finalPercentage,
    current,
    total,
    remaining: calculateRemainingTime(current, total, logText),
    loss: currentLoss,
    epoch: `${current}/${total}`,
    lr: currentLr,
    step: stepInfo,
    currentStep,
    totalSteps
  };
}
|
||||
|
||||
// Handle of the interval created by startProcessPolling(); null while no polling is scheduled.
let processPollingTimer = null;
|
||||
|
||||
/**
 * Start (or restart) the 2-second polling loop that refreshes process status
 * and progress. A poll is only issued while the active tab's text contains
 * "进程".
 */
export function startProcessPolling() {
  // Never keep two intervals alive at the same time.
  if (processPollingTimer) clearInterval(processPollingTimer);

  const pollWhenProcessTabActive = () => {
    const activeTab = document.querySelector('.tab.active');
    const onProcessTab = Boolean(activeTab) && activeTab.textContent.includes('进程');
    if (onProcessTab) checkProcessStatusChanges();
  };

  processPollingTimer = setInterval(pollWhenProcessTabActive, 2000);
}
|
||||
|
||||
/**
 * Stop the polling loop started by startProcessPolling(), if one is active.
 */
export function stopProcessPolling() {
  if (!processPollingTimer) return;
  clearInterval(processPollingTimer);
  processPollingTimer = null;
}
|
||||
|
||||
/**
 * Fetch the process list and bring already-rendered items up to date.
 *
 * Items whose status changed are fully refreshed (with an error notification
 * when the new status is "出错"); items that are still running only get their
 * progress section refreshed. Processes without a rendered item are ignored.
 *
 * @returns {Promise<void>} Settles after the update; request failures are
 *   reported via a notification instead of rejecting.
 */
export function checkProcessStatusChanges() {
  const applyUpdates = (processes) => {
    let refreshed = 0;

    for (const proc of processes) {
      const node = document.querySelector(`[data-process-id="${proc.id}"]`);
      if (!node) continue;

      const statusChanged = node.dataset.processStatus !== proc.status;
      if (statusChanged) {
        // Status transition: rebuild status label and action buttons.
        updateProcessItem(node, proc);
        if (proc.status === '出错') showNotification(`进程 ${proc.train_type} 已出错`, 'error');
        refreshed += 1;
      } else if (proc.running) {
        // Same status but still running: only the progress section changes.
        updateProcessProgress(node, proc);
        refreshed += 1;
      }
    }

    // Debug output; can be removed in production.
    if (refreshed > 0) {
      console.log(`[${new Date().toLocaleTimeString()}] 更新了 ${refreshed} 个进程的进度信息`);
    }
  };

  const reportFailure = () => {
    showNotification('连接服务器失败,请刷新页面重试', 'error');
  };

  return getProcesses().then(applyUpdates).catch(reportFailure);
}
|
||||
|
||||
/**
 * Fetch all processes and rebuild the `#process-list` element from scratch.
 *
 * Processes are sorted by start time (newest first) and grouped by training
 * type. Groups for pretrain, sft, lora and dpo come first in that order,
 * followed by any other types in the order they were first seen.
 *
 * @returns {Promise<void>} Resolves once the list has been rendered.
 */
export function loadProcesses() {
  const preferredOrder = ['pretrain', 'sft', 'lora', 'dpo'];

  const render = (processes) => {
    const listEl = document.getElementById('process-list');
    clearChildren(listEl);

    if (processes.length === 0) {
      listEl.innerHTML = '<p>暂无训练进程</p>';
      return;
    }

    processes.sort((left, right) => new Date(right.start_time) - new Date(left.start_time));

    const byType = {};
    for (const proc of processes) {
      if (!byType[proc.train_type]) byType[proc.train_type] = [];
      byType[proc.train_type].push(proc);
    }

    const knownTypes = preferredOrder.filter((type) => byType[type]);
    const otherTypes = Object.keys(byType).filter((type) => !preferredOrder.includes(type));

    for (const type of knownTypes.concat(otherTypes)) {
      listEl.appendChild(createTypeGroupWithToggle(type, byType[type]));
    }
  };

  return getProcesses().then(render);
}
|
||||
|
||||
/**
 * Build one collapsible group (header + content) for a training type.
 *
 * The group starts expanded. Clicking either the header or its toggle button
 * flips the expanded state via toggleGroup().
 *
 * @param {string} trainType - Training type key used for the title.
 * @param {Array<object>} processes - Processes rendered inside the group.
 * @returns {HTMLElement} The assembled group element.
 */
function createTypeGroupWithToggle(trainType, processes) {
  const groupEl = el('div', { class: 'process-type-group' });

  // Header: title + toggle button.
  const headerEl = el('div', { class: 'process-type-header' });
  headerEl.dataset.expanded = 'true';

  const titleEl = el('h3', { class: 'process-type-title', text: getTrainTypeDisplayName(trainType) });

  const toggleBtn = el('button', { class: 'toggle-btn' });
  toggleBtn.innerHTML = '▼';
  toggleBtn.onclick = (event) => {
    // Keep the click from also reaching the header's own handler.
    event.stopPropagation();
    toggleGroup(headerEl);
  };

  headerEl.appendChild(titleEl);
  headerEl.appendChild(toggleBtn);
  headerEl.onclick = () => toggleGroup(headerEl);

  // Content: one item per process.
  const contentEl = el('div', { class: 'process-type-content' });
  for (const proc of processes) addProcessItemToGroup(contentEl, proc);

  groupEl.appendChild(headerEl);
  groupEl.appendChild(contentEl);
  return groupEl;
}
|
||||
|
||||
/**
 * Collapse or expand the content element that follows `header`.
 *
 * The state lives in `header.dataset.expanded`. Expanding animates max-height
 * from 0 to the measured content height and removes the cap 300 ms later.
 * That deferred step re-checks the state, so a collapse triggered in the
 * meantime is not undone by a stale timer.
 *
 * @param {HTMLElement} header - Group header; its next sibling is the content.
 */
function toggleGroup(header) {
  const expanded = header.dataset.expanded === 'true';
  const content = header.nextElementSibling;
  const toggle = header.querySelector('.toggle-btn');

  if (expanded) {
    header.dataset.expanded = 'false';
    content.style.maxHeight = '0';
    content.style.overflow = 'hidden';
    toggle.innerHTML = '▶';
    return;
  }

  content.style.overflow = 'hidden';

  // Measure the natural height with the cap lifted, then restart from 0.
  content.style.maxHeight = 'none';
  const targetHeight = content.scrollHeight;
  content.style.maxHeight = '0';

  // Reading offsetHeight forces a reflow so the 0 -> target change is animated.
  void content.offsetHeight;

  header.dataset.expanded = 'true';
  content.style.maxHeight = targetHeight + 'px';

  setTimeout(() => {
    // The group may have been collapsed again while the timer was pending.
    if (header.dataset.expanded !== 'true') return;
    content.style.maxHeight = 'none';
    content.style.overflow = 'visible';
  }, 300);

  toggle.innerHTML = '▼';
}
|
||||
|
||||
/**
 * Map a training type key to its display title.
 *
 * Unknown keys are returned unchanged. Only the table's own entries are
 * consulted, so keys that exist on Object.prototype (for example
 * "constructor" or "toString") fall through as plain strings.
 *
 * @param {string} trainType - Training type key, e.g. "pretrain".
 * @returns {string} Display name, or `trainType` itself when unmapped.
 */
function getTrainTypeDisplayName(trainType) {
  const names = {
    pretrain: '预训练 (Pretrain)',
    sft: '全参数监督微调 (SFT - Full)',
    lora: 'LoRA监督微调 (SFT - Lora)',
    dpo: '直接偏好优化 (RL - DPO)',
    ppo: 'PPO',
    grpo: 'GRPO',
    spo: 'SPO',
  };
  const isKnown = Object.prototype.hasOwnProperty.call(names, trainType);
  return isKnown ? names[trainType] : trainType;
}
|
||||
|
||||
/**
 * Render one process as a `.process-item` element and append it to `parent`.
 *
 * The item contains start time, status badge, a progress section (running
 * processes only), the action buttons and a hidden log container. Every
 * dynamic value is HTML-escaped before it is placed into the markup, because
 * the values come from the server response and from parsed log text.
 *
 * @param {HTMLElement} parent - Container the item is appended to.
 * @param {object} process - Process record from the `/processes` response.
 */
export function addProcessItemToGroup(parent, process) {
  // Escape a value for safe use in HTML text and double-quoted attributes.
  const escapeHtml = (value) =>
    String(value).replace(/[&<>"']/g, (ch) => (
      { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch]
    ));

  const item = el('div', { class: 'process-item' });

  let statusClass = 'status-completed';
  if (process.status === '运行中') statusClass = 'status-running';
  else if (process.status === '手动停止') statusClass = 'status-manual-stop';
  else if (process.status === '出错') statusClass = 'status-error';

  // A missing monitor setting is treated like 'none' everywhere, so the
  // SwanLab button and the dataset value can never disagree.
  const monitor = process.train_monitor || 'none';

  item.dataset.processId = process.id;
  item.dataset.processStatus = process.status;
  item.dataset.trainMonitor = monitor;
  item.dataset.swanlabUrl = process.swanlab_url || '';

  const safeId = escapeHtml(process.id);
  const showDelete = !process.running;
  const showSwanlab = monitor !== 'none';
  const swanBtn = showSwanlab ? `<button class="btn-swanlab" data-swan="${safeId}">SwanLab</button>` : '';

  // One metric row; omitted entirely while the value is unknown.
  const metricRow = (label, value) => (value
    ? `<div class="metric-item"><span class="metric-label">${label}</span><span class="metric-value">${escapeHtml(value)}</span></div>`
    : '');

  const progressInfo = calculateProgress(process);
  const stepSuffix = progressInfo.step ? ` (${escapeHtml(progressInfo.step)})` : '';
  const progressBar = process.running ? `
    <div class="progress-container">
      <div class="progress-bar">
        <div class="progress-fill" style="width: ${escapeHtml(progressInfo.percentage)}%"></div>
      </div>
      <div class="progress-info">
        <span>进度: ${escapeHtml(progressInfo.current)}/${escapeHtml(progressInfo.total)}${stepSuffix}</span>
        <span>剩余时间: ${escapeHtml(progressInfo.remaining)}</span>
      </div>
      <div class="progress-metrics">
        ${metricRow('Loss:', progressInfo.loss)}
        ${metricRow('Epoch:', progressInfo.epoch)}
        ${metricRow('Step:', progressInfo.step)}
        ${metricRow('LR:', progressInfo.lr)}
      </div>
    </div>
  ` : '';

  item.innerHTML = `
    <div class="process-info">
      <div><strong>${escapeHtml(process.start_time)}</strong></div>
      <div><span class="process-status ${statusClass}">${escapeHtml(process.status)}</span></div>
    </div>
    ${progressBar}
    <div>
      <button class="btn-logs" data-show="${safeId}">查看日志</button>
      <button class="btn-logs" data-refresh="${safeId}">刷新日志</button>
      ${swanBtn}
      ${process.running ? `<button class="btn-stop" data-stop="${safeId}">停止训练</button>` : ''}
      ${showDelete ? `<button class="btn-delete" data-del="${safeId}">删除</button>` : ''}
    </div>
    <div id="logs-${safeId}" class="logs-container hidden"></div>
  `;

  parent.appendChild(item);
  bindItemButtons(item, process);
}
|
||||
|
||||
/**
 * Attach click handlers to the action buttons inside a process item.
 * Buttons are located through their `data-*` attributes; missing buttons are
 * skipped.
 *
 * @param {HTMLElement} item - The rendered `.process-item` element.
 * @param {object} process - Process record; `process.id` is passed to the handlers.
 */
function bindItemButtons(item, process) {
  // [selector, click handler] pairs, bound in this order.
  const bindings = [
    ['[data-show]', () => showLogs(process.id)],
    ['[data-refresh]', () => refreshLog(process.id)],
    ['[data-swan]', () => checkAndOpenSwanlab(process.id)],
    ['[data-stop]', () => stopProcess(process.id)],
    ['[data-del]', () => deleteProcess(process.id)],
  ];

  for (const [selector, onClick] of bindings) {
    const button = item.querySelector(selector);
    if (button) button.addEventListener('click', onClick);
  }
}
|
||||
|
||||
/**
 * Refresh only the progress section of an already-rendered process item.
 *
 * Metric rows are matched by their label text ("Loss:", "Epoch:", "Step:",
 * "LR:") rather than by position, because rows are rendered only for metrics
 * that were known at render time. A metric that becomes available later gets
 * a new row in the canonical order; a metric that is temporarily unavailable
 * keeps its last displayed value.
 *
 * @param {HTMLElement} item - The rendered `.process-item` element.
 * @param {object} process - Latest process record.
 */
export function updateProcessProgress(item, process) {
  const progressInfo = calculateProgress(process);

  const progressFill = item.querySelector('.progress-fill');
  const progressText = item.querySelector('.progress-info span:first-child');
  const remainingText = item.querySelector('.progress-info span:last-child');
  const metricsContainer = item.querySelector('.progress-metrics');

  if (progressFill) {
    progressFill.style.width = `${progressInfo.percentage}%`;
  }

  if (progressText) {
    const stepText = progressInfo.step ? ` (${progressInfo.step})` : '';
    progressText.textContent = `进度: ${progressInfo.current}/${progressInfo.total}${stepText}`;
  }

  if (remainingText) {
    remainingText.textContent = `剩余时间: ${progressInfo.remaining}`;
  }

  if (!metricsContainer) return;

  // Canonical row order, matching the initial render.
  const metrics = [
    ['Loss:', progressInfo.loss],
    ['Epoch:', progressInfo.epoch],
    ['Step:', progressInfo.step],
    ['LR:', progressInfo.lr],
  ];

  const labelOf = (row) => {
    const labelEl = row.querySelector('.metric-label');
    return labelEl ? labelEl.textContent.trim() : '';
  };
  const rows = Array.from(metricsContainer.querySelectorAll('.metric-item'));

  metrics.forEach(([label, value], index) => {
    if (!value) return; // keep whatever is currently displayed

    let row = rows.find((candidate) => labelOf(candidate) === label);
    if (!row) {
      // Create the missing row and place it before the first later metric.
      row = document.createElement('div');
      row.className = 'metric-item';
      const labelEl = document.createElement('span');
      labelEl.className = 'metric-label';
      labelEl.textContent = label;
      const valueEl = document.createElement('span');
      valueEl.className = 'metric-value';
      row.appendChild(labelEl);
      row.appendChild(valueEl);

      const laterLabels = metrics.slice(index + 1).map(([later]) => later);
      const anchor = rows.find((candidate) => laterLabels.includes(labelOf(candidate)));
      metricsContainer.insertBefore(row, anchor || null);
      rows.push(row);
    }

    const valueEl = row.querySelector('.metric-value');
    if (valueEl) valueEl.textContent = value;
  });
}
|
||||
|
||||
/**
 * Bring a rendered process item in line with a new process record after a
 * status change: dataset attributes, status badge, action buttons, progress
 * section and log polling.
 *
 * The action row is located through the always-present "logs" button. The
 * selectors `div:nth-child(2)` / `div:last-child` used previously match the
 * status wrapper inside `.process-info` first, so buttons were inserted next
 * to the status badge instead of into the button row.
 *
 * @param {HTMLElement} item - The rendered `.process-item` element.
 * @param {object} process - Latest process record.
 */
export function updateProcessItem(item, process) {
  // A missing monitor setting is treated like 'none', matching the dataset value.
  const monitor = process.train_monitor || 'none';

  item.dataset.processStatus = process.status;
  item.dataset.trainMonitor = monitor;
  if (process.swanlab_url) item.dataset.swanlabUrl = process.swanlab_url;

  // Status badge: swap the status class and the label text.
  const statusEl = item.querySelector('.process-status');
  if (statusEl) {
    statusEl.classList.remove('status-running', 'status-manual-stop', 'status-error', 'status-completed');
    let cls = 'status-completed';
    if (process.status === '运行中') cls = 'status-running';
    else if (process.status === '手动停止') cls = 'status-manual-stop';
    else if (process.status === '出错') cls = 'status-error';
    statusEl.classList.add(cls);
    statusEl.textContent = process.status;
  }

  // The action row is the parent of the log buttons.
  const logsBtn = item.querySelector('.btn-logs');
  const btnContainer = logsBtn ? logsBtn.parentElement : null;

  // SwanLab button: present only when monitoring is enabled.
  const existingSwan = item.querySelector('.btn-swanlab');
  const showSwan = monitor !== 'none';
  if (showSwan && !existingSwan && btnContainer) {
    const swanBtn = el('button', { class: 'btn-swanlab' });
    swanBtn.textContent = 'SwanLab';
    swanBtn.onclick = () => checkAndOpenSwanlab(process.id);
    // Keep it in front of the stop button when there is one.
    const stop = btnContainer.querySelector('.btn-stop');
    if (stop) btnContainer.insertBefore(swanBtn, stop);
    else btnContainer.appendChild(swanBtn);
  } else if (!showSwan && existingSwan) {
    existingSwan.remove();
  }

  // Stop button: present only while the process is running.
  const stopBtn = item.querySelector('.btn-stop');
  if (stopBtn) {
    if (!process.running) stopBtn.remove();
  } else if (process.running && btnContainer) {
    const newStop = el('button', { class: 'btn-stop' });
    newStop.textContent = '停止训练';
    newStop.onclick = () => stopProcess(process.id);
    btnContainer.appendChild(newStop);
  }

  // Delete button: present only once the process is no longer running.
  const delBtn = item.querySelector('.btn-delete');
  if (process.running) {
    if (delBtn) delBtn.remove();
  } else if (!delBtn && btnContainer) {
    const newDelete = el('button', { class: 'btn-delete' });
    newDelete.textContent = '删除';
    newDelete.onclick = () => deleteProcess(process.id);
    btnContainer.appendChild(newDelete);
  }

  if (!process.running) {
    // Items are rendered without a progress section when not running, so
    // drop the now-frozen one and stop the automatic log refresh.
    const progress = item.querySelector('.progress-container');
    if (progress) progress.remove();
    clearLogTimerFor(process.id);
  }
}
|
||||
|
||||
/**
 * Ask for confirmation, delete the process on the server and remove its item
 * from the page with a fade-out.
 *
 * When the last item of a group is removed, the group fades out and is
 * removed as well. When no items remain at all, the list shows the empty
 * placeholder.
 *
 * @param {string|number} processId - Id of the process to delete.
 */
export function deleteProcess(processId) {
  // Show the "no processes" placeholder once no item is left on the page.
  const showPlaceholderIfEmpty = () => {
    const leftover = document.querySelectorAll('.process-item');
    if (leftover.length !== 0) return;
    const listEl = document.getElementById('process-list');
    listEl.innerHTML = '<p>暂无训练进程</p>';
  };

  // Fade out an emptied group shortly after its last item was removed.
  const fadeOutGroup = (group) => {
    setTimeout(() => {
      group.style.transition = 'opacity 0.3s, transform 0.3s';
      group.style.opacity = '0';
      group.style.transform = 'translateY(-10px)';
      setTimeout(() => {
        if (group.parentNode) group.parentNode.removeChild(group);
        showPlaceholderIfEmpty();
      }, 300);
    }, 100);
  };

  // Runs after the item's own fade-out has finished.
  const detachItem = (item) => {
    const content = item.closest('.process-type-content');
    const group = content ? content.closest('.process-type-group') : null;
    item.parentNode.removeChild(item);
    if (!content) return;

    const remaining = content.querySelectorAll('.process-item');
    if (remaining.length === 0 && group) {
      fadeOutGroup(group);
      return;
    }

    // Re-fit the expanded group to its new, smaller content.
    const header = content.previousElementSibling;
    if (header && header.dataset.expanded === 'true') content.style.maxHeight = content.scrollHeight + 'px';
    showPlaceholderIfEmpty();
  };

  const onDeleted = () => {
    const item = document.querySelector(`[data-process-id="${processId}"]`);
    if (item && item.parentNode) {
      item.style.transition = 'opacity 0.3s, transform 0.3s';
      item.style.opacity = '0';
      item.style.transform = 'translateX(-20px)';
      setTimeout(() => detachItem(item), 300);
    }
    clearLogTimerFor(processId);
    showNotification('训练进程已删除', 'success');
  };

  const onFailed = () => {
    showNotification('删除进程失败,请刷新页面重试', 'error');
  };

  showConfirmDialog('确定要删除这个训练进程吗?此操作不可恢复。', () => {
    apiDelete(processId).then(onDeleted).catch(onFailed);
  });
}
|
||||
|
||||
/**
 * Ask for confirmation and stop a running process.
 *
 * On success the item is switched to the "手动停止" state immediately and then
 * reconciled with a fresh process list from the server; a failure of that
 * follow-up fetch is ignored. Cancelling the dialog shows an info notification.
 *
 * @param {string|number} processId - Id of the process to stop.
 */
export function stopProcess(processId) {
  // Immediate UI update, applied before the server state is re-fetched.
  const markAsStopped = (item) => {
    item.dataset.processStatus = '手动停止';
    const badge = item.querySelector('.process-status');
    if (badge) {
      badge.classList.remove('status-running', 'status-error', 'status-completed');
      badge.classList.add('status-manual-stop');
      badge.textContent = '手动停止';
    }
    const stopButton = item.querySelector('.btn-stop');
    if (stopButton) stopButton.remove();
    clearLogTimerFor(processId);
  };

  const onStopped = () => {
    const item = document.querySelector(`[data-process-id="${processId}"]`);
    if (item) markAsStopped(item);
    showNotification('训练进程已停止', 'info');

    // Reconcile with the server's view of the process.
    getProcesses()
      .then((processes) => {
        const latest = processes.find((proc) => proc.id === processId);
        if (latest && item) updateProcessItem(item, latest);
      })
      .catch(() => {});
  };

  const onConfirm = () => {
    apiStop(processId)
      .then(onStopped)
      .catch(() => {
        showNotification('停止进程失败', 'error');
      });
  };

  const onCancel = () => {
    showNotification('已取消停止操作', 'info');
  };

  showConfirmDialog('确定要停止这个训练进程吗?', onConfirm, onCancel);
}
|
||||
|
||||
/**
 * Open the SwanLab page of a process.
 *
 * The URL cached on the item's dataset is used when present; otherwise the
 * process list is fetched to look it up, and a found URL is cached on the item.
 * The user is notified when monitoring is disabled or no link exists yet.
 *
 * @param {string|number} processId - Id of the process.
 */
export function checkAndOpenSwanlab(processId) {
  const item = document.querySelector(`[data-process-id="${processId}"]`);

  const monitor = item ? item.dataset.trainMonitor : 'none';
  if (monitor === 'none') {
    showNotification('此训练未启用监控功能', 'info');
    return;
  }

  const cachedUrl = item ? item.dataset.swanlabUrl : '';
  if (cachedUrl && cachedUrl.trim() !== '') {
    openSwanlab(cachedUrl);
    return;
  }

  // No cached link: ask the server for the current one.
  getProcesses()
    .then((processes) => {
      const match = processes.find((proc) => proc.id === processId);
      if (!(match && match.swanlab_url)) {
        showNotification('SwanLab链接尚未生成,请稍后再试', 'info');
        return;
      }
      const freshUrl = match.swanlab_url;
      if (item) item.dataset.swanlabUrl = freshUrl;
      openSwanlab(freshUrl);
    })
    .catch(() => {
      showNotification('获取SwanLab链接失败,请稍后再试', 'error');
    });
}
|
||||
|
||||
/**
 * Open `url` in a new tab and report the outcome through a notification.
 * URLs rejected by isValidUrl() are not opened.
 *
 * @param {string} url - SwanLab page URL.
 */
function openSwanlab(url) {
  if (!isValidUrl(url)) {
    showNotification('SwanLab链接无效或尚未生成', 'info');
    return;
  }

  // window.open returns a falsy value when no window could be opened.
  const opened = window.open(url, '_blank');
  if (!opened) {
    showNotification('无法打开新窗口,请检查浏览器设置', 'error');
    return;
  }
  showNotification('正在打开SwanLab页面', 'info');
}
|
||||
|
||||
/**
 * Check whether `url` is acceptable as a link to open in a new tab.
 *
 * Only http and https URLs are accepted. Parseable URLs with any other
 * scheme (for example `javascript:` or `data:`) are rejected, which matches
 * the prefix rule already applied to strings the URL parser cannot handle.
 *
 * @param {*} url - Candidate URL.
 * @returns {boolean} True when the value is an http(s) URL.
 */
function isValidUrl(url) {
  try {
    const { protocol } = new URL(url);
    return protocol === 'http:' || protocol === 'https:';
  } catch {
    // Not parseable as an absolute URL: fall back to a lenient prefix check.
    const lowered = String(url).toLowerCase();
    return lowered.startsWith('http://') || lowered.startsWith('https://');
  }
}
|
||||
|
||||
73
trainer_web/static/js/processes/logs.js
Normal file
73
trainer_web/static/js/processes/logs.js
Normal file
@ -0,0 +1,73 @@
|
||||
import { getLogs } from '../services/apiClient.js';
|
||||
import { setHidden } from '../utils/dom.js';
|
||||
|
||||
// Active log auto-refresh timers: process id -> interval id returned by setInterval (see resetTimer / clearTimer).
const logTimers = new Map();
|
||||
|
||||
/**
 * Toggle the log panel of a process.
 *
 * A hidden panel is shown, filled with the current log and put on
 * auto-refresh; a visible panel is hidden and its refresh timer is stopped.
 *
 * @param {string|number} processId - Id of the process.
 */
export function showLogs(processId) {
  const panel = document.getElementById(`logs-${processId}`);
  if (!panel) return;

  const wasHidden = panel.classList.contains('hidden');
  setHidden(panel, false);

  if (!wasHidden) {
    // The panel was already open: this click closes it.
    setHidden(panel, true);
    clearTimer(processId);
    return;
  }

  loadLogContent(processId, panel);
  resetTimer(processId, panel);
}
|
||||
|
||||
/**
 * Reload the log of a process and restart its auto-refresh timer.
 * Does nothing when the log panel is missing or hidden.
 *
 * @param {string|number} processId - Id of the process.
 */
export function refreshLog(processId) {
  const panel = document.getElementById(`logs-${processId}`);
  const isOpen = Boolean(panel) && !panel.classList.contains('hidden');
  if (!isOpen) return;

  loadLogContent(processId, panel);
  resetTimer(processId, panel);
}
|
||||
|
||||
/**
 * Public wrapper around clearTimer(): stop the log auto-refresh of a process.
 *
 * @param {string|number} processId - Id of the process.
 */
export function clearLogTimerFor(processId) {
  const targetId = processId;
  clearTimer(targetId);
}
|
||||
|
||||
/**
 * Tell whether a log auto-refresh timer is registered for a process.
 *
 * @param {string|number} processId - Id of the process.
 * @returns {boolean} True when `logTimers` holds an entry for the id.
 */
export function isLogTimerActive(processId) {
  const registered = logTimers.has(processId);
  return registered;
}
|
||||
|
||||
/**
 * Restart the 1-second log auto-refresh for a process.
 *
 * Any existing timer is cleared first. A new timer is only created while the
 * process item reports the status "运行中"; the timer stops itself once the
 * panel is hidden or the process is no longer running.
 *
 * @param {string|number} processId - Id of the process.
 * @param {HTMLElement} container - Log panel that receives the content.
 */
function resetTimer(processId, container) {
  clearTimer(processId);

  // The running state is read from the rendered item each time it is needed.
  const isProcessRunning = () => {
    const node = document.querySelector(`[data-process-id="${processId}"]`);
    return Boolean(node) && node.dataset.processStatus === '运行中';
  };

  if (!isProcessRunning()) return;

  const tick = () => {
    const shouldStop = container.classList.contains('hidden') || !isProcessRunning();
    if (shouldStop) {
      clearTimer(processId);
      return;
    }
    loadLogContent(processId, container);
  };

  logTimers.set(processId, setInterval(tick, 1000));
}
|
||||
|
||||
/**
 * Stop and forget the log auto-refresh timer of a process, if there is one.
 *
 * @param {string|number} processId - Id of the process.
 */
function clearTimer(processId) {
  const handle = logTimers.get(processId);
  if (!handle) return;
  clearInterval(handle);
  logTimers.delete(processId);
}
|
||||
|
||||
/**
 * Fetch the log of a process and display it in `container`.
 *
 * The view scrolls to the bottom when it was within 10px of the bottom before
 * the request started, or when the text did not change. On failure an error
 * message replaces the content.
 *
 * @param {string|number} processId - Id of the process.
 * @param {HTMLElement} container - Log panel element.
 * @returns {Promise<void>} Settles after the panel has been updated.
 */
function loadLogContent(processId, container) {
  const previousText = container.textContent;

  // Measured before the request so the decision reflects what the user saw.
  const distanceToTop = container.scrollHeight - container.scrollTop;
  const wasAtBottom = distanceToTop <= container.clientHeight + 10;

  const showLogsText = (logs) => {
    container.textContent = logs;
    if (wasAtBottom || previousText === container.textContent) {
      container.scrollTop = container.scrollHeight;
    }
  };

  const showError = (err) => {
    // NOTE(review): the message written below does not itself contain the
    // substring checked here, so this guard presumably never suppresses a
    // repeated error — confirm the intended behavior.
    if (container.textContent.includes('加载失败')) return;
    container.textContent = `加载日志失败: ${err.message}`;
  };

  return getLogs(processId).then(showLogsText).catch(showError);
}
|
||||
|
||||
75
trainer_web/static/js/services/apiClient.js
Normal file
75
trainer_web/static/js/services/apiClient.js
Normal file
@ -0,0 +1,75 @@
|
||||
// Default request timeout in milliseconds, used when fetchWithTimeoutAndRetry() is called without a timeout.
const defaultTimeout = 10000;
|
||||
|
||||
/**
 * `fetch` wrapper with a timeout, cache-busting headers and retries.
 *
 * - The request is aborted after `timeout` ms and then fails with "请求超时";
 *   timeouts are not retried.
 * - Non-2xx responses and other failures are retried up to `retries` times,
 *   waiting `timeout / 2` ms before each retry.
 * - The no-cache headers override same-named headers from `options.headers`.
 *
 * NOTE(review): retries apply to every HTTP method, including POST/DELETE —
 * confirm the affected endpoints tolerate repeated requests.
 *
 * @param {string} url - Request URL.
 * @param {object} [options={}] - Options forwarded to `fetch`.
 * @param {number} [timeout=defaultTimeout] - Timeout in milliseconds.
 * @param {number} [retries=3] - Remaining retry attempts.
 * @returns {Promise<Response>} The successful response.
 */
export function fetchWithTimeoutAndRetry(url, options = {}, timeout = defaultTimeout, retries = 3) {
  const aborter = new AbortController();
  const timer = setTimeout(() => aborter.abort(), timeout);

  const noCacheHeaders = {
    'Cache-Control': 'no-cache, no-store, must-revalidate',
    Pragma: 'no-cache',
    Expires: '0',
  };
  const request = {
    ...options,
    headers: { ...options.headers, ...noCacheHeaders },
    signal: aborter.signal,
  };

  const onResponse = (response) => {
    clearTimeout(timer);
    if (!response.ok) throw new Error(`HTTP ${response.status}`);
    return response;
  };

  const onFailure = (error) => {
    clearTimeout(timer);
    if (error.name === 'AbortError') throw new Error('请求超时');
    if (!(retries > 0)) throw error;

    // Wait half the timeout, then try again with one attempt fewer.
    return new Promise((resolve) => {
      setTimeout(() => {
        resolve(fetchWithTimeoutAndRetry(url, options, timeout, retries - 1));
      }, timeout / 2);
    });
  };

  return fetch(url, request).then(onResponse).catch(onFailure);
}
|
||||
|
||||
/**
 * Fetch the process list from `/processes`.
 *
 * @returns {Promise<*>} Parsed JSON body of the response.
 */
export function getProcesses() {
  const pending = fetchWithTimeoutAndRetry('/processes');
  return pending.then((response) => response.json());
}
|
||||
|
||||
/**
 * Fetch the log text of a process from `/logs/<id>`.
 *
 * @param {string|number} processId - Id of the process.
 * @returns {Promise<string>} Response body as text.
 */
export function getLogs(processId) {
  const pending = fetchWithTimeoutAndRetry(`/logs/${processId}`);
  return pending.then((response) => response.text());
}
|
||||
|
||||
/**
 * Start a training run by POSTing `payload` as JSON to `/train`.
 *
 * @param {object} payload - Training parameters.
 * @returns {Promise<*>} Parsed JSON body of the response.
 */
export function startTrain(payload) {
  const request = {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', 'Cache-Control': 'no-cache' },
    body: JSON.stringify(payload),
  };
  return fetchWithTimeoutAndRetry('/train', request).then((response) => response.json());
}
|
||||
|
||||
/**
 * Stop a process via `POST /stop/<id>`.
 *
 * @param {string|number} processId - Id of the process.
 * @returns {Promise<object>} Parsed JSON body, or `{}` when the body is not valid JSON.
 */
export function stopProcess(processId) {
  const parseOrEmpty = (response) => response.json().catch(() => ({}));
  return fetchWithTimeoutAndRetry(`/stop/${processId}`, { method: 'POST' }).then(parseOrEmpty);
}
|
||||
|
||||
/**
 * Delete a process via `POST /delete/<id>`.
 *
 * @param {string|number} processId - Id of the process.
 * @returns {Promise<object>} Parsed JSON body, or `{}` when the body is not valid JSON.
 */
export function deleteProcess(processId) {
  const parseOrEmpty = (response) => response.json().catch(() => ({}));
  return fetchWithTimeoutAndRetry(`/delete/${processId}`, { method: 'POST' }).then(parseOrEmpty);
}
|
||||
|
||||
/**
 * Fetch the list of log files from `/logfiles`.
 *
 * @returns {Promise<*>} Parsed JSON body of the response.
 */
export function getLogFiles() {
  const pending = fetchWithTimeoutAndRetry('/logfiles');
  return pending.then((response) => response.json());
}
|
||||
|
||||
/**
 * Fetch the content of one log file.
 *
 * @param {string} filename - Log file name; URL-encoded before use.
 * @returns {Promise<string>} Response body as text.
 */
export function getLogFileContent(filename) {
  const endpoint = `/logfile-content/${encodeURIComponent(filename)}`;
  return fetchWithTimeoutAndRetry(endpoint).then((response) => response.text());
}
|
||||
|
||||
/**
 * Delete one log file via `DELETE /delete-logfile/<name>`.
 *
 * @param {string} filename - Log file name; URL-encoded before use.
 * @returns {Promise<*>} Parsed JSON body of the response.
 */
export function deleteLogFile(filename) {
  const endpoint = `/delete-logfile/${encodeURIComponent(filename)}`;
  const request = {
    method: 'DELETE',
    headers: { 'Cache-Control': 'no-cache' },
  };
  return fetchWithTimeoutAndRetry(endpoint, request).then((response) => response.json());
}
|
||||
|
||||
29
trainer_web/static/js/services/authClient.js
Normal file
29
trainer_web/static/js/services/authClient.js
Normal file
@ -0,0 +1,29 @@
|
||||
// localStorage key under which the API key is stored (see getApiKey / setApiKey).
const KEY = 'minimind_api_key';
|
||||
|
||||
/**
 * Read the stored API key.
 *
 * @returns {string} The stored key, or '' when none is stored or storage
 *   access throws.
 */
export function getApiKey() {
  let stored = '';
  try {
    stored = localStorage.getItem(KEY) || '';
  } catch (_) {
    stored = '';
  }
  return stored;
}
|
||||
|
||||
/**
 * Store the API key; falsy values are stored as ''. Storage errors are
 * ignored on purpose (best effort).
 *
 * @param {string} k - API key to store.
 */
export function setApiKey(k) {
  const value = k || '';
  try {
    localStorage.setItem(KEY, value);
  } catch (_) {
    // Best effort: storage access failed, nothing else to do.
  }
}
|
||||
|
||||
/**
 * Register a client via `POST /api/register` and store the returned API key.
 *
 * @param {object} [payload] - Registration data; `{}` is sent when falsy.
 * @returns {Promise<*>} Parsed JSON response. Rejects with
 *   `Error('register_failed')` on a non-2xx status.
 */
export function registerClient(payload) {
  const request = {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', 'Cache-Control': 'no-cache' },
    body: JSON.stringify(payload || {}),
  };

  const parseBody = (response) => {
    if (!response.ok) throw new Error('register_failed');
    return response.json();
  };

  const storeKey = (result) => {
    // Store the key only when the server actually returned one.
    if (result && result.api_key) setApiKey(result.api_key);
    return result;
  };

  return fetch('/api/register', request).then(parseBody).then(storeKey);
}
|
||||
128
trainer_web/static/js/train/form.js
Normal file
128
trainer_web/static/js/train/form.js
Normal file
@ -0,0 +1,128 @@
|
||||
import { startTrain } from '../services/apiClient.js';
|
||||
import { showNotification } from '../ui/notify.js';
|
||||
|
||||
/**
 * Wire up the training form: training-type switching (applied once
 * immediately through a synthetic `change` event), the GPU selectors and the
 * submit handler.
 */
export function initTrainForm() {
  const typeSelect = document.getElementById('train_type');
  if (typeSelect) {
    typeSelect.addEventListener('change', onTrainTypeChange);
    // Fire once so the form starts in the state of the preselected type.
    typeSelect.dispatchEvent(new Event('change'));
  }

  initGpuSelectors();

  const formEl = document.getElementById('train-form');
  if (formEl) formEl.addEventListener('submit', onSubmit);
}
|
||||
|
||||
/**
 * `change` handler of the training-type select (`this` is the select).
 *
 * Shows or hides the field groups and parameter cards of the selected type,
 * then fills in that type's default values. The visibility rules are applied
 * in a fixed order, so for an element matching several rules the last rule
 * applied decides.
 */
function onTrainTypeChange() {
  const v = this.value;

  const displayFor = (visible) => (visible ? 'block' : 'none');
  const toggleAll = (selector, visible) => {
    document.querySelectorAll(selector).forEach((field) => (field.style.display = displayFor(visible)));
  };
  const toggleCard = (selector, visible) => {
    const card = document.querySelector(selector);
    if (card) card.style.display = displayFor(visible);
  };

  const isRewardBased = v === 'ppo' || v === 'grpo' || v === 'spo';

  toggleAll('.pretrain-sft', v === 'pretrain' || v === 'sft' || v === 'dpo' || isRewardBased);
  toggleAll('.from-weight', !isRewardBased);
  toggleAll('.lora', v === 'lora');
  toggleAll('.dpo', v === 'dpo');
  toggleAll('.ppo', v === 'ppo');
  toggleCard('.parameter-card.dpo', v === 'dpo');
  toggleCard('.parameter-card.ppo', v === 'ppo');
  toggleAll('.grpo', v === 'grpo');
  toggleAll('.spo', v === 'spo');
  toggleCard('.parameter-card.grpo', v === 'grpo');
  toggleCard('.parameter-card.spo', v === 'spo');

  // Default form values per training type.
  const defaultsByType = {
    pretrain: { save_dir: '../out', save_weight: 'pretrain', epochs: '1', batch_size: '32', learning_rate: '5e-4', data_path: '../dataset/pretrain_hq.jsonl', from_weight: 'none', log_interval: '100', save_interval: '100', hidden_size: '512', num_hidden_layers: '8', max_seq_len: '512', use_moe: '0' },
    sft: { save_dir: '../out', save_weight: 'full_sft', epochs: '2', batch_size: '16', learning_rate: '5e-7', data_path: '../dataset/sft_mini_512.jsonl', from_weight: 'pretrain', log_interval: '100', save_interval: '100', hidden_size: '512', num_hidden_layers: '8', max_seq_len: '512', use_moe: '0' },
    lora: { save_dir: '../out/lora', lora_name: 'lora_identity', epochs: '50', batch_size: '32', learning_rate: '1e-4', data_path: '../dataset/lora_identity.jsonl', from_weight: 'full_sft', log_interval: '10', save_interval: '1', hidden_size: '512', num_hidden_layers: '8', max_seq_len: '512', use_moe: '0' },
    dpo: { save_dir: '../out', save_weight: 'dpo', epochs: '1', batch_size: '4', learning_rate: '4e-8', data_path: '../dataset/dpo.jsonl', from_weight: 'full_sft', log_interval: '100', save_interval: '100', beta: '0.1', hidden_size: '512', num_hidden_layers: '8', max_seq_len: '1024', use_moe: '0' },
    ppo: { save_dir: '../out', save_weight: 'ppo_actor', epochs: '1', batch_size: '2', learning_rate: '8e-8', data_path: '../dataset/rlaif-mini.jsonl', log_interval: '1', save_interval: '10', clip_epsilon: '0.1', vf_coef: '0.5', kl_coef: '0.02', reasoning: '1', update_old_actor_freq: '4', reward_model_path: '../../internlm2-1_8b-reward', hidden_size: '512', num_hidden_layers: '8', max_seq_len: '66', use_moe: '0' },
    grpo: { save_dir: '../out', save_weight: 'grpo', epochs: '1', batch_size: '2', learning_rate: '8e-8', data_path: '../dataset/rlaif-mini.jsonl', log_interval: '1', save_interval: '10', beta: '0.02', num_generations: '8', reasoning: '1', reward_model_path: '../../internlm2-1_8b-reward', hidden_size: '512', num_hidden_layers: '8', max_seq_len: '66', use_moe: '0' },
    spo: { save_dir: '../out', save_weight: 'spo', epochs: '1', batch_size: '2', learning_rate: '1e-7', data_path: '../dataset/rlaif-mini.jsonl', log_interval: '1', save_interval: '10', beta: '0.02', reasoning: '1', reward_model_path: '../../internlm2-1_8b-reward', hidden_size: '512', num_hidden_layers: '8', max_seq_len: '66', use_moe: '0' },
  };

  // Unknown types leave the current values untouched.
  if (Object.prototype.hasOwnProperty.call(defaultsByType, v)) setDefaults(defaultsByType[v]);
}
|
||||
|
||||
/**
 * Apply a set of default values to form controls, looked up by their `name`.
 *
 * Every element matching `[name="<key>"]` receives the mapped value, except
 * controls that sit inside a `.parameter-card` whose inline `display` is
 * `none` — those are left untouched.
 *
 * @param {Object<string, string>} map - Field name -> value to assign.
 */
function setDefaults(map) {
  for (const [fieldName, fieldValue] of Object.entries(map)) {
    for (const control of document.querySelectorAll(`[name="${fieldName}"]`)) {
      const owningCard = control.closest('.parameter-card');
      // Skip controls whose card is currently hidden via inline style.
      if (owningCard && owningCard.style.display === 'none') continue;
      control.value = fieldValue;
    }
  }
}
|
||||
|
||||
/**
 * Wire up the training-mode selector and the single-/multi-GPU panels.
 *
 * Reads `window.hasGpu` and `window.gpuCount`. Without a GPU the mode is
 * forced to `cpu` and both panels are hidden; with a GPU the `gpu_num` input
 * is pre-filled with the GPU count when that count is positive. Afterwards the
 * panels follow the selector on every `change` event.
 * Does nothing if the `training_mode` element is missing.
 */
function initGpuSelectors() {
  const gpuAvailable = window.hasGpu === true;
  const detectedGpus = Number(window.gpuCount || 0);
  const modeSelect = document.getElementById('training_mode');
  const singlePanel = document.getElementById('single-gpu-selection');
  const multiPanel = document.getElementById('multi-gpu-selection');
  if (!modeSelect) return;

  // Show or hide one panel; tolerates a panel that is absent from the page.
  const setDisplay = (panel, shown) => {
    if (panel) panel.style.display = shown ? 'block' : 'none';
  };

  // Make panel visibility match the currently selected training mode.
  const syncPanels = () => {
    const selected = modeSelect.value;
    setDisplay(singlePanel, selected === 'single_gpu');
    setDisplay(multiPanel, selected === 'multi_gpu');
  };

  if (gpuAvailable) {
    const gpuNumField = document.getElementById('gpu_num');
    if (gpuNumField && detectedGpus > 0) gpuNumField.value = detectedGpus;
  } else {
    modeSelect.value = 'cpu';
    setDisplay(singlePanel, false);
    setDisplay(multiPanel, false);
  }

  syncPanels();
  modeSelect.addEventListener('change', syncPanels);
}
|
||||
|
||||
/**
 * Submit handler for the training form.
 *
 * Cancels the native submission, collects the form's named controls into a
 * plain object and passes it to `startTrain` after a one-second delay.
 *
 * Collection rules:
 *  - `training_mode` itself is never sent; it only decides how `device` is encoded.
 *  - Controls inside a `.parameter-card` with inline `display: none` are skipped.
 *  - Unchecked checkboxes are skipped.
 *  - `gpu_num` is sent only while the multi-GPU panel is displayed.
 *  - `device` becomes `cuda:<value>` for `single_gpu`, `cpu` for `cpu`, and is
 *    omitted for any other mode.
 *
 * @param {Event} e - Submit event; `e.currentTarget` must be the form.
 */
function onSubmit(e) {
  e.preventDefault();
  const form = e.currentTarget;
  const modeSelect = form.querySelector('#training_mode');
  const trainingMode = modeSelect ? modeSelect.value : 'cpu';
  const payload = {};

  for (const field of form.querySelectorAll('input, select, textarea')) {
    const key = field.name;
    if (!key || key === 'training_mode') continue;

    const card = field.closest('.parameter-card');
    if (card && card.style.display === 'none') continue;

    if (field.type === 'checkbox' && !field.checked) continue;

    if (key === 'gpu_num') {
      const multiPanel = document.getElementById('multi-gpu-selection');
      if (!multiPanel || multiPanel.style.display === 'none') continue;
    }

    let fieldValue = field.value;
    if (key === 'device') {
      if (trainingMode === 'single_gpu') fieldValue = `cuda:${fieldValue}`;
      else if (trainingMode === 'cpu') fieldValue = 'cpu';
      else continue;
    }

    payload[key] = fieldValue;
  }

  // On success, announce it and switch to the processes tab one second later.
  const handleResult = (result) => {
    if (!result.success) {
      showNotification('训练启动失败:' + result.error, 'error');
      return;
    }
    showNotification('训练已开始!', 'success');
    setTimeout(() => {
      const processTab = document.querySelector('.tab[onclick*="processes"]');
      if (processTab) processTab.click();
    }, 1000);
  };

  // A rejected request is reported as "still starting", not as an error.
  const handleFailure = () => {
    showNotification('启动训练中,请耐心等待...', 'info');
  };

  showNotification('正在启动训练...', 'info');
  setTimeout(() => {
    startTrain(payload).then(handleResult).catch(handleFailure);
  }, 1000);
}
|
||||
|
||||
51
trainer_web/static/js/ui/dialog.js
Normal file
51
trainer_web/static/js/ui/dialog.js
Normal file
@ -0,0 +1,51 @@
|
||||
/**
 * Show a modal confirm/cancel dialog appended to `document.body`.
 *
 * Any dialog already on screen is removed first, so at most one is visible.
 * The message is inserted as plain text (never parsed as HTML), so file names
 * or server-provided strings cannot inject markup.
 *
 * @param {string} message - Text shown in the dialog body.
 * @param {Function} [onConfirm] - Called when the confirm button is clicked.
 * @param {Function|null} [onCancel=null] - Called when the cancel button or
 *   the backdrop (outside the dialog box) is clicked.
 */
export function showConfirmDialog(message, onConfirm, onCancel = null) {
  const existing = document.querySelector('.custom-dialog');
  if (existing && existing.parentNode && existing.parentNode.classList.contains('dialog-overlay')) {
    document.body.removeChild(existing.parentNode);
  }

  const overlay = document.createElement('div');
  overlay.className = 'dialog-overlay';

  const container = document.createElement('div');
  container.className = 'custom-dialog';
  // Static skeleton only; the caller-supplied message is filled in below.
  container.innerHTML = `
    <div class="dialog-content">
      <div class="dialog-message"></div>
      <div class="dialog-actions">
        <button class="dialog-button dialog-cancel">取消</button>
        <button class="dialog-button dialog-confirm">确认</button>
      </div>
    </div>
  `;
  container.querySelector('.dialog-message').textContent = message;

  overlay.appendChild(container);
  document.body.appendChild(overlay);

  // Add the `show` classes on a later tick, after the nodes are attached.
  setTimeout(() => {
    overlay.classList.add('show');
    container.classList.add('show');
  }, 10);

  // Run the callback (if any) and always close, even if the callback throws.
  const finish = (callback) => {
    try {
      if (callback) callback();
    } finally {
      closeDialog(overlay);
    }
  };

  container.querySelector('.dialog-confirm').addEventListener('click', () => finish(onConfirm));
  container.querySelector('.dialog-cancel').addEventListener('click', () => finish(onCancel));
  overlay.addEventListener('click', (e) => {
    // Only a click on the backdrop itself cancels, not clicks inside the box.
    if (e.target === overlay) finish(onCancel);
  });
}
|
||||
|
||||
/**
 * Hide a dialog overlay and detach it from the page after 300 ms.
 *
 * Removes the `show` class from the overlay and from its `.custom-dialog`
 * child (when present), then removes the overlay from `document.body` if it
 * is still attached when the timer fires.
 *
 * @param {Element} overlay - The `.dialog-overlay` element to close.
 */
export function closeDialog(overlay) {
  overlay.classList.remove('show');

  const dialogBox = overlay.querySelector('.custom-dialog');
  if (dialogBox) {
    dialogBox.classList.remove('show');
  }

  const detach = () => {
    if (!overlay.parentNode) return;
    document.body.removeChild(overlay);
  };
  setTimeout(detach, 300);
}
|
||||
|
||||
16
trainer_web/static/js/ui/notify.js
Normal file
16
trainer_web/static/js/ui/notify.js
Normal file
@ -0,0 +1,16 @@
|
||||
/**
 * Display a transient notification appended to `document.body`.
 *
 * The element gets the classes `notification notification-<type>`, receives
 * the `show` class after 10 ms, loses it after 3 s, and is removed from the
 * body 300 ms after that.
 *
 * @param {string} message - Text to display (set via `textContent`).
 * @param {string} [type='success'] - Suffix for the `notification-*` class.
 */
export function showNotification(message, type = 'success') {
  const toast = document.createElement('div');
  toast.className = `notification notification-${type}`;
  toast.textContent = message;
  document.body.appendChild(toast);

  const reveal = () => {
    toast.classList.add('show');
  };
  const detach = () => {
    if (toast.parentNode) document.body.removeChild(toast);
  };
  const dismiss = () => {
    toast.classList.remove('show');
    setTimeout(detach, 300);
  };

  setTimeout(reveal, 10);
  setTimeout(dismiss, 3000);
}
|
||||
|
||||
15
trainer_web/static/js/ui/tabs.js
Normal file
15
trainer_web/static/js/ui/tabs.js
Normal file
@ -0,0 +1,15 @@
|
||||
import { qsa } from '../utils/dom.js';
|
||||
|
||||
/**
 * Switch the visible tab.
 *
 * Hides every `.tab-content`, clears `active` from every `.tab`, then reveals
 * the element whose id is `tabName` and marks `evt.currentTarget` as active.
 * Finally runs the optional lifecycle hooks.
 *
 * @param {Event|null} evt - Click event of the tab button (may be absent).
 * @param {string} tabName - Id of the tab content element to show.
 * @param {Object} [hooks={}] - Optional callbacks:
 *   `onLeaveProcesses` (any tab other than `processes`),
 *   `onEnterProcesses` (tab `processes`),
 *   `onEnterLogfiles` (tab `logfiles`).
 */
export function openTab(evt, tabName, hooks = {}) {
  for (const panel of qsa('.tab-content')) panel.classList.add('hidden');
  for (const tabButton of qsa('.tab')) tabButton.classList.remove('active');

  const activePanel = document.getElementById(tabName);
  if (activePanel) activePanel.classList.remove('hidden');

  const clicked = evt && evt.currentTarget;
  if (clicked) clicked.classList.add('active');

  const onProcesses = tabName === 'processes';
  if (!onProcesses && hooks.onLeaveProcesses) hooks.onLeaveProcesses();
  if (onProcesses && hooks.onEnterProcesses) hooks.onEnterProcesses();
  if (tabName === 'logfiles' && hooks.onEnterLogfiles) hooks.onEnterLogfiles();
}
|
||||
|
||||
34
trainer_web/static/js/utils/dom.js
Normal file
34
trainer_web/static/js/utils/dom.js
Normal file
@ -0,0 +1,34 @@
|
||||
/**
 * Shorthand for `scope.querySelector(selector)`.
 *
 * @param {string} selector - CSS selector.
 * @param {ParentNode} [scope=document] - Root to search within.
 * @returns {Element|null} Whatever `querySelector` returns for the selector.
 */
export function qs(selector, scope = document) {
  const firstMatch = scope.querySelector(selector);
  return firstMatch;
}
|
||||
|
||||
/**
 * Shorthand for `scope.querySelectorAll(selector)` that returns a real array.
 *
 * @param {string} selector - CSS selector.
 * @param {ParentNode} [scope=document] - Root to search within.
 * @returns {Array} The matches copied into an array via `Array.from`.
 */
export function qsa(selector, scope = document) {
  const matches = scope.querySelectorAll(selector);
  return Array.from(matches);
}
|
||||
|
||||
/**
 * Create an element and initialise it from an attribute map.
 *
 * Two keys are special: `class` sets `className` and `text` sets
 * `textContent`. Every other key is applied with `setAttribute`.
 *
 * @param {string} tag - Tag name passed to `document.createElement`.
 * @param {Object} [attrs={}] - Attribute name -> value.
 * @returns {Element} The newly created, unattached element.
 */
export function el(tag, attrs = {}) {
  const node = document.createElement(tag);
  Object.entries(attrs).forEach(([key, value]) => {
    switch (key) {
      case 'class':
        node.className = value;
        break;
      case 'text':
        node.textContent = value;
        break;
      default:
        node.setAttribute(key, value);
    }
  });
  return node;
}
|
||||
|
||||
/**
 * Add or remove the `hidden` class on a node.
 *
 * @param {Element|null} node - Target element; a falsy node is ignored.
 * @param {*} hidden - Truthy adds the `hidden` class, falsy removes it.
 */
export function setHidden(node, hidden) {
  if (node) {
    node.classList[hidden ? 'add' : 'remove']('hidden');
  }
}
|
||||
|
||||
/**
 * Set a node's `textContent`.
 *
 * @param {Node|null} node - Target node; a falsy node is ignored.
 * @param {string} text - New text content.
 */
export function setText(node, text) {
  if (node) {
    node.textContent = text;
  }
}
|
||||
|
||||
/**
 * Remove every child of a node, first child first.
 *
 * @param {Node|null} node - Parent to empty; a falsy node is ignored.
 */
export function clearChildren(node) {
  if (!node) return;
  for (let child = node.firstChild; child; child = node.firstChild) {
    node.removeChild(child);
  }
}
|
||||
|
||||
27
trainer_web/stop_web_ui.sh
Executable file
27
trainer_web/stop_web_ui.sh
Executable file
@ -0,0 +1,27 @@
|
||||
#!/bin/bash
# Stop the Web UI service whose PID is recorded in train_web_ui.pid, a file
# expected next to this script.
#
# Flow: send SIGTERM, wait 2 seconds, escalate to SIGKILL if the process is
# still alive, then remove the PID file so it cannot go stale.

PID_FILE="train_web_ui.pid"

SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
# Bail out rather than look for (and delete) a PID file in the wrong directory.
cd "$SCRIPT_DIR" || exit 1

if [ -f "$PID_FILE" ]; then
    pid=$(cat "$PID_FILE")
    if ps -p "$pid" > /dev/null 2>&1; then
        echo "正在停止 Web UI 服务 (PID: $pid)"
        kill "$pid"
        sleep 2
        # Check whether the process actually stopped.
        if ps -p "$pid" > /dev/null 2>&1; then
            echo "强制停止服务..."
            kill -9 "$pid"
        fi
        # NOTE(review): these two messages are informational only; this script
        # saves nothing itself. Presumably the service persists its process
        # table on shutdown; confirm against train_web_ui.py.
        echo "正在保存进程信息..."
        echo "已保存到 'trainer_web/training_processes.json'"
        sleep 1
        # SIGKILL cannot be trapped, so the service may not have cleaned up its
        # PID file. Remove it so a later run never signals a recycled PID.
        rm -f "$PID_FILE"
        echo "服务已停止"
    else
        echo "服务未运行,但存在PID文件,已删除"
        rm "$PID_FILE"
    fi
else
    echo "服务未运行(未找到PID文件)"
fi
|
||||
357
trainer_web/templates/index.html
Normal file
357
trainer_web/templates/index.html
Normal file
@ -0,0 +1,357 @@
|
||||
<!DOCTYPE html>
<html lang="zh-CN">
<!--
    Jinja2 template for the single-page MiniMind training UI.
    Template variables read here: `gpu_count` and `has_gpu`.
    Behaviour is supplied by /static/js/app.js, loaded as an ES module at the
    end of <body>; the inline onclick handlers below expect that script to
    expose the referenced functions globally.
-->
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>MiniMind Training Lab</title>
    <link rel="stylesheet" href="/static/css/style.css">
    <link rel="preconnect" href="https://fonts.googleapis.com">
    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
</head>
<body>
    <div class="header">
        <img src="/static/images/logo2.png" alt="MiniMind Logo" class="logo">
        <h1>MiniMind Training Lab</h1>
    </div>

    <!-- Tab bar: start training / training processes / log files -->
    <div class="tabs">
        <button class="tab active" onclick="openTab(event, 'train')">开始训练</button>
        <button class="tab" onclick="openTab(event, 'processes')">训练进程</button>
        <button class="tab" onclick="openTab(event, 'logfiles')">日志文件</button>
    </div>

    <div id="train" class="tab-content">
        <div class="form-container">
            <h2 class="section-title">选择训练类型并配置参数</h2>
            <form id="train-form" method="post" action="/train">
                <!-- Basic training parameters -->
                <div class="parameter-card">
                    <h3 class="card-title">基础训练参数</h3>
                    <div class="parameter-content">
                        <div class="form-group">
                            <label for="train_type">训练类型:</label>
                            <select id="train_type" name="train_type" required>
                                <option value="pretrain">🔤 Pretrain</option>
                                <option value="sft">🎯 SFT - Full</option>
                                <option value="lora">⚡ SFT - Lora</option>
                                <option value="dpo">🧠 RL - DPO</option>
                                <option value="ppo">🚀 RL - PPO</option>
                                <option value="grpo">💡 RL - GRPO</option>
                                <option value="spo">🔍 RL - SPO</option>
                            </select>
                        </div>
                        <div class="form-group">
                            <label for="epochs">训练轮数:</label>
                            <input type="number" id="epochs" name="epochs" min="1" value="10" required>
                        </div>
                        <div class="form-group">
                            <label for="batch_size">Batch Size:</label>
                            <input type="number" id="batch_size" name="batch_size" min="1" value="32" required>
                        </div>
                        <div class="form-group">
                            <label for="learning_rate">学习率:</label>
                            <input type="text" id="learning_rate" name="learning_rate" value="5e-4" required>
                        </div>
                        <div class="form-group">
                            <label for="log_interval">日志打印间隔:</label>
                            <input type="number" id="log_interval" name="log_interval" min="1" value="100" required>
                        </div>
                        <div class="form-group">
                            <label for="data_path">数据路径:</label>
                            <div class="input-with-picker">
                                <input type="text" id="data_path" name="data_path" value="./dataset" required>
                                <button type="button" class="btn-picker" onclick="selectFolder('data_path')" title="选择文件夹">
                                    📁
                                </button>
                            </div>
                        </div>
                    </div>
                </div>

                <!-- Reinforcement-learning parameters (DPO); card is hidden by default -->
                <div class="parameter-card dpo" style="display: none;">
                    <h3 class="card-title">强化学习参数 (DPO)</h3>
                    <div class="parameter-content">
                        <div class="form-group">
                            <label for="beta">DPO Beta 参数:</label>
                            <input type="text" id="beta" name="beta" placeholder="0.1">
                        </div>
                    </div>
                </div>

                <!-- Reinforcement-learning parameters (PPO); card is hidden by default -->
                <div class="parameter-card ppo" style="display: none;">
                    <h3 class="card-title">强化学习参数 (PPO)</h3>
                    <div class="parameter-content">
                        <div class="form-group">
                            <label for="clip_epsilon">PPO剪切系数:</label>
                            <input type="text" id="clip_epsilon" name="clip_epsilon" placeholder="0.2">
                        </div>
                        <div class="form-group">
                            <label for="vf_coef">价值函数系数:</label>
                            <input type="text" id="vf_coef" name="vf_coef" placeholder="0.1">
                        </div>
                        <div class="form-group">
                            <label for="kl_coef">KL散度惩罚系数:</label>
                            <input type="text" id="kl_coef" name="kl_coef" placeholder="0.01">
                        </div>
                        <div class="form-group">
                            <label for="reasoning">是否使用Reasoning模式:</label>
                            <select id="reasoning" name="reasoning">
                                <option value="0">否</option>
                                <option value="1">是</option>
                            </select>
                        </div>
                        <div class="form-group">
                            <label for="update_old_actor_freq">更新旧Actor频率:</label>
                            <input type="number" id="update_old_actor_freq" name="update_old_actor_freq" placeholder="10" min="1">
                        </div>
                        <div class="form-group">
                            <label for="reward_model_path">奖励模型路径:</label>
                            <div class="input-with-picker">
                                <input type="text" id="reward_model_path" name="reward_model_path" placeholder="path/to/reward/model">
                                <button type="button" class="btn-picker" onclick="selectFolder('reward_model_path')" title="选择文件夹">
                                    📁
                                </button>
                            </div>
                        </div>
                    </div>
                </div>

                <!-- Reinforcement-learning parameters (GRPO); card is hidden by default.
                     Field names (beta, reasoning, reward_model_path) are shared with the
                     other RL cards; only the ids differ. -->
                <div class="parameter-card grpo" style="display: none;">
                    <h3 class="card-title">强化学习参数 (GRPO)</h3>
                    <div class="parameter-content">
                        <div class="form-group">
                            <label for="beta_grpo">GRPO KL惩罚系数:</label>
                            <input type="text" id="beta_grpo" name="beta" placeholder="0.02">
                        </div>
                        <div class="form-group">
                            <label for="num_generations">每个prompt生成样本数:</label>
                            <input type="number" id="num_generations" name="num_generations" placeholder="8" min="1">
                        </div>
                        <div class="form-group">
                            <label for="reasoning_grpo">是否使用Reasoning模式:</label>
                            <select id="reasoning_grpo" name="reasoning">
                                <option value="0">否</option>
                                <option value="1">是</option>
                            </select>
                        </div>
                        <div class="form-group">
                            <label for="reward_model_path_grpo">奖励模型路径:</label>
                            <div class="input-with-picker">
                                <input type="text" id="reward_model_path_grpo" name="reward_model_path" placeholder="../../internlm2-1_8b-reward">
                                <button type="button" class="btn-picker" onclick="selectFolder('reward_model_path_grpo')" title="选择文件夹">
                                    📁
                                </button>
                            </div>
                        </div>
                    </div>
                </div>

                <!-- Reinforcement-learning parameters (SPO); card is hidden by default -->
                <div class="parameter-card spo" style="display: none;">
                    <h3 class="card-title">强化学习参数 (SPO)</h3>
                    <div class="parameter-content">
                        <div class="form-group">
                            <label for="beta_spo">SPO KL惩罚系数:</label>
                            <input type="text" id="beta_spo" name="beta" placeholder="0.02">
                        </div>
                        <div class="form-group">
                            <label for="reasoning_spo">是否使用Reasoning模式:</label>
                            <select id="reasoning_spo" name="reasoning">
                                <option value="0">否</option>
                                <option value="1">是</option>
                            </select>
                        </div>
                        <div class="form-group">
                            <label for="reward_model_path_spo">奖励模型路径:</label>
                            <div class="input-with-picker">
                                <input type="text" id="reward_model_path_spo" name="reward_model_path" placeholder="../../internlm2-1_8b-reward">
                                <button type="button" class="btn-picker" onclick="selectFolder('reward_model_path_spo')" title="选择文件夹">
                                    📁
                                </button>
                            </div>
                        </div>
                    </div>
                </div>

                <!-- Model architecture parameters -->
                <div class="parameter-card">
                    <h3 class="card-title">模型结构参数</h3>
                    <div class="parameter-content">
                        <div class="form-group">
                            <label for="hidden_size">隐藏层维度:</label>
                            <input type="number" id="hidden_size" name="hidden_size" min="128" value="512" required>
                        </div>
                        <div class="form-group">
                            <label for="num_hidden_layers">隐藏层数量:</label>
                            <input type="number" id="num_hidden_layers" name="num_hidden_layers" min="1" value="8" required>
                        </div>
                        <div class="form-group">
                            <label for="max_seq_len">最大序列长度:</label>
                            <input type="number" id="max_seq_len" name="max_seq_len" min="64" value="512" required>
                        </div>
                        <div class="form-group">
                            <label for="use_moe">是否使用MoE架构:</label>
                            <select id="use_moe" name="use_moe">
                                <option value="0">❌ 否</option>
                                <option value="1">✅ 是</option>
                            </select>
                        </div>
                    </div>
                </div>

                <!-- Checkpoint saving and resuming -->
                <div class="parameter-card">
                    <h3 class="card-title">模型保存与恢复</h3>
                    <div class="parameter-content">
                        <div class="form-group">
                            <label for="save_dir">模型保存目录:</label>
                            <div class="input-with-picker">
                                <input type="text" id="save_dir" name="save_dir" value="./checkpoints" required>
                                <button type="button" class="btn-picker" onclick="selectFolder('save_dir')" title="选择文件夹">
                                    📁
                                </button>
                            </div>
                        </div>
                        <div class="form-group">
                            <label for="save_interval">模型保存间隔:</label>
                            <input type="number" id="save_interval" name="save_interval" min="1" value="1000" required>
                        </div>
                        <div class="form-group pretrain-sft">
                            <label for="save_weight">保存权重前缀名:</label>
                            <input type="text" id="save_weight" name="save_weight" value="model">
                        </div>
                        <div class="form-group lora">
                            <label for="lora_name">LoRA权重名称:</label>
                            <input type="text" id="lora_name" name="lora_name" value="lora_adapter">
                        </div>
                        <div class="form-group from-weight">
                            <label for="from_weight">基于哪个权重训练:</label>
                            <input type="text" id="from_weight" name="from_weight" value="pretrained_model">
                        </div>
                        <div class="form-group">
                            <div class="checkbox-group">
                                <input type="checkbox" id="from_resume" name="from_resume" value="1">
                                <label for="from_resume">是否自动检测&续训</label>
                            </div>
                        </div>
                    </div>
                </div>

                <!-- Other settings: training mode, GPU selection, monitoring -->
                <div class="parameter-card">
                    <h3 class="card-title">其他设置</h3>
                    <div class="parameter-content">
                        <div class="form-group">
                            <label for="training_mode">训练方式:</label>
                            <select id="training_mode" name="training_mode" required>
                                <option value="single_gpu">🎮 单卡训练</option>
                                <option value="multi_gpu">🚀 多卡训练</option>
                                <option value="cpu">💻 CPU训练</option>
                            </select>
                        </div>
                        <!--
                            The bounds below are clamped with the `max` filter so that
                            gpu_count = 0 still renders a valid range (device 0..0,
                            gpu_num 1..1). Otherwise max < min makes these required
                            inputs invalid, and because they are hidden in CPU mode the
                            browser's constraint validation would silently block the
                            whole form from submitting.
                        -->
                        <div class="form-group" id="single-gpu-selection">
                            <label for="device">GPU序号:</label>
                            <input type="number" id="device" name="device" min="0" max="{{ [gpu_count|default(1) - 1, 0]|max }}" value="0" required>
                        </div>
                        <div class="form-group" id="multi-gpu-selection" style="display: none;">
                            <label for="gpu_num">多卡并行数:</label>
                            <div class="input-with-picker">
                                <input type="number" id="gpu_num" name="gpu_num" min="1" max="{{ [gpu_count|default(1), 1]|max }}" value="{{ [gpu_count|default(1), 1]|max }}" required>
                                <span class="hint-text">(可用GPU数量: {{ gpu_count|default(0) }})</span>
                            </div>
                        </div>
                        <div class="form-group">
                            <label for="train_monitor">训练监控:</label>
                            <select id="train_monitor" name="train_monitor">
                                <option value="none">❌ 无监控</option>
                                <option value="wandb">📊 使用WandB/SwanLab</option>
                            </select>
                        </div>
                    </div>
                </div>

                <!-- Expose the server-side GPU facts to the client scripts -->
                <script>
                    window.hasGpu = {{ has_gpu|default(false)|tojson|safe }};
                    window.gpuCount = {{ gpu_count|default(0)|tojson|safe }};
                </script>

                <div class="submit-container">
                    <button type="submit" class="btn-primary">
                        <span class="btn-icon">🚀</span>
                        开始训练
                    </button>
                </div>
            </form>
        </div>
    </div>

    <div id="processes" class="tab-content hidden">
        <div class="section-header">
            <h2 class="section-title">训练进程列表</h2>
            <div class="section-actions">
                <button class="btn-refresh" onclick="refreshProcesses()">
                    <span class="btn-icon">🔄</span>
                    刷新
                </button>
            </div>
        </div>
        <div id="process-list">
            <!-- The process list is loaded dynamically by JavaScript -->
        </div>
    </div>

    <div id="logfiles" class="tab-content hidden">
        <div class="section-header">
            <h2 class="section-title">日志文件列表</h2>
            <div class="section-actions">
                <button class="btn-refresh" onclick="refreshLogs()">
                    <span class="btn-icon">🔄</span>
                    刷新
                </button>
            </div>
        </div>
        <div id="logfiles-list">
            <!-- The log file list is loaded dynamically by JavaScript -->
        </div>
    </div>

    <!-- File browser modal -->
    <div id="file-browser-modal" class="modal hidden">
        <div class="modal-content">
            <div class="modal-header">
                <h3 id="modal-title">选择文件或文件夹</h3>
                <button class="modal-close" onclick="closeFileBrowser()">×</button>
            </div>
            <div class="modal-body">
                <div class="file-browser-nav">
                    <div class="current-path" id="current-path">./</div>
                    <div class="nav-buttons">
                        <button class="btn-navigate" onclick="selectCurrentDirectory()" title="选择当前目录">📍</button>
                        <button class="btn-navigate" onclick="navigateToParent()" title="上级目录">⬆️</button>
                    </div>
                </div>
                <div class="quick-paths" id="quick-paths">
                    <!-- Quick-access paths are rendered here -->
                </div>
                <div class="file-browser-help">
                    💡 点击文件夹进入目录,点击文件选择文件,使用📍选择当前目录
                </div>
                <div class="file-list" id="file-list">
                    <!-- The file list is rendered here -->
                </div>
            </div>
            <div class="modal-footer">
                <input type="text" id="selected-path" placeholder="选择的文件或文件夹路径" readonly>
                <button class="btn-primary" onclick="confirmFileSelection()">确认选择</button>
                <button class="btn-secondary" onclick="closeFileBrowser()">取消</button>
            </div>
        </div>
    </div>

    <script type="module" src="/static/js/app.js"></script>
</body>
</html>
|
||||
1091
trainer_web/train_web_ui.py
Normal file
1091
trainer_web/train_web_ui.py
Normal file
File diff suppressed because it is too large
Load Diff
1
trainer_web/training_processes.json
Normal file
1
trainer_web/training_processes.json
Normal file
@ -0,0 +1 @@
|
||||
{}
|
||||
Loading…
Reference in New Issue
Block a user