mirror of
https://github.com/jingyaogong/minimind.git
synced 2026-01-13 19:57:20 +08:00
update web dataset file
This commit is contained in:
parent
fc1f07bf74
commit
4a741b3d3b
@ -253,6 +253,119 @@ input[type="text"]:focus, input[type="number"]:focus, select:focus, textarea:foc
|
||||
background: rgba(45, 55, 72, 0.9);
|
||||
transform: translateY(-1px);
|
||||
}
|
||||
|
||||
/* 文件夹选择器样式 */
|
||||
.input-with-picker {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.input-with-picker input {
|
||||
flex: 1;
|
||||
}
|
||||
|
||||
.btn-picker {
|
||||
background: linear-gradient(135deg, var(--info-grad-start) 0%, var(--info-grad-end) 100%);
|
||||
color: white;
|
||||
border: none;
|
||||
padding: 0.75rem;
|
||||
border-radius: var(--radius-md);
|
||||
font-size: 1rem;
|
||||
cursor: pointer;
|
||||
transition: all 0.3s ease;
|
||||
box-shadow: var(--shadow-sm);
|
||||
min-width: 40px;
|
||||
height: 40px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
.btn-picker:hover {
|
||||
transform: translateY(-1px);
|
||||
box-shadow: var(--shadow-md);
|
||||
filter: brightness(1.1);
|
||||
}
|
||||
|
||||
/* 进度条样式 */
|
||||
.progress-container {
|
||||
margin: 0.5rem 0;
|
||||
background: rgba(45, 55, 72, 0.3);
|
||||
border-radius: var(--radius-lg);
|
||||
padding: 0.5rem;
|
||||
border: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.progress-bar {
|
||||
width: 100%;
|
||||
height: 8px;
|
||||
background: rgba(45, 55, 72, 0.5);
|
||||
border-radius: var(--radius-sm);
|
||||
overflow: hidden;
|
||||
margin: 0.5rem 0;
|
||||
}
|
||||
|
||||
.progress-fill {
|
||||
height: 100%;
|
||||
background: linear-gradient(90deg, var(--accent-grad-start) 0%, var(--accent-grad-end) 100%);
|
||||
border-radius: var(--radius-sm);
|
||||
transition: width 0.3s ease;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.progress-fill::after {
|
||||
content: '';
|
||||
position: absolute;
|
||||
top: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
bottom: 0;
|
||||
background: linear-gradient(90deg, transparent, rgba(255,255,255,0.3), transparent);
|
||||
animation: progress-shine 2s infinite;
|
||||
}
|
||||
|
||||
@keyframes progress-shine {
|
||||
0% { transform: translateX(-100%); }
|
||||
100% { transform: translateX(100%); }
|
||||
}
|
||||
|
||||
.progress-info {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
font-size: 0.8rem;
|
||||
color: var(--text-secondary);
|
||||
margin-top: 0.25rem;
|
||||
}
|
||||
|
||||
.progress-metrics {
|
||||
display: flex;
|
||||
gap: 1rem;
|
||||
flex-wrap: wrap;
|
||||
font-size: 0.85rem;
|
||||
margin-top: 0.5rem;
|
||||
}
|
||||
|
||||
.metric-item {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.25rem;
|
||||
padding: 0.25rem 0.5rem;
|
||||
background: rgba(139, 92, 246, 0.1);
|
||||
border-radius: var(--radius-sm);
|
||||
border: 1px solid rgba(139, 92, 246, 0.2);
|
||||
}
|
||||
|
||||
.metric-label {
|
||||
font-weight: 500;
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
.metric-value {
|
||||
font-weight: 600;
|
||||
color: var(--accent);
|
||||
}
|
||||
.checkbox-group {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
|
||||
@ -17,6 +17,38 @@ const hooks = {
|
||||
|
||||
window.openTab = (evt, tabName) => _openTab(evt, tabName, hooks);
|
||||
|
||||
// 文件夹选择器功能
|
||||
/**
 * Let the user pick a folder and write it into a text input as "./<folderName>".
 *
 * Browsers never expose the absolute path of a picked directory: the File System
 * Access API only provides the handle's `name`, and the `webkitdirectory`
 * fallback only provides paths relative to the picked folder. The value written
 * is therefore just the folder name prefixed with "./" and may need manual
 * correction if the folder does not sit next to the server's working directory.
 *
 * @param {string} inputId - id of the <input> element that receives the path.
 * @returns {Promise<void>}
 */
window.selectFolder = async (inputId) => {
  // Resolve the target first so a wrong id is reported as such, instead of
  // surfacing later as a misleading "folder selection failed" alert (API branch)
  // or an uncaught TypeError inside the fallback's change handler.
  const target = document.getElementById(inputId);
  if (!target) {
    console.warn(`selectFolder: no element with id "${inputId}"`);
    return;
  }

  try {
    // Preferred path: File System Access API.
    if ('showDirectoryPicker' in window) {
      const dirHandle = await window.showDirectoryPicker();
      target.value = `./${dirHandle.name}`;
      return;
    }

    // Fallback: a transient <input type="file" webkitdirectory>.
    const picker = document.createElement('input');
    picker.type = 'file';
    picker.webkitdirectory = true;
    picker.onchange = () => {
      const firstFile = picker.files?.[0];
      if (!firstFile) return;
      // webkitRelativePath looks like "<pickedFolder>/sub/file"; keep the first segment.
      const [topLevelFolder] = firstFile.webkitRelativePath.split('/');
      target.value = `./${topLevelFolder}`;
    };
    picker.click();
  } catch (error) {
    // The user dismissing the picker is not an error: stay silent.
    if (error.name === 'AbortError') return;
    console.warn('文件夹选择失败:', error);
    alert('文件夹选择失败,请手动输入路径');
  }
};
|
||||
|
||||
window.addEventListener('load', () => {
|
||||
initTrainForm();
|
||||
startProcessPolling();
|
||||
|
||||
@ -4,6 +4,64 @@ import { showConfirmDialog } from '../ui/dialog.js';
|
||||
import { el, clearChildren } from '../utils/dom.js';
|
||||
import { showLogs, refreshLog, clearLogTimerFor } from './logs.js';
|
||||
|
||||
// 计算训练进度信息
|
||||
// "epoch 3/10", "Epoch: 3 / 10" and "Epoch:[3/10]" are all accepted.
const EPOCH_PATTERN = /epoch[\s:=]*\[?\s*(\d+)\s*\/\s*(\d+)/gi;
// "loss: 4.32", "loss=4.32" and "loss = 4.32"; the capture is a well-formed number.
const LOSS_PATTERN = /loss\s*[:=]?\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:e[-+]?\d+)?)/gi;

/**
 * Return the last match of a global pattern in `text`, or null when there is none.
 */
function findLastMatch(text, pattern) {
  let latest = null;
  for (const match of text.matchAll(pattern)) {
    latest = match;
  }
  return latest;
}

/**
 * Build the progress summary shown for a training process.
 *
 * Resolution order:
 *   1. process not running (or missing)      -> zeroed defaults
 *   2. `process.progress` supplied by server -> mapped field by field
 *   3. `process.logs` is a string            -> best-effort parse of its tail
 *   4. otherwise                             -> zeroed defaults
 *
 * @param {object} process - process record; reads `running`, `progress`, `logs`.
 * @returns {{percentage: number, current: number, total: number, remaining: string,
 *            loss: (string|number|null), epoch: (string|null), lr: (string|number|null)}}
 */
function calculateProgress(process) {
  const defaultProgress = {
    percentage: 0,
    current: 0,
    total: 0,
    remaining: '计算中...',
    loss: null,
    epoch: null,
    lr: null,
  };

  if (!process?.running) return defaultProgress;

  const reported = process.progress;
  if (reported) {
    return {
      percentage: reported.percentage || 0,
      current: reported.current_epoch || 0,
      total: reported.total_epochs || 0,
      // `||` on purpose: an empty string should also fall back to the placeholder.
      remaining: reported.remaining_time || '计算中...',
      loss: reported.current_loss ?? null,
      epoch: reported.current_epoch
        ? `${reported.current_epoch}/${reported.total_epochs}`
        : null,
      lr: reported.current_lr ?? null,
    };
  }

  // Fallback: scrape the last 1000 characters of the log text.
  if (typeof process.logs === 'string' && process.logs.length > 0) {
    const logTail = process.logs.slice(-1000);

    // Use the LAST occurrence: the tail can contain several epochs and the
    // newest one is at the end.
    const epochMatch = findLastMatch(logTail, EPOCH_PATTERN);
    if (epochMatch) {
      const current = Number.parseInt(epochMatch[1], 10);
      const total = Number.parseInt(epochMatch[2], 10);
      const percentage = total > 0
        ? Math.min(100, Math.max(0, Math.round((current / total) * 100)))
        : 0;

      const lossMatch = findLastMatch(logTail, LOSS_PATTERN);
      const lossValue = lossMatch ? Number.parseFloat(lossMatch[1]) : Number.NaN;

      return {
        percentage,
        current,
        total,
        remaining: '计算中...',
        loss: Number.isFinite(lossValue) ? lossValue.toFixed(4) : null,
        epoch: `${current}/${total}`,
        lr: null,
      };
    }
  }

  return defaultProgress;
}
|
||||
|
||||
let processPollingTimer = null;
|
||||
|
||||
export function startProcessPolling() {
|
||||
@ -135,11 +193,32 @@ export function addProcessItemToGroup(parent, process) {
|
||||
const showDelete = !process.running;
|
||||
const showSwanlab = process.train_monitor !== 'none';
|
||||
const swanBtn = showSwanlab ? `<button class="btn-swanlab" data-swan="${process.id}">SwanLab</button>` : '';
|
||||
|
||||
// 计算进度信息
|
||||
const progressInfo = calculateProgress(process);
|
||||
const progressBar = process.running ? `
|
||||
<div class="progress-container">
|
||||
<div class="progress-bar">
|
||||
<div class="progress-fill" style="width: ${progressInfo.percentage}%"></div>
|
||||
</div>
|
||||
<div class="progress-info">
|
||||
<span>进度: ${progressInfo.current}/${progressInfo.total}</span>
|
||||
<span>剩余时间: ${progressInfo.remaining}</span>
|
||||
</div>
|
||||
<div class="progress-metrics">
|
||||
${progressInfo.loss ? `<div class="metric-item"><span class="metric-label">Loss:</span><span class="metric-value">${progressInfo.loss}</span></div>` : ''}
|
||||
${progressInfo.epoch ? `<div class="metric-item"><span class="metric-label">Epoch:</span><span class="metric-value">${progressInfo.epoch}</span></div>` : ''}
|
||||
${progressInfo.lr ? `<div class="metric-item"><span class="metric-label">LR:</span><span class="metric-value">${progressInfo.lr}</span></div>` : ''}
|
||||
</div>
|
||||
</div>
|
||||
` : '';
|
||||
|
||||
item.innerHTML = `
|
||||
<div class="process-info">
|
||||
<div><strong>${process.start_time}</strong></div>
|
||||
<div><span class="process-status ${statusClass}">${process.status}</span></div>
|
||||
</div>
|
||||
${progressBar}
|
||||
<div>
|
||||
<button class="btn-logs" data-show="${process.id}">查看日志</button>
|
||||
<button class="btn-logs" data-refresh="${process.id}">刷新日志</button>
|
||||
|
||||
@ -59,7 +59,12 @@
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="data_path">数据路径:</label>
|
||||
<input type="text" id="data_path" name="data_path" value="./dataset" required>
|
||||
<div class="input-with-picker">
|
||||
<input type="text" id="data_path" name="data_path" value="./dataset" required>
|
||||
<button type="button" class="btn-picker" onclick="selectFolder('data_path')" title="选择文件夹">
|
||||
📁
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -104,7 +109,12 @@
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="reward_model_path">奖励模型路径:</label>
|
||||
<input type="text" id="reward_model_path" name="reward_model_path" placeholder="path/to/reward/model">
|
||||
<div class="input-with-picker">
|
||||
<input type="text" id="reward_model_path" name="reward_model_path" placeholder="path/to/reward/model">
|
||||
<button type="button" class="btn-picker" onclick="selectFolder('reward_model_path')" title="选择文件夹">
|
||||
📁
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -130,7 +140,12 @@
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="reward_model_path_grpo">奖励模型路径:</label>
|
||||
<input type="text" id="reward_model_path_grpo" name="reward_model_path" placeholder="../../internlm2-1_8b-reward">
|
||||
<div class="input-with-picker">
|
||||
<input type="text" id="reward_model_path_grpo" name="reward_model_path" placeholder="../../internlm2-1_8b-reward">
|
||||
<button type="button" class="btn-picker" onclick="selectFolder('reward_model_path_grpo')" title="选择文件夹">
|
||||
📁
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -152,7 +167,12 @@
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="reward_model_path_spo">奖励模型路径:</label>
|
||||
<input type="text" id="reward_model_path_spo" name="reward_model_path" placeholder="../../internlm2-1_8b-reward">
|
||||
<div class="input-with-picker">
|
||||
<input type="text" id="reward_model_path_spo" name="reward_model_path" placeholder="../../internlm2-1_8b-reward">
|
||||
<button type="button" class="btn-picker" onclick="selectFolder('reward_model_path_spo')" title="选择文件夹">
|
||||
📁
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -189,7 +209,12 @@
|
||||
<div class="parameter-content">
|
||||
<div class="form-group">
|
||||
<label for="save_dir">模型保存目录:</label>
|
||||
<input type="text" id="save_dir" name="save_dir" value="./checkpoints" required>
|
||||
<div class="input-with-picker">
|
||||
<input type="text" id="save_dir" name="save_dir" value="./checkpoints" required>
|
||||
<button type="button" class="btn-picker" onclick="selectFolder('save_dir')" title="选择文件夹">
|
||||
📁
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="save_interval">模型保存间隔:</label>
|
||||
@ -234,8 +259,10 @@
|
||||
</div>
|
||||
<div class="form-group" id="multi-gpu-selection" style="display: none;">
|
||||
<label for="gpu_num">多卡并行数:</label>
|
||||
<input type="number" id="gpu_num" name="gpu_num" min="1" max="{{ gpu_count|default(1) }}" value="{{ gpu_count|default(1) }}" required>
|
||||
<span class="hint-text">(可用GPU数量: {{ gpu_count|default(0) }})</span>
|
||||
<div class="input-with-picker">
|
||||
<input type="number" id="gpu_num" name="gpu_num" min="1" max="{{ gpu_count|default(1) }}" value="{{ gpu_count|default(1) }}" required>
|
||||
<span class="hint-text">(可用GPU数量: {{ gpu_count|default(0) }})</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="train_monitor">训练监控:</label>
|
||||
|
||||
@ -6,6 +6,7 @@ import json
|
||||
import socket
|
||||
import atexit
|
||||
import signal
|
||||
import re
|
||||
from flask import Flask, render_template, request, jsonify, redirect, url_for
|
||||
from flask import g
|
||||
import time
|
||||
@ -28,6 +29,120 @@ except ImportError:
|
||||
GPU_COUNT = 0
|
||||
GPU_NAMES = []
|
||||
|
||||
# A numeric literal as it may appear in a log line: optional sign, decimals, exponent.
_PROGRESS_NUMBER = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:e[-+]?\d+)?'
# Accepts "epoch 3/10", "Epoch: 3 / 10" and "Epoch:[3/10]".
_PROGRESS_EPOCH_RE = re.compile(r'epoch[\s:=]*\[?\s*(\d+)\s*/\s*(\d+)', re.IGNORECASE)
# Accepts "loss: 4.32", "loss=4.32" and "loss = 4.32".
_PROGRESS_LOSS_RE = re.compile(r'loss\s*[:=]?\s*(' + _PROGRESS_NUMBER + r')', re.IGNORECASE)
# Accepts "lr: 1e-4", "lr = 1e-4" and "learning_rate: 1e-4".
_PROGRESS_LR_RE = re.compile(
    r'(?:lr|learning_rate)\s*[:=]?\s*(' + _PROGRESS_NUMBER + r')', re.IGNORECASE
)


def _locate_training_log(process_id, process_info):
    """Return the path of the process's log file, or None if it cannot be found."""
    # Prefer the path recorded with the process, when it is present and readable.
    # NOTE(review): assumes 'log_file' holds a path usable from the current
    # working directory - confirm against start_training_process.
    recorded = process_info.get('log_file')
    if isinstance(recorded, str) and os.path.isfile(recorded):
        return recorded

    # Otherwise scan ../logfile (relative to this script) for "*<process_id>.log".
    script_dir = os.path.dirname(os.path.abspath(__file__))
    log_dir = os.path.abspath(os.path.join(script_dir, '../logfile'))
    if not os.path.isdir(log_dir):
        return None
    suffix = f'{process_id}.log'
    for filename in os.listdir(log_dir):
        if filename.endswith(suffix):
            return os.path.join(log_dir, filename)
    return None


def _tail_lines(file_path, n=1000):
    """Return the last ``n`` lines of a text file, stripped; [] if it is unreadable."""
    from collections import deque  # local import: only needed here

    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            # A bounded deque keeps only the newest n lines in a single pass.
            return [line.strip() for line in deque(f, maxlen=n)]
    except OSError:
        return []


def _format_remaining(seconds):
    """Render a duration in seconds as hours, minutes or seconds."""
    if seconds > 3600:
        return f"{seconds / 3600:.1f}小时"
    if seconds > 60:
        return f"{seconds / 60:.1f}分钟"
    return f"{int(seconds)}秒"


def calculate_training_progress(process_id, process_info):
    """
    Compute progress information for a training process from its log file.

    The newest log lines are scanned from the end for the most recent epoch
    counter, loss and learning rate.

    Args:
        process_id: identifier used to find the log file ("*<process_id>.log")
            when ``process_info`` carries no usable 'log_file' path.
        process_info: dict describing the process. Reads 'running', optionally
            'log_file' and 'start_timestamp'.

    Returns:
        dict with keys 'percentage' (int, 0-100), 'current_epoch', 'total_epochs',
        'remaining_time' (display string), 'current_loss' (string with 4 decimals
        or None) and 'current_lr' (string as logged, or None). A zeroed default
        is returned when the process is not running, the log cannot be found,
        or any error occurs.
    """
    progress = {
        'percentage': 0,
        'current_epoch': 0,
        'total_epochs': 0,
        'remaining_time': '计算中...',
        'current_loss': None,
        'current_lr': None
    }

    if not process_info.get('running', False):
        return progress

    try:
        log_file = _locate_training_log(process_id, process_info)
        if not log_file or not os.path.exists(log_file):
            return progress

        current_epoch = 0
        total_epochs = 0
        current_loss = None
        current_lr = None

        # Newest lines first, so the first hit for each field is the latest value.
        for line in reversed(_tail_lines(log_file, 1000)):
            if not total_epochs:
                epoch_match = _PROGRESS_EPOCH_RE.search(line)
                if epoch_match:
                    current_epoch = int(epoch_match.group(1))
                    total_epochs = int(epoch_match.group(2))

            # Compare with None: a loss of exactly 0.0 is a valid reading.
            if current_loss is None:
                loss_match = _PROGRESS_LOSS_RE.search(line)
                if loss_match:
                    current_loss = float(loss_match.group(1))

            if current_lr is None:
                lr_match = _PROGRESS_LR_RE.search(line)
                if lr_match:
                    current_lr = lr_match.group(1)

            if total_epochs and current_loss is not None and current_lr is not None:
                break

        percentage = 0
        if total_epochs > 0:
            percentage = min(100, max(0, int((current_epoch / total_epochs) * 100)))

        # Linear extrapolation from elapsed time. Skipped when the start time is
        # unknown, because a zero elapsed time would produce a bogus "0秒".
        remaining_time = '计算中...'
        started_at = process_info.get('start_timestamp')
        if started_at is not None and current_epoch > 0 and total_epochs > current_epoch:
            elapsed_time = time.time() - started_at
            time_per_epoch = elapsed_time / current_epoch
            remaining_time = _format_remaining((total_epochs - current_epoch) * time_per_epoch)

        return {
            'percentage': percentage,
            'current_epoch': current_epoch,
            'total_epochs': total_epochs,
            'remaining_time': remaining_time,
            'current_loss': f"{current_loss:.4f}" if current_loss is not None else None,
            'current_lr': current_lr
        }

    except Exception as e:
        # Best effort: progress is cosmetic, so never let it break the caller.
        print(f"计算进度时出错: {e}")
        return progress
|
||||
|
||||
# 训练方式支持检测
|
||||
def get_supported_training_methods():
|
||||
"""获取当前环境支持的训练方法"""
|
||||
@ -107,6 +222,7 @@ def start_training_process(train_type, params, client_id=None):
|
||||
'train_type': train_type,
|
||||
'log_file': log_file,
|
||||
'start_time': time.strftime('%Y-%m-%d %H:%M:%S'),
|
||||
'start_timestamp': time.time(), # 添加时间戳用于进度计算
|
||||
'running': True,
|
||||
'error': False,
|
||||
'train_monitor': params.get('train_monitor', 'none'), # 保存训练监控设置
|
||||
@ -187,6 +303,9 @@ def processes():
|
||||
status = '运行中' if info['running'] else \
|
||||
'手动停止' if 'manually_stopped' in info and info['manually_stopped'] else \
|
||||
'出错' if info['error'] else '已完成'
|
||||
|
||||
# 计算训练进度信息
|
||||
progress = calculate_training_progress(process_id, info)
|
||||
|
||||
result.append({
|
||||
'id': process_id,
|
||||
@ -196,7 +315,8 @@ def processes():
|
||||
'error': info['error'],
|
||||
'status': status,
|
||||
'train_monitor': info.get('train_monitor', 'none'), # 添加train_monitor字段
|
||||
'swanlab_url': info.get('swanlab_url') # 添加swanlab_url字段
|
||||
'swanlab_url': info.get('swanlab_url'), # 添加swanlab_url字段
|
||||
'progress': progress # 添加进度信息
|
||||
})
|
||||
return jsonify(result)
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user