jingyaogong
|
42a4e8c86a
|
[fix] dist cleanup
|
2026-01-02 22:25:55 +08:00 |
|
jingyaogong
|
9d898576ac
|
[update] aux loss
|
2026-01-01 22:41:46 +08:00 |
|
jingyaogong
|
c65335b56f
|
[fix] experts unused
|
2025-12-31 21:47:04 +08:00 |
|
jingyaogong
|
5dd4df7e18
|
[fix] moe unused
|
2025-12-31 21:00:06 +08:00 |
|
jingyaogong
|
7eae14f3ce
|
[feat] remove empty_cache
|
2025-12-27 07:14:36 +08:00 |
|
jingyaogong
|
fe24501602
|
[feat] adjust seq length
|
2025-12-14 20:41:58 +08:00 |
|
jingyaogong
|
5e1447b913
|
[fix] cuda memory #559
|
2025-12-01 16:17:43 +08:00 |
|
jingyaogong
|
6b86ea399a
|
[feat] release memory
|
2025-11-27 19:39:49 +08:00 |
|
jingyaogong
|
509d8dacf1
|
[feat] clear cache
|
2025-11-06 13:12:28 +08:00 |
|
jingyaogong
|
bf123b585d
|
[feat] add args
|
2025-10-30 10:05:12 +08:00 |
|
jingyaogong
|
1713c24114
|
[fix] model device
|
2025-10-29 10:36:28 +08:00 |
|
jingyaogong
|
acd5925193
|
[feat] update trainer
|
2025-10-29 00:52:37 +08:00 |
|
jingyaogong
|
8f7e07b8ef
|
[feat] update trainer
|
2025-10-28 23:30:10 +08:00 |
|
jingyaogong
|
e8484874f5
|
[feat] pause-training
|
2025-10-26 18:49:52 +08:00 |
|
jingyaogong
|
a82526da11
|
[feat] shuffle data
|
2025-10-23 20:13:28 +08:00 |
|
jingyaogong
|
805744e60a
|
[fix] loss-issues-430
|
2025-10-23 19:08:42 +08:00 |
|
jingyaogong
|
463044e92a
|
[fix] sampler-ddp
|
2025-10-23 15:03:19 +08:00 |
|
jingyaogong
|
fa7dff8291
|
[fix] graph-oom & ddp-pos_cis
|
2025-10-23 14:22:13 +08:00 |
|
jingyaogong
|
7d877db79b
|
[feat] minimind-2510
|
2025-10-21 21:19:47 +08:00 |
|
jingyaogong
|
caae54a89e
|
fix bugs
|
2025-04-29 20:45:39 +08:00 |
|
jingyaogong
|
a62faf34bd
|
250426
|
2025-04-26 10:05:47 +08:00 |
|