mirror of
https://github.com/jingyaogong/minimind.git
synced 2026-04-25 08:48:16 +08:00
Update train_gated_ppo.py
This commit is contained in:
parent
0b37f04f15
commit
db2d948f93
@@ -166,7 +166,7 @@ def ppo_train_epoch(epoch, loader, iters, old_actor_model, ref_model, actor_sche
|
||||
|
||||
# 修改部分:添加门控
|
||||
ratio = torch.exp(actor_logp - old_logp) # [B]
|
||||
ratio = ratio * torch.sigmoid(0.5 * ratio)
|
||||
ratio = ratio * torch.sigmoid(0.1 * ratio)
|
||||
|
||||
surr1 = ratio * advantages # [B]
|
||||
surr2 = torch.clamp(ratio, 1.0 - args.clip_epsilon, 1.0 + args.clip_epsilon) * advantages # [B]
|
||||
|
||||
Loading…
Reference in New Issue
Block a user