From db2d948f9330e97caf44e5c8a60b76ff94682b69 Mon Sep 17 00:00:00 2001 From: vanking <128895735+vanking20000918@users.noreply.github.com> Date: Tue, 3 Feb 2026 10:34:20 +0800 Subject: [PATCH] Update train_gated_ppo.py --- train_gated_ppo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_gated_ppo.py b/train_gated_ppo.py index 6aa966b..58135c2 100644 --- a/train_gated_ppo.py +++ b/train_gated_ppo.py @@ -166,7 +166,7 @@ def ppo_train_epoch(epoch, loader, iters, old_actor_model, ref_model, actor_sche # 修改部分:添加门控 ratio = torch.exp(actor_logp - old_logp) # [B] - ratio = ratio * torch.sigmoid(0.5 * ratio) + ratio = ratio * torch.sigmoid(0.1 * ratio) surr1 = ratio * advantages # [B] surr2 = torch.clamp(ratio, 1.0 - args.clip_epsilon, 1.0 + args.clip_epsilon) * advantages # [B]