[feat] shuffle data

This commit is contained in:
jingyaogong
2025-10-23 20:13:28 +08:00
parent 805744e60a
commit a82526da11
9 changed files with 9 additions and 9 deletions
+1 -1
View File
@@ -199,7 +199,7 @@ if __name__ == "__main__":
batch_size=args.batch_size,
pin_memory=True,
drop_last=False,
-shuffle=False,
+shuffle=(train_sampler is None),
num_workers=args.num_workers,
sampler=train_sampler
)
+1 -1
View File
@@ -248,7 +248,7 @@ if __name__ == "__main__":
batch_size=args.batch_size,
pin_memory=True,
drop_last=False,
-shuffle=False,
+shuffle=(train_sampler is None),
num_workers=args.num_workers,
sampler=train_sampler
)
+1 -1
View File
@@ -231,7 +231,7 @@ if __name__ == "__main__":
batch_size=args.batch_size,
pin_memory=True,
drop_last=False,
-shuffle=False,
+shuffle=(train_sampler is None),
num_workers=args.num_workers,
sampler=train_sampler
)
+1 -1
View File
@@ -185,7 +185,7 @@ if __name__ == "__main__":
batch_size=args.batch_size,
pin_memory=True,
drop_last=False,
-shuffle=False,
+shuffle=(train_sampler is None),
num_workers=args.num_workers,
sampler=train_sampler
)
+1 -1
View File
@@ -299,7 +299,7 @@ if __name__ == "__main__":
train_ds = RLAIFDataset(args.data_path, tokenizer, max_length=lm_config.max_seq_len)
train_sampler = DistributedSampler(train_ds) if ddp else None
train_loader = DataLoader(train_ds, batch_size=args.batch_size, pin_memory=True,
-drop_last=False, shuffle=False,
+drop_last=False, shuffle=(train_sampler is None),
num_workers=args.num_workers, sampler=train_sampler)
optimizer = optim.AdamW(model.parameters(), lr=args.learning_rate)
+1 -1
View File
@@ -196,7 +196,7 @@ if __name__ == "__main__":
batch_size=args.batch_size,
pin_memory=True,
drop_last=False,
-shuffle=False,
+shuffle=(train_sampler is None),
num_workers=args.num_workers,
sampler=train_sampler
)
+1 -1
View File
@@ -344,7 +344,7 @@ if __name__ == "__main__":
train_ds = RLAIFDataset(args.data_path, tokenizer, max_length=(args.max_seq_len + args.max_gen_len))
train_sampler = DistributedSampler(train_ds) if ddp else None
train_loader = DataLoader(train_ds, batch_size=args.batch_size, pin_memory=True,
-drop_last=False, shuffle=False,
+drop_last=False, shuffle=(train_sampler is None),
num_workers=args.num_workers, sampler=train_sampler)
# 初始化优化器
+1 -1
View File
@@ -183,7 +183,7 @@ if __name__ == "__main__":
batch_size=args.batch_size,
pin_memory=True,
drop_last=False,
-shuffle=False,
+shuffle=(train_sampler is None),
num_workers=args.num_workers,
sampler=train_sampler
)
+1 -1
View File
@@ -348,7 +348,7 @@ if __name__ == "__main__":
train_ds = RLAIFDataset(args.data_path, tokenizer, max_length=lm_config.max_seq_len)
train_sampler = DistributedSampler(train_ds) if ddp else None
train_loader = DataLoader(train_ds, batch_size=args.batch_size, pin_memory=True,
-drop_last=False, shuffle=False,
+drop_last=False, shuffle=(train_sampler is None),
num_workers=args.num_workers, sampler=train_sampler)
optimizer = optim.AdamW(model.parameters(), lr=args.learning_rate)