[feat] shuffle data

This commit is contained in:
jingyaogong 2025-10-23 20:13:28 +08:00
parent 805744e60a
commit a82526da11
9 changed files with 9 additions and 9 deletions

View File

@@ -199,7 +199,7 @@ if __name__ == "__main__":
 batch_size=args.batch_size,
 pin_memory=True,
 drop_last=False,
-shuffle=False,
+shuffle=(train_sampler is None),
 num_workers=args.num_workers,
 sampler=train_sampler
 )

View File

@@ -248,7 +248,7 @@ if __name__ == "__main__":
 batch_size=args.batch_size,
 pin_memory=True,
 drop_last=False,
-shuffle=False,
+shuffle=(train_sampler is None),
 num_workers=args.num_workers,
 sampler=train_sampler
 )

View File

@@ -231,7 +231,7 @@ if __name__ == "__main__":
 batch_size=args.batch_size,
 pin_memory=True,
 drop_last=False,
-shuffle=False,
+shuffle=(train_sampler is None),
 num_workers=args.num_workers,
 sampler=train_sampler
 )

View File

@@ -185,7 +185,7 @@ if __name__ == "__main__":
 batch_size=args.batch_size,
 pin_memory=True,
 drop_last=False,
-shuffle=False,
+shuffle=(train_sampler is None),
 num_workers=args.num_workers,
 sampler=train_sampler
 )

View File

@@ -299,7 +299,7 @@ if __name__ == "__main__":
 train_ds = RLAIFDataset(args.data_path, tokenizer, max_length=lm_config.max_seq_len)
 train_sampler = DistributedSampler(train_ds) if ddp else None
 train_loader = DataLoader(train_ds, batch_size=args.batch_size, pin_memory=True,
-drop_last=False, shuffle=False,
+drop_last=False, shuffle=(train_sampler is None),
 num_workers=args.num_workers, sampler=train_sampler)
 optimizer = optim.AdamW(model.parameters(), lr=args.learning_rate)

View File

@@ -196,7 +196,7 @@ if __name__ == "__main__":
 batch_size=args.batch_size,
 pin_memory=True,
 drop_last=False,
-shuffle=False,
+shuffle=(train_sampler is None),
 num_workers=args.num_workers,
 sampler=train_sampler
 )

View File

@@ -344,7 +344,7 @@ if __name__ == "__main__":
 train_ds = RLAIFDataset(args.data_path, tokenizer, max_length=(args.max_seq_len + args.max_gen_len))
 train_sampler = DistributedSampler(train_ds) if ddp else None
 train_loader = DataLoader(train_ds, batch_size=args.batch_size, pin_memory=True,
-drop_last=False, shuffle=False,
+drop_last=False, shuffle=(train_sampler is None),
 num_workers=args.num_workers, sampler=train_sampler)
 # 初始化优化器

View File

@@ -183,7 +183,7 @@ if __name__ == "__main__":
 batch_size=args.batch_size,
 pin_memory=True,
 drop_last=False,
-shuffle=False,
+shuffle=(train_sampler is None),
 num_workers=args.num_workers,
 sampler=train_sampler
 )

View File

@@ -348,7 +348,7 @@ if __name__ == "__main__":
 train_ds = RLAIFDataset(args.data_path, tokenizer, max_length=lm_config.max_seq_len)
 train_sampler = DistributedSampler(train_ds) if ddp else None
 train_loader = DataLoader(train_ds, batch_size=args.batch_size, pin_memory=True,
-drop_last=False, shuffle=False,
+drop_last=False, shuffle=(train_sampler is None),
 num_workers=args.num_workers, sampler=train_sampler)
 optimizer = optim.AdamW(model.parameters(), lr=args.learning_rate)