From 288e1ac02a7662aaf862dcf7f4b5114f4a2f57a6 Mon Sep 17 00:00:00 2001 From: jingyaogong Date: Fri, 6 Feb 2026 01:36:02 +0800 Subject: [PATCH] [update] empty_think_ratio --- dataset/lm_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataset/lm_dataset.py b/dataset/lm_dataset.py index cf663d4..667ad86 100644 --- a/dataset/lm_dataset.py +++ b/dataset/lm_dataset.py @@ -23,7 +23,7 @@ def pre_processing_chat(conversations, add_system_ratio=0.2): return [{'role': 'system', 'content': random.choice(SYSTEM_PROMPTS)}] + conversations return conversations -def post_processing_chat(prompt_content, empty_think_ratio=0.1): +def post_processing_chat(prompt_content, empty_think_ratio=0.02): if '\n\n\n\n' in prompt_content and random.random() > empty_think_ratio: prompt_content = prompt_content.replace('\n\n\n\n', '') return prompt_content