# Patch summary: restructures the `accuracy` section of two SLURM example configs,
#   examples/disaggregated/slurm/benchmark/config.yaml and examples/wide_ep/slurm_scripts/config.yaml,
# with the same hunk content in both files.
#
# Removed: the flat string keys `model`, `tasks` ("gsm8k") and `model_args_extra`.
# Added:   a `tasks:` key with a `gsm8k:` entry that now carries `model`, `model_args_extra`
#          and a new `extra_kwargs:` entry containing `trust_remote_code: true`.
# Inside `model_args_extra`: timeout goes 1200 -> 7200, max_gen_toks goes 256 -> 16384,
#          and `max_length=4096` is no longer passed.
#
# NOTE(review): the hunk headers declare 9 old / 12 new lines, yet the whole patch sits on one
#   physical line here; line breaks and indentation appear to have been lost. Restore them from
#   the original commit before applying; the nesting described above is inferred from key order.
# NOTE(review): `tasks` changes from a string to a mapping; any script that reads
#   `accuracy.model` / `accuracy.tasks` / `accuracy.model_args_extra` as flat values presumably
#   needs the matching update (not visible in this patch). TODO confirm.
# NOTE(review): `trust_remote_code: true` is presumably forwarded to the evaluation harness and,
#   if so, permits running repository-supplied code; confirm this is intended as an example default.
diff --git a/examples/disaggregated/slurm/benchmark/config.yaml b/examples/disaggregated/slurm/benchmark/config.yaml index 50761e7e8e..e4bda5707d 100644 --- a/examples/disaggregated/slurm/benchmark/config.yaml +++ b/examples/disaggregated/slurm/benchmark/config.yaml @@ -49,9 +49,12 @@ profiling: # Accuracy Configuration accuracy: enable_accuracy_test: false # Set to true to enable accuracy evaluation - model: "local-completions" # Model type for lm_eval - tasks: "gsm8k" # Evaluation tasks (comma-separated) - model_args_extra: "num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096" # Extra model arguments for lm_eval + tasks: + gsm8k: + model: "local-completions" # Model type for lm_eval + model_args_extra: "num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=7200,max_gen_toks=16384" + extra_kwargs: + trust_remote_code: true worker_config: gen: diff --git a/examples/wide_ep/slurm_scripts/config.yaml b/examples/wide_ep/slurm_scripts/config.yaml index 2f10c9707d..5c5b4441b4 100644 --- a/examples/wide_ep/slurm_scripts/config.yaml +++ b/examples/wide_ep/slurm_scripts/config.yaml @@ -47,9 +47,12 @@ profiling: # Accuracy Configuration accuracy: enable_accuracy_test: false # Set to true to enable accuracy evaluation - model: "local-completions" # Model type for lm_eval - tasks: "gsm8k" # Evaluation tasks (comma-separated) - model_args_extra: "num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096" # Extra model arguments for lm_eval + tasks: + gsm8k: + model: "local-completions" # Model type for lm_eval + model_args_extra: "num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=7200,max_gen_toks=16384" + extra_kwargs: + trust_remote_code: true worker_config: gen: