[None][test] restrict max_num_tokens in disagg mtp config (#10442)

Signed-off-by: Ruodi Lu <ruodil@users.noreply.github.com>
Co-authored-by: Ruodi Lu <ruodil@users.noreply.github.com>
Authored by ruodil on 2026-01-09 10:53:24 +08:00; committed by GitHub
parent afa55c12b6
commit d707286ca8
17 changed files with 30 additions and 17 deletions
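
All 17 hunks apply the same cap, spelled out in the added comments: max_num_tokens = max_batch_size * (mtp_size + 1), presumably because each request needs at most one sampled token plus mtp_size MTP draft tokens per step. A minimal sketch of that arithmetic (the function name is illustrative, not from this commit):

    def capped_max_num_tokens(max_batch_size: int, mtp_size: int) -> int:
        # Each request contributes at most 1 target token plus mtp_size
        # draft tokens per step, so the generation worker never needs
        # more than max_batch_size * (mtp_size + 1) tokens in flight.
        return max_batch_size * (mtp_size + 1)

    # Values matching the hunks below:
    assert capped_max_num_tokens(1, 3) == 4
    assert capped_max_num_tokens(4, 2) == 12
    assert capped_max_num_tokens(16, 1) == 32
    assert capped_max_num_tokens(8, 3) == 32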

@@ -49,7 +49,8 @@ worker_config:
   enable_attention_dp: false
   pipeline_parallel_size: 4
   max_batch_size: 1
-  max_num_tokens: 128
+  # mtp_size=3 ⇒ max_num_tokens = 1 * (3 + 1) = 4
+  max_num_tokens: 4
   max_seq_len: 139296
   cuda_graph_config:
     enable_padding: true

@@ -49,7 +49,8 @@ worker_config:
   enable_attention_dp: true
   pipeline_parallel_size: 1
   max_batch_size: 1
-  max_num_tokens: 128
+  # mtp_size=3 ⇒ max_num_tokens = 1 * (3 + 1) = 4
+  max_num_tokens: 4
   max_seq_len: 139296
   cuda_graph_config:
     enable_padding: true

@@ -49,7 +49,8 @@ worker_config:
   enable_attention_dp: true
   pipeline_parallel_size: 1
   max_batch_size: 4
-  max_num_tokens: 128
+  # mtp_size=2 ⇒ max_num_tokens = 4 * (2 + 1) = 12
+  max_num_tokens: 12
   max_seq_len: 139296
   cuda_graph_config:
     enable_padding: true

@@ -49,7 +49,8 @@ worker_config:
   enable_attention_dp: false
   pipeline_parallel_size: 1
   max_batch_size: 1
-  max_num_tokens: 128
+  # mtp_size=3 ⇒ max_num_tokens = 1 * (3 + 1) = 4
+  max_num_tokens: 4
   max_seq_len: 139296
   cuda_graph_config:
     enable_padding: true

@@ -49,7 +49,8 @@ worker_config:
   enable_attention_dp: false
   pipeline_parallel_size: 1
   max_batch_size: 2
-  max_num_tokens: 128
+  # mtp_size=3 ⇒ max_num_tokens = 2 * (3 + 1) = 8
+  max_num_tokens: 8
   max_seq_len: 139296
   cuda_graph_config:
     enable_padding: true

@@ -49,7 +49,7 @@ worker_config:
   enable_attention_dp: false
   pipeline_parallel_size: 1
   max_batch_size: 2
-  max_num_tokens: 128
+  max_num_tokens: 8
   max_seq_len: 139296
   cuda_graph_config:
     enable_padding: true

@@ -49,7 +49,7 @@ worker_config:
   enable_attention_dp: false
   pipeline_parallel_size: 1
   max_batch_size: 2
-  max_num_tokens: 128
+  max_num_tokens: 6
   max_seq_len: 139296
   cuda_graph_config:
     enable_padding: true

@@ -49,7 +49,7 @@ worker_config:
   enable_attention_dp: false
   pipeline_parallel_size: 1
   max_batch_size: 2
-  max_num_tokens: 128
+  max_num_tokens: 8
   max_seq_len: 139296
   cuda_graph_config:
     enable_padding: true

@@ -49,7 +49,7 @@ worker_config:
   enable_attention_dp: true
   pipeline_parallel_size: 1
   max_batch_size: 16
-  max_num_tokens: 128
+  max_num_tokens: 32
   max_seq_len: 139296
   cuda_graph_config:
     enable_padding: true

@@ -49,7 +49,8 @@ worker_config:
   enable_attention_dp: true
   pipeline_parallel_size: 1
   max_batch_size: 8
-  max_num_tokens: 128
+  # mtp_size=2 ⇒ max_num_tokens = 8 * (2 + 1) = 24
+  max_num_tokens: 24
   max_seq_len: 139296
   cuda_graph_config:
     enable_padding: true

@@ -49,7 +49,8 @@ worker_config:
   enable_attention_dp: true
   pipeline_parallel_size: 1
   max_batch_size: 2
-  max_num_tokens: 128
+  # mtp_size=3 ⇒ max_num_tokens = 2 * (3 + 1) = 8
+  max_num_tokens: 8
   max_seq_len: 139296
   cuda_graph_config:
     enable_padding: true

@@ -49,7 +49,8 @@ worker_config:
   enable_attention_dp: true
   pipeline_parallel_size: 1
   max_batch_size: 8
-  max_num_tokens: 128
+  # mtp_size=3 ⇒ max_num_tokens = 8 * (3 + 1) = 32
+  max_num_tokens: 32
   max_seq_len: 139296
   cuda_graph_config:
     enable_padding: true

@@ -49,7 +49,8 @@ worker_config:
   enable_attention_dp: true
   pipeline_parallel_size: 1
   max_batch_size: 2
-  max_num_tokens: 128
+  # mtp_size=3 ⇒ max_num_tokens = 2 * (3 + 1) = 8
+  max_num_tokens: 8
   max_seq_len: 139296
   cuda_graph_config:
     enable_padding: true

@@ -49,7 +49,8 @@ worker_config:
   enable_attention_dp: true
   pipeline_parallel_size: 1
   max_batch_size: 16
-  max_num_tokens: 128
+  # mtp_size=1 ⇒ max_num_tokens = 16 * (1 + 1) = 32
+  max_num_tokens: 32
   max_seq_len: 139296
   cuda_graph_config:
     enable_padding: true

@@ -49,7 +49,8 @@ worker_config:
   enable_attention_dp: true
   pipeline_parallel_size: 1
   max_batch_size: 32
-  max_num_tokens: 128
+  # mtp_size=1 ⇒ max_num_tokens = 32 * (1 + 1) = 64
+  max_num_tokens: 64
   max_seq_len: 139296
   cuda_graph_config:
     enable_padding: true

@@ -49,7 +49,8 @@ worker_config:
   enable_attention_dp: true
   pipeline_parallel_size: 1
   max_batch_size: 4
-  max_num_tokens: 128
+  # mtp_size=3 ⇒ max_num_tokens = 4 * (3 + 1) = 16
+  max_num_tokens: 16
   max_seq_len: 139296
   cuda_graph_config:
     enable_padding: true

@@ -49,7 +49,8 @@ worker_config:
   enable_attention_dp: true
   pipeline_parallel_size: 1
   max_batch_size: 8
-  max_num_tokens: 128
+  # mtp_size=3 ⇒ max_num_tokens = 8 * (3 + 1) = 32
+  max_num_tokens: 32
   max_seq_len: 139296
   cuda_graph_config:
     enable_padding: true