mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
Add flake8-implicit-str-concat rules to Ruff (#33191)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -393,7 +393,7 @@ if __name__ == "__main__":
|
||||
with open(results_folder / md_file, "w") as f:
|
||||
results = read_markdown(
|
||||
"../.buildkite/performance-benchmarks/"
|
||||
+ "performance-benchmarks-descriptions.md"
|
||||
"performance-benchmarks-descriptions.md"
|
||||
)
|
||||
results = results.format(
|
||||
latency_tests_markdown_table=latency_md_table,
|
||||
|
||||
@@ -288,8 +288,8 @@ def generate_sch_sig(schedule_config: ScheduleConfig) -> str:
|
||||
)
|
||||
cluster_shape = (
|
||||
f"{schedule_config.cluster_shape_mnk[0]}"
|
||||
+ f"x{schedule_config.cluster_shape_mnk[1]}"
|
||||
+ f"x{schedule_config.cluster_shape_mnk[2]}"
|
||||
f"x{schedule_config.cluster_shape_mnk[1]}"
|
||||
f"x{schedule_config.cluster_shape_mnk[2]}"
|
||||
)
|
||||
kernel_schedule = VLLMKernelScheduleTag[schedule_config.kernel_schedule].split(
|
||||
"::"
|
||||
@@ -301,7 +301,7 @@ def generate_sch_sig(schedule_config: ScheduleConfig) -> str:
|
||||
|
||||
return (
|
||||
f"{tile_shape}_{cluster_shape}_{kernel_schedule}"
|
||||
+ f"_{epilogue_schedule}_{tile_scheduler}"
|
||||
f"_{epilogue_schedule}_{tile_scheduler}"
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ from vllm import LLM, SamplingParams
|
||||
# A prompt containing a large markdown table. The table is randomly generated by GPT-4.
|
||||
LONG_PROMPT = (
|
||||
"You are a helpful assistant in recognizes the content of tables in markdown format. Here is a table as follows.\n# Table\n"
|
||||
+ """
|
||||
"""
|
||||
| ID | Name | Age | Occupation | Country | Email | Phone Number | Address |
|
||||
|-----|---------------|-----|---------------|---------------|------------------------|----------------|------------------------------|
|
||||
| 1 | John Doe | 29 | Engineer | USA | john.doe@example.com | 555-1234 | 123 Elm St, Springfield, IL |
|
||||
|
||||
@@ -69,10 +69,10 @@ class StatsCalculator:
|
||||
np_arr = np.array(self._stats)
|
||||
output_str = (
|
||||
f"\nNum requests: {len(self._stats)}"
|
||||
+ "\nPrefill node TTFT stats:"
|
||||
+ f"\n - Average (ms): {np.mean(np_arr)}"
|
||||
+ f"\n - Median (ms): {np.median(np_arr)}"
|
||||
+ f"\n - 99th Percentile (ms): {np.percentile(np_arr, 99)}\n"
|
||||
"\nPrefill node TTFT stats:"
|
||||
f"\n - Average (ms): {np.mean(np_arr)}"
|
||||
f"\n - Median (ms): {np.median(np_arr)}"
|
||||
f"\n - 99th Percentile (ms): {np.percentile(np_arr, 99)}\n"
|
||||
)
|
||||
print(
|
||||
"===============================",
|
||||
|
||||
@@ -72,6 +72,8 @@ select = [
|
||||
"UP",
|
||||
# flake8-bugbear
|
||||
"B",
|
||||
# flake8-implicit-str-concat
|
||||
"ISC",
|
||||
# flake8-simplify
|
||||
"SIM",
|
||||
# isort
|
||||
|
||||
@@ -193,7 +193,7 @@ TEST_CASES = [
|
||||
pytest.param(
|
||||
False,
|
||||
"<|python_start|>[get_weather(city='LA', metric='C'), "
|
||||
+ "register_user(name='Doe', age=9)]",
|
||||
"register_user(name='Doe', age=9)]",
|
||||
[
|
||||
SIMPLE_FUNCTION_CALL,
|
||||
FunctionCall(name="register_user", arguments='{"name": "Doe", "age": 9}'),
|
||||
|
||||
@@ -24,9 +24,9 @@ def parser(deepseekv31_tokenizer):
|
||||
def test_extract_tool_calls_with_tool(parser):
|
||||
model_output = (
|
||||
"normal text"
|
||||
+ "<|tool▁calls▁begin|>"
|
||||
+ '<|tool▁call▁begin|>foo<|tool▁sep|>{"x":1}<|tool▁call▁end|>'
|
||||
+ "<|tool▁calls▁end|>"
|
||||
"<|tool▁calls▁begin|>"
|
||||
'<|tool▁call▁begin|>foo<|tool▁sep|>{"x":1}<|tool▁call▁end|>'
|
||||
"<|tool▁calls▁end|>"
|
||||
)
|
||||
result = parser.extract_tool_calls(model_output, None)
|
||||
assert result.tools_called
|
||||
@@ -39,11 +39,11 @@ def test_extract_tool_calls_with_tool(parser):
|
||||
def test_extract_tool_calls_with_multiple_tools(parser):
|
||||
model_output = (
|
||||
"some prefix text"
|
||||
+ "<|tool▁calls▁begin|>"
|
||||
+ '<|tool▁call▁begin|>foo<|tool▁sep|>{"x":1}<|tool▁call▁end|>'
|
||||
+ '<|tool▁call▁begin|>bar<|tool▁sep|>{"y":2}<|tool▁call▁end|>'
|
||||
+ "<|tool▁calls▁end|>"
|
||||
+ " some suffix text"
|
||||
"<|tool▁calls▁begin|>"
|
||||
'<|tool▁call▁begin|>foo<|tool▁sep|>{"x":1}<|tool▁call▁end|>'
|
||||
'<|tool▁call▁begin|>bar<|tool▁sep|>{"y":2}<|tool▁call▁end|>'
|
||||
"<|tool▁calls▁end|>"
|
||||
" some suffix text"
|
||||
)
|
||||
|
||||
result = parser.extract_tool_calls(model_output, None)
|
||||
|
||||
+1
-1
@@ -1302,7 +1302,7 @@ def prep_prompts(batch_size: int, ln_range: tuple[int, int] = (800, 1100)):
|
||||
indices.append(idx)
|
||||
prompt = (
|
||||
"```python\n# We set a number of variables, "
|
||||
+ f"x{idx} will be important later\n"
|
||||
f"x{idx} will be important later\n"
|
||||
)
|
||||
ln = random.randint(*ln_range)
|
||||
for k in range(30, ln):
|
||||
|
||||
@@ -134,8 +134,7 @@ class BenchmarkDataset(ABC):
|
||||
content.append(mm_content)
|
||||
else:
|
||||
raise TypeError(
|
||||
"Could not process multimodal content of type: "
|
||||
+ f"{type(mm_content)}"
|
||||
f"Could not process multimodal content of type: {type(mm_content)}"
|
||||
)
|
||||
return [{"role": "user", "content": content}]
|
||||
|
||||
|
||||
@@ -208,8 +208,8 @@ class TorchCompileWithNoGuardsWrapper:
|
||||
if not hasattr(self._compiled_callable, "aot_compile"):
|
||||
raise RuntimeError(
|
||||
"aot_compile is not supported by the current configuration. "
|
||||
+ "Please make sure torch.compile is enabled with the latest "
|
||||
+ f"version of PyTorch (current using torch: {torch.__version__})"
|
||||
"Please make sure torch.compile is enabled with the latest "
|
||||
f"version of PyTorch (current using torch: {torch.__version__})"
|
||||
)
|
||||
return self._compiled_callable.aot_compile((args, kwargs))
|
||||
|
||||
|
||||
@@ -406,8 +406,8 @@ class OpenAISpeechToText(OpenAIServing):
|
||||
|
||||
if request.response_format not in ["text", "json", "verbose_json"]:
|
||||
return self.create_error_response(
|
||||
("Currently only support response_format")
|
||||
+ ("`text`, `json` or `verbose_json`")
|
||||
"Currently only support response_format: "
|
||||
"`text`, `json` or `verbose_json`"
|
||||
)
|
||||
|
||||
if (
|
||||
|
||||
@@ -32,8 +32,8 @@ class AiterInt8ScaledMMLinearKernel(CutlassInt8ScaledMMLinearKernel):
|
||||
return (
|
||||
False,
|
||||
"requires setting `VLLM_ROCM_USE_AITER=1` "
|
||||
+ "and `VLLM_ROCM_USE_AITER_LINEAR=1`. "
|
||||
+ "`VLLM_ROCM_USE_AITER_LINEAR` default is True.",
|
||||
"and `VLLM_ROCM_USE_AITER_LINEAR=1`. "
|
||||
"`VLLM_ROCM_USE_AITER_LINEAR` default is True.",
|
||||
)
|
||||
return True, None
|
||||
|
||||
@@ -97,9 +97,9 @@ class AiterInt8ScaledMMLinearKernel(CutlassInt8ScaledMMLinearKernel):
|
||||
per_token_scale_a and per_channel_scale_b
|
||||
), (
|
||||
"Currently only support per-tensor-per-tensor GEMM "
|
||||
+ " and per-token-per-channel GEMM through AITER"
|
||||
" and per-token-per-channel GEMM through AITER"
|
||||
" w8a8 scaled gemm. `AiterInt8ScaledMMLinearKernel` "
|
||||
+ "does not support AITER block scaled GEMM."
|
||||
"does not support AITER block scaled GEMM."
|
||||
)
|
||||
|
||||
# gemm_a8w8_CK(a, b, scale_a, scale_b, bias) expects
|
||||
|
||||
@@ -234,7 +234,7 @@ class Olmo3ReasoningParser(ReasoningParser):
|
||||
# reasoning template.
|
||||
reasoning_expr = (
|
||||
rf"^(?:{self.think_start})?(?P<reasoning>.*?)"
|
||||
+ rf"{self.think_end}(?P<content>.*)$"
|
||||
rf"{self.think_end}(?P<content>.*)$"
|
||||
)
|
||||
self.reasoning_regex = re.compile(reasoning_expr, re.DOTALL)
|
||||
|
||||
|
||||
@@ -216,7 +216,7 @@ class FlashAttnMLAMetadataBuilder(MLACommonMetadataBuilder[FlashAttnMLAMetadata]
|
||||
# Ensure the persistent buffer is large enough
|
||||
assert n <= self.scheduler_metadata.shape[0], (
|
||||
f"Scheduler metadata size {n} exceeds buffer size "
|
||||
+ f"{self.scheduler_metadata.shape[0]}"
|
||||
f"{self.scheduler_metadata.shape[0]}"
|
||||
)
|
||||
self.scheduler_metadata[:n] = scheduler_metadata
|
||||
# NOTE(woosuk): We should zero out the rest of the scheduler
|
||||
|
||||
@@ -646,7 +646,7 @@ class ChunkedLocalAttentionManager(SingleTypeKVCacheManager):
|
||||
"""
|
||||
assert isinstance(kv_cache_spec, ChunkedLocalAttentionSpec), (
|
||||
"ChunkedLocalAttentionManager can only be used for "
|
||||
+ "chunked local attention groups"
|
||||
"chunked local attention groups"
|
||||
)
|
||||
assert use_eagle is False, (
|
||||
"Hybrid KV cache is not supported for " + "eagle + chunked local attention."
|
||||
|
||||
@@ -618,7 +618,7 @@ class AsyncLLM(EngineClient):
|
||||
except Exception as e2:
|
||||
s = (
|
||||
f"{e.__class__.__name__}: "
|
||||
+ "error during printing an exception of class"
|
||||
"error during printing an exception of class"
|
||||
+ e2.__class__.__name__
|
||||
)
|
||||
logger.info("Request %s failed due to %s.", request_id, s)
|
||||
|
||||
Reference in New Issue
Block a user