diff --git a/.buildkite/performance-benchmarks/tests/latency-tests-arm64-cpu.json b/.buildkite/performance-benchmarks/tests/latency-tests-arm64-cpu.json
index fba695041e3..98811049d39 100644
--- a/.buildkite/performance-benchmarks/tests/latency-tests-arm64-cpu.json
+++ b/.buildkite/performance-benchmarks/tests/latency-tests-arm64-cpu.json
@@ -2,7 +2,6 @@
     {
         "test_name": "latency_llama8B_tp1",
         "environment_variables": {
-            "VLLM_RPC_TIMEOUT": 100000,
             "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
             "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
             "VLLM_CPU_KVCACHE_SPACE": 40
diff --git a/.buildkite/performance-benchmarks/tests/latency-tests-cpu.json b/.buildkite/performance-benchmarks/tests/latency-tests-cpu.json
index 77d1694ec86..5f048df5f6a 100644
--- a/.buildkite/performance-benchmarks/tests/latency-tests-cpu.json
+++ b/.buildkite/performance-benchmarks/tests/latency-tests-cpu.json
@@ -2,7 +2,6 @@
     {
         "test_name": "latency_llama8B_tp2",
         "environment_variables": {
-            "VLLM_RPC_TIMEOUT": 100000,
 	    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
 	    "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
 	    "VLLM_CPU_SGL_KERNEL": 1,
diff --git a/.buildkite/performance-benchmarks/tests/serving-tests-arm64-cpu.json b/.buildkite/performance-benchmarks/tests/serving-tests-arm64-cpu.json
index 9f226ef2f81..75b80b2c212 100644
--- a/.buildkite/performance-benchmarks/tests/serving-tests-arm64-cpu.json
+++ b/.buildkite/performance-benchmarks/tests/serving-tests-arm64-cpu.json
@@ -13,7 +13,6 @@
       200
     ],
     "server_environment_variables": {
-      "VLLM_RPC_TIMEOUT": 100000,
       "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
       "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
       "VLLM_CPU_SGL_KERNEL": 1,
diff --git a/.buildkite/performance-benchmarks/tests/serving-tests-cpu-asr.json b/.buildkite/performance-benchmarks/tests/serving-tests-cpu-asr.json
index 30879b5e9dc..16e8b0600ac 100644
--- a/.buildkite/performance-benchmarks/tests/serving-tests-cpu-asr.json
+++ b/.buildkite/performance-benchmarks/tests/serving-tests-cpu-asr.json
@@ -5,7 +5,6 @@
     ],
     "max_concurrency_list": [12, 16, 24, 32, 64, 128, 200],
     "server_environment_variables": {
-      "VLLM_RPC_TIMEOUT": 100000,
       "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120
     },
     "server_parameters": {
diff --git a/.buildkite/performance-benchmarks/tests/serving-tests-cpu-embed.json b/.buildkite/performance-benchmarks/tests/serving-tests-cpu-embed.json
index 6d3455c478c..c62f244fc76 100644
--- a/.buildkite/performance-benchmarks/tests/serving-tests-cpu-embed.json
+++ b/.buildkite/performance-benchmarks/tests/serving-tests-cpu-embed.json
@@ -9,7 +9,6 @@
       128
     ],
     "server_environment_variables": {
-      "VLLM_RPC_TIMEOUT": 100000,
       "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
       "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
       "VLLM_CPU_SGL_KERNEL": 1,
diff --git a/.buildkite/performance-benchmarks/tests/serving-tests-cpu-text.json b/.buildkite/performance-benchmarks/tests/serving-tests-cpu-text.json
index 34c2cc82d39..9aa76c11089 100644
--- a/.buildkite/performance-benchmarks/tests/serving-tests-cpu-text.json
+++ b/.buildkite/performance-benchmarks/tests/serving-tests-cpu-text.json
@@ -5,7 +5,6 @@
     ],
     "max_concurrency_list": [12, 16, 24, 32, 64, 128, 200],
     "server_environment_variables": {
-      "VLLM_RPC_TIMEOUT": 100000,
       "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
       "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
       "VLLM_CPU_SGL_KERNEL": 1,
diff --git a/.buildkite/performance-benchmarks/tests/serving-tests-cpu.json b/.buildkite/performance-benchmarks/tests/serving-tests-cpu.json
index c2d7768e202..0b7e7499965 100644
--- a/.buildkite/performance-benchmarks/tests/serving-tests-cpu.json
+++ b/.buildkite/performance-benchmarks/tests/serving-tests-cpu.json
@@ -5,7 +5,6 @@
     ],
     "max_concurrency_list": [12, 16, 24, 32, 64, 128, 200],
     "server_environment_variables": {
-      "VLLM_RPC_TIMEOUT": 100000,
       "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
       "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
       "VLLM_CPU_SGL_KERNEL": 1,
diff --git a/.buildkite/performance-benchmarks/tests/throughput-tests-arm64-cpu.json b/.buildkite/performance-benchmarks/tests/throughput-tests-arm64-cpu.json
index da84dd4d0c6..3863cccf43a 100644
--- a/.buildkite/performance-benchmarks/tests/throughput-tests-arm64-cpu.json
+++ b/.buildkite/performance-benchmarks/tests/throughput-tests-arm64-cpu.json
@@ -2,7 +2,6 @@
     {
         "test_name": "throughput_llama8B_tp1",
         "environment_variables": {
-            "VLLM_RPC_TIMEOUT": 100000,
             "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
             "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
             "VLLM_CPU_KVCACHE_SPACE": 40
diff --git a/.buildkite/performance-benchmarks/tests/throughput-tests-cpu.json b/.buildkite/performance-benchmarks/tests/throughput-tests-cpu.json
index dc214ddfb27..d3f16eff116 100644
--- a/.buildkite/performance-benchmarks/tests/throughput-tests-cpu.json
+++ b/.buildkite/performance-benchmarks/tests/throughput-tests-cpu.json
@@ -2,7 +2,6 @@
     {
         "test_name": "throughput_llama8B_tp2",
         "environment_variables": {
-            "VLLM_RPC_TIMEOUT": 100000,
 	    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
 	    "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
 	    "VLLM_CPU_SGL_KERNEL": 1,
diff --git a/docs/contributing/profiling.md b/docs/contributing/profiling.md
index ce46445a983..c9bd0e5bdd9 100644
--- a/docs/contributing/profiling.md
+++ b/docs/contributing/profiling.md
@@ -35,8 +35,7 @@ Traces can be visualized using <https://ui.perfetto.dev/>.
 
 !!! tip
     To stop the profiler - it flushes out all the profile trace files to the directory. This takes time, for example for about 100 requests worth of data for a llama 70b, it takes about 10 minutes to flush out on a H100.
-    Set the env variable VLLM_RPC_TIMEOUT to a big number before you start the server. Say something like 30 minutes.
-    `export VLLM_RPC_TIMEOUT=1800000`
+    No timeout configuration is needed: the engine client waits for the flush to finish, so let the stop request run to completion, even if it takes several minutes.
 
 ### Example commands and usage
 
diff --git a/vllm/envs.py b/vllm/envs.py
index c12e3cae247..dc11fbd224d 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -95,7 +95,6 @@ if TYPE_CHECKING:
     CMAKE_BUILD_TYPE: Literal["Debug", "Release", "RelWithDebInfo"] | None = None
     VERBOSE: bool = False
     VLLM_ALLOW_LONG_MAX_MODEL_LEN: bool = False
-    VLLM_RPC_TIMEOUT: int = 10000  # ms
     VLLM_HTTP_TIMEOUT_KEEP_ALIVE: int = 5  # seconds
     VLLM_MAX_N_SEQUENCES: int = 16384
     VLLM_PLUGINS: list[str] | None = None
@@ -1015,9 +1014,6 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_TEST_FORCE_LOAD_FORMAT": lambda: os.getenv(
         "VLLM_TEST_FORCE_LOAD_FORMAT", "dummy"
     ),
-    # Time in ms for the zmq client to wait for a response from the backend
-    # server for simple data operations
-    "VLLM_RPC_TIMEOUT": lambda: int(os.getenv("VLLM_RPC_TIMEOUT", "10000")),
     # Timeout in seconds for keeping HTTP connections alive in API server
     "VLLM_HTTP_TIMEOUT_KEEP_ALIVE": lambda: int(
         os.environ.get("VLLM_HTTP_TIMEOUT_KEEP_ALIVE", "5")