mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
30 lines
1.1 KiB
Python
30 lines
1.1 KiB
Python
import time
|
|
|
|
import requests
|
|
|
|
|
|
def wait_for_endpoint_ready(url: str, timeout: int = 300):
    """Block until a GET on *url* returns HTTP 200, polling once per second.

    Args:
        url: Endpoint to probe.
        timeout: Maximum number of seconds to keep polling.

    Raises:
        RuntimeError: if the endpoint never returns 200 within *timeout* seconds.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            # Pause first, then probe; any request failure is logged and retried.
            time.sleep(1)
            response = requests.get(url, timeout=5)
            if response.status_code == 200:
                print(f"endpoint {url} is ready")
                return
        except Exception as err:
            print(f"endpoint {url} is not ready, with exception: {err}")
    raise RuntimeError(f"Endpoint {url} did not become ready within {timeout} seconds")
|
|
|
|
|
|
def wait_for_endpoint_down(url: str, timeout: int = 300):
    """Block until a GET on *url* stops succeeding, polling once per second.

    The endpoint is considered down when the request raises (connection
    refused, timed out, ...). While it still answers, the returned status
    code is logged and polling continues.

    Args:
        url: Endpoint to probe.
        timeout: Maximum number of seconds to keep polling.

    Raises:
        RuntimeError: if the endpoint is still responding after *timeout* seconds.
    """
    start = time.monotonic()
    while time.monotonic() - start < timeout:
        try:
            # Issue the request once and reuse the response. The original code
            # fired a second, untimed requests.get(url) inside the log line,
            # doubling the load on the endpoint and risking an unbounded hang.
            response = requests.get(url, timeout=5)
            # Any real HTTP response has status >= 100, so this logs every
            # successful round trip while the endpoint is still up.
            if response.status_code >= 100:
                print(f"endpoint {url} returned status code {response.status_code}")
            time.sleep(1)
        except Exception as err:
            print(f"endpoint {url} is down, with exception: {err}")
            return
    raise RuntimeError(f"Endpoint {url} did not become down within {timeout} seconds")
|