ollama-python/examples/benchmark-models.py
2026-01-06 21:30:31 +03:00

44 lines
1.0 KiB
Python

import time
import ollama
# ---- Prompt ----
# One fixed prompt is sent to every model so the measured runs are comparable.
PROMPT = "Explain Generative AI in 100 words."

# ---- Local models ----
# Ask the ollama client for its model listing and keep only each entry's name.
_listing = ollama.list()
models = [entry.model for entry in _listing.models]
# ---- Table header ----
# Column titles: a 20-wide left-aligned model name followed by four 10-wide
# right-aligned numeric columns. The pieces are printed back to back
# (sep="") so each one carries its own " | " delimiter.
print(
    f"{'model name':<20} | ",
    f"{'total(s)':>10} | ",
    f"{'ttft(s)':>10} | ",
    f"{'tok/s':>10} | ",
    f"{'tokens':>10}",
    sep="",
)
# Rule under the titles: 72 = 20 + 4 * (3 + 10), the full width of the row.
print(72 * "-")
# ---- Benchmark ----
# Stream the same prompt through every listed model and print one table row
# per model: wall-clock total, time to the first streamed chunk, throughput
# (tokens divided by the wall-clock total) and the number of tokens.
for model in models:
    start = time.perf_counter()
    first_token = None
    chunk_count = 0
    reported_tokens = None
    try:
        for chunk in ollama.generate(model=model, prompt=PROMPT, stream=True):
            if first_token is None:
                first_token = time.perf_counter()
            chunk_count += 1
            # Per the Ollama API docs the closing chunk of a stream carries
            # the server's own count of generated tokens (`eval_count`).
            # Prefer it: the number of chunks is not the number of tokens,
            # since the closing stats chunk contains no generated text.
            eval_count = chunk.get("eval_count")
            if eval_count is not None:
                reported_tokens = eval_count
    except ollama.ResponseError as exc:
        # A model that cannot serve a generate request (for example an
        # embedding-only model) must not abort the remaining benchmarks.
        print(f"{model:<20} | failed: {exc}")
        continue
    end = time.perf_counter()
    total_time = end - start
    # Compare against None explicitly; a timestamp is a float, not a flag.
    ttft = (first_token - start) if first_token is not None else total_time
    # Fall back to the chunk count only when the server reported no total.
    token_count = reported_tokens if reported_tokens is not None else chunk_count
    tps = token_count / total_time if total_time > 0 else 0.0
    print(
        f"{model:<20} | "
        f"{total_time:>10.3f} | "
        f"{ttft:>10.3f} | "
        f"{tps:>10.2f} | "
        f"{token_count:>10}"
    )