mirror of
https://github.com/ollama/ollama-python.git
synced 2026-01-13 21:57:16 +08:00
examples: model benchmark
This commit is contained in:
parent
60e7b2f9ce
commit
149b093c76
44
examples/benchmark-models.py
Normal file
44
examples/benchmark-models.py
Normal file
@ -0,0 +1,44 @@
|
||||
import time
|
||||
import ollama
|
||||
|
||||
# ---- PROMPT ----
|
||||
PROMPT = "Explain Generative AI in 100 words."
|
||||
|
||||
# ---- Get local models ----
|
||||
models = [m.model for m in ollama.list().models]
|
||||
|
||||
# ---- Header ----
|
||||
print(
|
||||
f"{'model name':<20} | "
|
||||
f"{'total(s)':>10} | "
|
||||
f"{'ttft(s)':>10} | "
|
||||
f"{'tok/s':>10} | "
|
||||
f"{'tokens':>10}"
|
||||
)
|
||||
print("-" * 72)
|
||||
|
||||
# ---- Benchmark ----
|
||||
for model in models:
|
||||
|
||||
start = time.perf_counter()
|
||||
first_token = None
|
||||
token_count = 0
|
||||
|
||||
for chunk in ollama.generate(model=model, prompt=PROMPT, stream=True):
|
||||
if first_token is None:
|
||||
first_token = time.perf_counter()
|
||||
token_count += 1
|
||||
|
||||
end = time.perf_counter()
|
||||
|
||||
total_time = end - start
|
||||
ttft = (first_token - start) if first_token else total_time
|
||||
tps = token_count / total_time if total_time > 0 else 0.0
|
||||
|
||||
print(
|
||||
f"{model:<20} | "
|
||||
f"{total_time:>10.3f} | "
|
||||
f"{ttft:>10.3f} | "
|
||||
f"{tps:>10.2f} | "
|
||||
f"{token_count:>10}"
|
||||
)
|
||||
Loading…
Reference in New Issue
Block a user