mirror of
https://github.com/microsoft/graphrag.git
synced 2026-01-26 05:41:50 +08:00
* Remove "strategy" from community reports config/workflow * Remove extraction strategy from extract_graph * Remove summarization strategy from extract_graph * Remove strategy from claim extraction * Strongly type prompt templates * Remove strategy from embed_text * Push hydrated params into community report workflows * Push hydrated params into extract covariates * Push hydrated params into extract graph NLP * Push hydrated params into extract graph * Push hydrated params into text embeddings * Remove a few more low-level defaults * Semver * Remove configurable prompt delimiters * Update smoke tests
21 lines
525 B
Python
21 lines
525 B
Python
# Copyright (c) 2024 Microsoft Corporation.
|
|
# Licensed under the MIT License
|
|
|
|
from graphrag.tokenizer.get_tokenizer import get_tokenizer
|
|
|
|
|
|
def test_encode_basic():
    """Check that encoding a short phrase yields the expected token ids.

    Uses the tokenizer returned by ``get_tokenizer()`` called with no
    arguments and compares the ids for ``"abc def"`` against fixed values.
    """
    # Expected ids are tied to whichever encoding get_tokenizer() selects
    # when called without arguments.
    expected_tokens = [26682, 1056]

    result = get_tokenizer().encode("abc def")

    assert result == expected_tokens, (
        f"Encoding failed to return expected tokens, sent {result}"
    )
|
|
|
|
|
|
def test_num_tokens_empty_input():
    """Check that encoding an empty string produces zero tokens."""
    empty_encoding = get_tokenizer().encode("")
    result = len(empty_encoding)

    assert result == 0, "Token count for empty input should be 0"
|