graphrag/pyproject.toml
Derek Worthen 2b70e4a4f3
Tokenizer (#2051)
* Add LiteLLM chat and embedding model providers.

* Fix code review findings.

* Add litellm.

* Fix formatting.

* Update dictionary.

* Update litellm.

* Fix embedding.

* Remove manual use of tiktoken and replace with
Tokenizer interface. Adds support for encoding
and decoding the models supported by litellm.

* Update litellm.

* Configure litellm to drop unsupported params.

* Cleanup semversioner release notes.

* Add num_tokens util to Tokenizer interface.

* Update litellm service factories.

* Cleanup litellm chat/embedding model argument assignment.

* Update chat and embedding type field for litellm use and future migration away from fnllm.

* Flatten litellm service organization.

* Update litellm.

* Update litellm factory validation.

* Flatten litellm rate limit service organization.

* Update rate limiter - disable with None/null instead of 0.

* Fix usage of get_tokenizer.

* Update litellm service registrations.

* Add jitter to exponential retry.

* Update validation.

* Update validation.

* Add litellm request logging layer.

* Update cache key.

* Update defaults.

---------

Co-authored-by: Alonso Guevara <alonsog@microsoft.com>
2025-09-22 13:55:14 -06:00

265 lines
7.5 KiB
TOML

[project]
name = "graphrag"
# Maintainers: do not change the version here manually, use ./scripts/release.sh
version = "2.5.0"
description = "GraphRAG: A graph-based retrieval-augmented generation (RAG) system."
# One inline table per author; `email` is optional and omitted where unknown.
authors = [
    { name = "Alonso Guevara Fernández", email = "alonsog@microsoft.com" },
    { name = "Andrés Morales Esquivel", email = "andresmor@microsoft.com" },
    { name = "Chris Trevino", email = "chtrevin@microsoft.com" },
    { name = "David Tittsworth", email = "datittsw@microsoft.com" },
    { name = "Dayenne de Souza", email = "ddesouza@microsoft.com" },
    { name = "Derek Worthen", email = "deworthe@microsoft.com" },
    { name = "Gaudy Blanco Meneses", email = "gaudyb@microsoft.com" },
    { name = "Ha Trinh", email = "trinhha@microsoft.com" },
    { name = "Jonathan Larson", email = "jolarso@microsoft.com" },
    { name = "Josh Bradley", email = "joshbradley@microsoft.com" },
    { name = "Kate Lytvynets", email = "kalytv@microsoft.com" },
    { name = "Kenny Zhang", email = "zhangken@microsoft.com" },
    { name = "Mónica Carvajal" },
    { name = "Nathan Evans", email = "naevans@microsoft.com" },
    { name = "Rodrigo Racanicci", email = "rracanicci@microsoft.com" },
    { name = "Sarah Smith", email = "smithsarah@microsoft.com" },
]
# Bare SPDX license expression (string form rather than a `{ text = ... }` table).
license = "MIT"
readme = "README.md"
# Supported interpreter range; the version classifiers below list the same 3.10-3.12 span.
requires-python = ">=3.10,<3.13"
classifiers = [
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
]
# Runtime requirements as PEP 508 strings, grouped by purpose.
dependencies = [
    "environs>=11.0.0",
    # Vector Stores
    "azure-search-documents>=11.5.2",
    "lancedb>=0.17.0",
    # Async IO
    "aiofiles>=24.1.0",
    # LLM
    "fnllm[azure,openai]>=0.4.1",
    "json-repair>=0.30.3",
    "openai>=1.68.0",
    "nltk==3.9.1",
    "tiktoken>=0.11.0",
    # Data-Science
    "numpy>=1.25.2",
    "graspologic>=3.4.1",
    "networkx>=3.4.2",
    "pandas>=2.2.3",
    "pyarrow>=17.0.0",
    "umap-learn>=0.5.6",
    # Configuration
    "pyyaml>=6.0.2",
    "python-dotenv>=1.0.1",
    "pydantic>=2.10.3",
    "devtools>=0.12.2",
    "typing-extensions>=4.12.2",
    # Azure
    "azure-cosmos>=4.9.0",
    "azure-identity>=1.19.0",
    "azure-storage-blob>=12.24.0",
    # Miscellaneous (not Azure-specific)
    "future>=1.0.0", # Needed until graspologic fixes their dependency
    "typer>=0.16.0",
    "tqdm>=4.67.1",
    "textblob>=0.18.0.post0",
    "spacy>=3.8.4",
    "litellm>=1.77.1",
]
[project.optional-dependencies]
# `dev` extra: test, coverage, lint, type-check, notebook, docs and release tooling.
# Several of these back the poe tasks and tool sections configured later in this file
# (ruff, pyright, pytest plugins, semversioner, update-toml, mkdocs).
dev = [
    "coverage>=7.6.9",
    "ipykernel>=6.29.5",
    "jupyter>=1.1.1",
    "nbconvert>=7.16.4",
    "poethepoet>=0.31.1",
    "pandas-stubs>=2.3.0.250703",
    "pyright>=1.1.390",
    "pytest>=8.3.4",
    "pytest-asyncio>=0.24.0",
    "pytest-timeout>=2.3.1",
    "ruff>=0.8.2",
    "semversioner>=2.0.5",
    "update-toml>=0.2.1",
    "deptry>=0.21.1",
    "mkdocs-material>=9.5.48",
    "mkdocs-jupyter>=0.25.1",
    "mkdocs-exclude-search>=0.6.6",
    "pytest-dotenv>=0.5.2",
    "mkdocs-typer>=0.0.3",
]
# Console entry point: exposes a `graphrag` command bound to `app` in `graphrag.cli.main`.
[project.scripts]
graphrag = "graphrag.cli.main:app"
# Project links published with the package metadata.
[project.urls]
Source = "https://github.com/microsoft/graphrag"
[build-system]
# `project.license` is written as a bare SPDX expression ("MIT", PEP 639 style).
# setuptools only accepts that string form from the 77.x series onward; older
# releases expect a `{ text = ... }` / `{ file = ... }` table and reject the
# metadata, so the minimum backend version must reflect that.
requires = ["setuptools>=77.0.3", "wheel"]
build-backend = "setuptools.build_meta"
# Package discovery: ship only packages matching `graphrag*`; the example
# notebooks and the test suite are explicitly kept out of the distribution.
[tool.setuptools.packages.find]
include = ["graphrag*"]
exclude = ["examples_notebooks*", "tests*"]
# Keep poethepoet for task management to minimize changes
# Tasks prefixed with `_` are building blocks referenced by the sequence tasks
# (`release`, `format`, `check`, `test`, `convert_docsite_notebooks`) declared
# after this table.
[tool.poe.tasks]
# Lint / format helpers
_sort_imports = "ruff check --select I --fix ."
_format_code = "ruff format ."
_ruff_check = 'ruff check .'
_pyright = "pyright"
# Notebook -> markdown conversion for the docsite (literal strings so the inner double quotes need no escaping)
_convert_local_search_nb = 'jupyter nbconvert --output-dir=docsite/posts/query/notebooks/ --output="{notebook_name}_nb" --template=docsite/nbdocsite_template --to markdown examples_notebooks/local_search.ipynb'
_convert_global_search_nb = 'jupyter nbconvert --output-dir=docsite/posts/query/notebooks/ --output="{notebook_name}_nb" --template=docsite/nbdocsite_template --to markdown examples_notebooks/global_search.ipynb'
# Release helpers
_semversioner_release = "semversioner release"
_semversioner_changelog = "semversioner changelog > CHANGELOG.md"
# NOTE(review): this command and the changelog one above rely on shell features
# (`$(...)` substitution, `>` redirection). Confirm that poe evaluates them for
# plain string (cmd) tasks; otherwise these need to be declared as `shell` tasks.
_semversioner_update_toml_version = "update-toml update --path project.version --value $(semversioner current-version)"
semversioner_add = "semversioner add-change"
coverage_report = 'coverage report --omit "**/tests/**" --show-missing'
check_format = 'ruff format . --check'
fix = "ruff check --fix ."
fix_unsafe = "ruff check --fix --unsafe-fixes ."
# Test entry points, one per test directory
_test_all = "coverage run -m pytest ./tests"
test_unit = "pytest ./tests/unit"
test_integration = "pytest ./tests/integration"
test_smoke = "pytest ./tests/smoke"
test_notebook = "pytest ./tests/notebook"
test_verbs = "pytest ./tests/verbs"
# Shortcuts for the graphrag CLI sub-commands
index = "python -m graphrag index"
update = "python -m graphrag update"
init = "python -m graphrag init"
query = "python -m graphrag query"
prompt_tune = "python -m graphrag prompt-tune"
# Pass in a test pattern
test_only = "pytest -s -k"
# Documentation site
serve_docs = "mkdocs serve"
build_docs = "mkdocs build"
# `release`: runs the three semversioner helper tasks in the listed order
# (cut the release, regenerate the changelog, sync `project.version`).
# NOTE(review): declared as a one-element array of tables (`[[...]]`); a plain
# `[tool.poe.tasks.release]` table is presumably equivalent — confirm against
# the poethepoet docs before changing.
[[tool.poe.tasks.release]]
sequence = [
'_semversioner_release',
'_semversioner_changelog',
'_semversioner_update_toml_version',
]
# Presumably: keep running the remaining steps after a failure but still exit non-zero (see poethepoet docs).
ignore_fail = 'return_non_zero'
# `convert_docsite_notebooks`: converts the local-search and global-search
# example notebooks, one after the other.
[[tool.poe.tasks.convert_docsite_notebooks]]
sequence = ['_convert_local_search_nb', '_convert_global_search_nb']
ignore_fail = 'return_non_zero'
# `format`: sorts imports first, then applies the ruff formatter.
[[tool.poe.tasks.format]]
sequence = ['_sort_imports', '_format_code']
ignore_fail = 'return_non_zero'
# `check`: read-only verification — format check, ruff lint, then pyright.
[[tool.poe.tasks.check]]
sequence = ['check_format', '_ruff_check', '_pyright']
ignore_fail = 'return_non_zero'
# `test`: runs the whole suite under coverage, then prints the coverage report.
[[tool.poe.tasks.test]]
sequence = ['_test_all', 'coverage_report']
ignore_fail = 'return_non_zero'
# Keep all existing tool configurations
[tool.ruff]
# Lowest interpreter allowed by `requires-python` (3.10).
target-version = "py310"
# Include Jupyter notebooks in addition to ruff's default file set.
extend-include = ["*.ipynb"]
[tool.ruff.format]
# Opt in to ruff's preview formatting behaviour.
preview = true
# Also format code examples embedded in docstrings, using the line length below.
docstring-code-format = true
docstring-code-line-length = 20
[tool.ruff.lint]
# Opt in to preview lint rules.
preview = true
# Enabled rule families. Entries left commented out at the end are
# deliberately disabled for now.
select = [
    "E4",
    "E7",
    "E9",
    "W291",
    "YTT",
    "T10",
    "ICN",
    "INP",
    "Q",
    "RSE",
    "SLOT",
    "INT",
    "FLY",
    "LOG",
    "C90",
    "T20",
    "D",
    "RET",
    "PD",
    "N",
    "PIE",
    "SIM",
    "S",
    "G",
    "ERA",
    "ASYNC",
    "TID",
    "UP",
    "SLF",
    "BLE",
    "C4",
    "I",
    "F",
    "A",
    "ARG",
    "PTH",
    "RUF",
    "B",
    # flake8-type-checking: ruff 0.8 renamed this prefix from the deprecated
    # `TCH` to `TC`; the dev extra already requires ruff>=0.8.2.
    "TC",
    "DTZ",
    "PYI",
    "PT",
    "EM",
    "TRY",
    "PERF",
    "CPY",
    # "FBT", # use named arguments for boolean flags
    # "TD", # todos
    # "FIX", # fixme
    # "FURB", # preview rules
    # "ANN", # Type annotations, re-enable when we get bandwidth
]
ignore = [
    # Ignore module names shadowing Python builtins
    "A005",
    # Conflicts with interface argument checking
    "ARG002",
    # Has no effect while the ANN family stays disabled in `select`
    "ANN204",
    # TODO: Inspect these pandas rules for validity
    "PD002", # prevents inplace=True
    # TODO RE-Enable when we get bandwidth
    "PERF203", # Needs restructuring of errors, we should bail-out on first error
    "C901", # needs refactoring to remove cyclomatic complexity
    "B008", # Needs to restructure our cli params with Typer into constants
]

# Path-specific relaxations layered on top of the global `ignore` list.
[tool.ruff.lint.per-file-ignores]
"tests/*" = ["S", "D", "ANN", "T201", "ASYNC", "ARG", "PTH", "TRY"]
# Same flake8-type-checking family as in `select`, under its current `TC` name.
"graphrag/index/config/*" = ["TC"]
"*.ipynb" = ["T201", "S101", "PT015", "B011"]
# Builtin names exempted from the builtin-shadowing (`A`) checks.
[tool.ruff.lint.flake8-builtins]
builtins-ignorelist = ["input", "id", "bytes"]
# Docstring (`D`) rules are checked against the NumPy docstring convention.
[tool.ruff.lint.pydocstyle]
convention = "numpy"
# https://github.com/microsoft/pyright/blob/9f81564a4685ff5c55edd3959f9b39030f590b2f/docs/configuration.md#sample-pyprojecttoml-file
# Type-check the package, its tests and the example notebooks directory;
# dependency and bytecode cache directories are skipped.
[tool.pyright]
include = ["graphrag", "tests", "examples_notebooks"]
exclude = ["**/node_modules", "**/__pycache__"]
[tool.pytest.ini_options]
# pytest-asyncio settings
asyncio_default_fixture_loop_scope = "function"
asyncio_mode = "auto"
# pytest-timeout: per-test limit (presumably seconds — confirm against the plugin docs)
timeout = 1000
# pytest-dotenv: environment files loaded before the tests run
env_files = [".env"]