# Packaging metadata, dependencies, and developer-tool configuration for the
# graphrag distribution.

[project]
name = "graphrag"
# Maintainers: do not change the version here manually, use ./scripts/release.sh
version = "2.5.0"
description = "GraphRAG: A graph-based retrieval-augmented generation (RAG) system."
authors = [
    { name = "Alonso Guevara Fernández", email = "alonsog@microsoft.com" },
    { name = "Andrés Morales Esquivel", email = "andresmor@microsoft.com" },
    { name = "Chris Trevino", email = "chtrevin@microsoft.com" },
    { name = "David Tittsworth", email = "datittsw@microsoft.com" },
    { name = "Dayenne de Souza", email = "ddesouza@microsoft.com" },
    { name = "Derek Worthen", email = "deworthe@microsoft.com" },
    { name = "Gaudy Blanco Meneses", email = "gaudyb@microsoft.com" },
    { name = "Ha Trinh", email = "trinhha@microsoft.com" },
    { name = "Jonathan Larson", email = "jolarso@microsoft.com" },
    { name = "Josh Bradley", email = "joshbradley@microsoft.com" },
    { name = "Kate Lytvynets", email = "kalytv@microsoft.com" },
    { name = "Kenny Zhang", email = "zhangken@microsoft.com" },
    { name = "Mónica Carvajal" },
    { name = "Nathan Evans", email = "naevans@microsoft.com" },
    { name = "Rodrigo Racanicci", email = "rracanicci@microsoft.com" },
    { name = "Sarah Smith", email = "smithsarah@microsoft.com" },
]
# SPDX license expression (string form); see the note on [build-system].
license = "MIT"
readme = "README.md"
# Keep the classifiers below in step with this range.
requires-python = ">=3.10,<3.13"
classifiers = [
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
]
dependencies = [
    "environs>=11.0.0",
    # Vector Stores
    "azure-search-documents>=11.5.2",
    "lancedb>=0.17.0",
    # Async IO
    "aiofiles>=24.1.0",
    # LLM
    "fnllm[azure,openai]>=0.4.1",
    "json-repair>=0.30.3",
    "openai>=1.68.0",
    "nltk==3.9.1",
    "tiktoken>=0.11.0",
    # Data-Science
    "numpy>=1.25.2",
    "graspologic>=3.4.1",
    "networkx>=3.4.2",
    "pandas>=2.2.3",
    "pyarrow>=17.0.0",
    "umap-learn>=0.5.6",
    # Configuration
    "pyyaml>=6.0.2",
    "python-dotenv>=1.0.1",
    "pydantic>=2.10.3",
    "devtools>=0.12.2",
    "typing-extensions>=4.12.2",
    # Azure
    "azure-cosmos>=4.9.0",
    "azure-identity>=1.19.0",
    "azure-storage-blob>=12.24.0",
    "future>=1.0.0", # Needed until graspologic fixes their dependency
    "typer>=0.16.0",
    "tqdm>=4.67.1",
    "textblob>=0.18.0.post0",
    "spacy>=3.8.4",
    "litellm>=1.77.1",
]

[project.optional-dependencies]
# Tooling for linting, type checking, testing, releasing, and building docs.
dev = [
    "coverage>=7.6.9",
    "ipykernel>=6.29.5",
    "jupyter>=1.1.1",
    "nbconvert>=7.16.4",
    "poethepoet>=0.31.1",
    "pandas-stubs>=2.3.0.250703",
    "pyright>=1.1.390",
    "pytest>=8.3.4",
    "pytest-asyncio>=0.24.0",
    "pytest-timeout>=2.3.1",
    "ruff>=0.8.2",
    "semversioner>=2.0.5",
    "update-toml>=0.2.1",
    "deptry>=0.21.1",
    "mkdocs-material>=9.5.48",
    "mkdocs-jupyter>=0.25.1",
    "mkdocs-exclude-search>=0.6.6",
    "pytest-dotenv>=0.5.2",
    "mkdocs-typer>=0.0.3",
]

[project.scripts]
graphrag = "graphrag.cli.main:app"

[project.urls]
Source = "https://github.com/microsoft/graphrag"

[build-system]
# `project.license` above uses the string (SPDX expression) form. Setuptools
# only accepts that form from 77.0.3 onwards; older releases reject the
# metadata, so the floor must not be lower than that.
requires = ["setuptools>=77.0.3", "wheel"]
build-backend = "setuptools.build_meta"

[tool.setuptools.packages.find]
include = ["graphrag*"]
exclude = ["examples_notebooks*", "tests*"]

# Keep poethepoet for task management to minimize changes
#
# Underscore-prefixed tasks are the individual steps composed by the sequence
# tasks (release, convert_docsite_notebooks, format, check, test) defined
# further down.
[tool.poe.tasks]
_sort_imports = "ruff check --select I --fix ."
_format_code = "ruff format ."
_ruff_check = 'ruff check .'
_pyright = "pyright"
_convert_local_search_nb = 'jupyter nbconvert --output-dir=docsite/posts/query/notebooks/ --output="{notebook_name}_nb" --template=docsite/nbdocsite_template --to markdown examples_notebooks/local_search.ipynb'
_convert_global_search_nb = 'jupyter nbconvert --output-dir=docsite/posts/query/notebooks/ --output="{notebook_name}_nb" --template=docsite/nbdocsite_template --to markdown examples_notebooks/global_search.ipynb'
_semversioner_release = "semversioner release"
_semversioner_changelog = "semversioner changelog > CHANGELOG.md"
_semversioner_update_toml_version = "update-toml update --path project.version --value $(semversioner current-version)"
semversioner_add = "semversioner add-change"
coverage_report = 'coverage report --omit "**/tests/**" --show-missing'
check_format = 'ruff format . --check'
fix = "ruff check --fix ."
fix_unsafe = "ruff check --fix --unsafe-fixes ."

_test_all = "coverage run -m pytest ./tests"
test_unit = "pytest ./tests/unit"
test_integration = "pytest ./tests/integration"
test_smoke = "pytest ./tests/smoke"
test_notebook = "pytest ./tests/notebook"
test_verbs = "pytest ./tests/verbs"
index = "python -m graphrag index"
update = "python -m graphrag update"
init = "python -m graphrag init"
query = "python -m graphrag query"
prompt_tune = "python -m graphrag prompt-tune"
# Pass in a test pattern
test_only = "pytest -s -k"
serve_docs = "mkdocs serve"
build_docs = "mkdocs build"

# Sequence tasks. `ignore_fail = 'return_non_zero'` (per the poethepoet docs)
# keeps running the remaining steps after one fails, while still making the
# overall task exit non-zero.
[[tool.poe.tasks.release]]
sequence = [
    '_semversioner_release',
    '_semversioner_changelog',
    '_semversioner_update_toml_version',
]
ignore_fail = 'return_non_zero'

[[tool.poe.tasks.convert_docsite_notebooks]]
sequence = ['_convert_local_search_nb', '_convert_global_search_nb']
ignore_fail = 'return_non_zero'

[[tool.poe.tasks.format]]
sequence = ['_sort_imports', '_format_code']
ignore_fail = 'return_non_zero'

[[tool.poe.tasks.check]]
sequence = ['check_format', '_ruff_check', '_pyright']
ignore_fail = 'return_non_zero'

[[tool.poe.tasks.test]]
sequence = ['_test_all', 'coverage_report']
ignore_fail = 'return_non_zero'

# Keep all existing tool configurations
[tool.ruff]
# Matches the lower bound of `requires-python`.
target-version = "py310"
extend-include = ["*.ipynb"]

[tool.ruff.format]
preview = true
docstring-code-format = true
docstring-code-line-length = 20

[tool.ruff.lint]
preview = true
select = [
    "E4",
    "E7",
    "E9",
    "W291",
    "YTT",
    "T10",
    "ICN",
    "INP",
    "Q",
    "RSE",
    "SLOT",
    "INT",
    "FLY",
    "LOG",
    "C90",
    "T20",
    "D",
    "RET",
    "PD",
    "N",
    "PIE",
    "SIM",
    "S",
    "G",
    "ERA",
    "ASYNC",
    "TID",
    "UP",
    "SLF",
    "BLE",
    "C4",
    "I",
    "F",
    "A",
    "ARG",
    "PTH",
    "RUF",
    "B",
    "TCH",
    "DTZ",
    "PYI",
    "PT",
    "EM",
    "TRY",
    "PERF",
    "CPY",
    # "FBT", # use named arguments for boolean flags
    # "TD", # todos
    # "FIX", # fixme
    # "FURB" # preview rules
    # ANN # Type annotations, re-enable when we get bandwidth
]
ignore = [
    # Ignore module names shadowing Python builtins
    "A005",
    # Conflicts with interface argument checking
    "ARG002",
    "ANN204",
    # TODO: Inspect these pandas rules for validity
    "PD002", # prevents inplace=True
    # TODO RE-Enable when we get bandwidth
    "PERF203", # Needs restructuring of errors, we should bail-out on first error
    "C901", # needs refactoring to remove cyclomatic complexity
    "B008", # Needs to restructure our cli params with Typer into constants
]

[tool.ruff.lint.per-file-ignores]
"tests/*" = ["S", "D", "ANN", "T201", "ASYNC", "ARG", "PTH", "TRY"]
"graphrag/index/config/*" = ["TCH"]
"*.ipynb" = ["T201", "S101", "PT015", "B011"]

[tool.ruff.lint.flake8-builtins]
builtins-ignorelist = ["input", "id", "bytes"]

[tool.ruff.lint.pydocstyle]
convention = "numpy"

# https://github.com/microsoft/pyright/blob/9f81564a4685ff5c55edd3959f9b39030f590b2f/docs/configuration.md#sample-pyprojecttoml-file
[tool.pyright]
include = ["graphrag", "tests", "examples_notebooks"]
exclude = ["**/node_modules", "**/__pycache__"]

# The options below are provided by pytest plugins from the `dev` extra:
# asyncio_* by pytest-asyncio, timeout by pytest-timeout, env_files by
# pytest-dotenv.
[tool.pytest.ini_options]
asyncio_default_fixture_loop_scope = "function"
asyncio_mode = "auto"
timeout = 1000
env_files = [".env"]