mirror of
https://github.com/microsoft/graphrag.git
synced 2026-01-14 00:57:23 +08:00
Some checks failed
gh-pages / build (push) Has been cancelled
Python CI / python-ci (ubuntu-latest, 3.10) (push) Has been cancelled
Python CI / python-ci (ubuntu-latest, 3.11) (push) Has been cancelled
Python CI / python-ci (windows-latest, 3.10) (push) Has been cancelled
Python CI / python-ci (windows-latest, 3.11) (push) Has been cancelled
Python Integration Tests / python-ci (ubuntu-latest, 3.10) (push) Has been cancelled
Python Integration Tests / python-ci (windows-latest, 3.10) (push) Has been cancelled
Python Notebook Tests / python-ci (ubuntu-latest, 3.10) (push) Has been cancelled
Python Notebook Tests / python-ci (windows-latest, 3.10) (push) Has been cancelled
Python Publish (pypi) / Upload release to PyPI (push) Has been cancelled
Python Smoke Tests / python-ci (ubuntu-latest, 3.10) (push) Has been cancelled
Python Smoke Tests / python-ci (windows-latest, 3.10) (push) Has been cancelled
Spellcheck / spellcheck (push) Has been cancelled
* Initial plan * Refactor VectorStoreFactory to use registration functionality like StorageFactory Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Fix linting issues in VectorStoreFactory refactoring Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Remove backward compatibility support from VectorStoreFactory and StorageFactory Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Run ruff check --fix and ruff format, add semversioner file Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * ruff formatting fixes * Fix pytest errors in storage factory tests by updating PipelineStorage interface implementation Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * ruff formatting fixes * update storage factory design * Refactor CacheFactory to use registration functionality like StorageFactory Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * revert copilot changes * fix copilot changes * update comments * Fix failing pytest compatibility for factory tests Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * update class instantiation issue * ruff fixes * fix pytest * add default value * ruff formatting changes * ruff fixes * revert minor changes * cleanup cache factory * Update CacheFactory tests to match consistent factory pattern Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * update pytest thresholds * adjust threshold levels * Add custom vector store implementation notebook Create comprehensive notebook demonstrating how to implement and register custom vector stores with GraphRAG as a plug-and-play framework. 
Includes: - Complete implementation of SimpleInMemoryVectorStore - Registration with VectorStoreFactory - Testing and validation examples - Configuration examples for GraphRAG settings - Advanced features and best practices - Production considerations checklist The notebook provides a complete walkthrough for developers to understand and implement their own vector store backends. Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * remove sample notebook for now * update tests * fix cache pytests * add pandas-stub to dev dependencies * disable warning check for well known key * skip tests when running on ubuntu * add documentation for custom vector store implementations * ignore ruff findings in notebooks * fix merge breakages * speedup CLI import statements * remove unnecessary import statements in init file * Add str type option on storage/cache type * Fix store name * Add LoggerFactory * Fix up logging setup across CLI/API * Add LoggerFactory test * Fix err message * Semver * Remove enums from factory methods --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> Co-authored-by: Josh Bradley <joshbradley@microsoft.com> Co-authored-by: Nathan Evans <github@talkswithnumbers.com>
263 lines
7.5 KiB
TOML
263 lines
7.5 KiB
TOML
# Core distribution metadata for the graphrag package.
[project]
name = "graphrag"
# Maintainers: do not change the version here manually, use ./scripts/release.sh
version = "2.5.0"
description = "GraphRAG: A graph-based retrieval-augmented generation (RAG) system."
authors = [
    {name = "Alonso Guevara Fernández", email = "alonsog@microsoft.com"},
    {name = "Andrés Morales Esquivel", email = "andresmor@microsoft.com"},
    {name = "Chris Trevino", email = "chtrevin@microsoft.com"},
    {name = "David Tittsworth", email = "datittsw@microsoft.com"},
    {name = "Dayenne de Souza", email = "ddesouza@microsoft.com"},
    {name = "Derek Worthen", email = "deworthe@microsoft.com"},
    {name = "Gaudy Blanco Meneses", email = "gaudyb@microsoft.com"},
    {name = "Ha Trinh", email = "trinhha@microsoft.com"},
    {name = "Jonathan Larson", email = "jolarso@microsoft.com"},
    {name = "Josh Bradley", email = "joshbradley@microsoft.com"},
    {name = "Kate Lytvynets", email = "kalytv@microsoft.com"},
    {name = "Kenny Zhang", email = "zhangken@microsoft.com"},
    {name = "Mónica Carvajal"},
    {name = "Nathan Evans", email = "naevans@microsoft.com"},
    {name = "Rodrigo Racanicci", email = "rracanicci@microsoft.com"},
    {name = "Sarah Smith", email = "smithsarah@microsoft.com"},
]
license = "MIT"
readme = "README.md"
# Upper bound excludes 3.13; the classifiers below list the supported minors.
requires-python = ">=3.10,<3.13"
classifiers = [
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
]

# Runtime requirements, grouped by purpose.
dependencies = [
    "environs>=11.0.0",
    # Vector Stores
    "azure-search-documents>=11.5.2",
    "lancedb>=0.17.0",
    # Async IO
    "aiofiles>=24.1.0",
    # LLM
    "fnllm[azure,openai]>=0.3.0",
    "json-repair>=0.30.3",
    "openai>=1.68.0",
    "nltk==3.9.1",
    "tiktoken>=0.9.0",
    # Data-Science
    "numpy>=1.25.2",
    "graspologic>=3.4.1",
    "networkx>=3.4.2",
    "pandas>=2.2.3",
    "pyarrow>=17.0.0",
    "umap-learn>=0.5.6",
    # Configuration
    "pyyaml>=6.0.2",
    "python-dotenv>=1.0.1",
    "pydantic>=2.10.3",
    "devtools>=0.12.2",
    "typing-extensions>=4.12.2",
    # Azure
    "azure-cosmos>=4.9.0",
    "azure-identity>=1.19.0",
    "azure-storage-blob>=12.24.0",
    "future>=1.0.0", # Needed until graspologic fixes their dependency
    "typer>=0.16.0",
    "tqdm>=4.67.1",
    "textblob>=0.18.0.post0",
    "spacy>=3.8.4",
]

# Extras: `dev` bundles the test, lint, type-check, release and docs tooling.
[project.optional-dependencies]
dev = [
    "coverage>=7.6.9",
    "ipykernel>=6.29.5",
    "jupyter>=1.1.1",
    "nbconvert>=7.16.4",
    "poethepoet>=0.31.1",
    "pandas-stubs>=2.3.0.250703",
    "pyright>=1.1.390",
    "pytest>=8.3.4",
    "pytest-asyncio>=0.24.0",
    "pytest-timeout>=2.3.1",
    "ruff>=0.8.2",
    "semversioner>=2.0.5",
    "update-toml>=0.2.1",
    "deptry>=0.21.1",
    "mkdocs-material>=9.5.48",
    "mkdocs-jupyter>=0.25.1",
    "mkdocs-exclude-search>=0.6.6",
    "pytest-dotenv>=0.5.2",
    "mkdocs-typer>=0.0.3",
]

# Console entry point: the `graphrag` command maps to `app` in graphrag.cli.main.
[project.scripts]
graphrag = "graphrag.cli.main:app"

# Project links published with the package metadata.
[project.urls]
Source = "https://github.com/microsoft/graphrag"

# Build backend: setuptools (>=64) together with wheel.
[build-system]
requires = ["setuptools>=64", "wheel"]
build-backend = "setuptools.build_meta"

# Package discovery: ship only graphrag*; keep notebooks and tests out of the distribution.
[tool.setuptools.packages.find]
include = ["graphrag*"]
exclude = ["examples_notebooks*", "tests*"]

# Keep poethepoet for task management to minimize changes
# Underscore-prefixed tasks are the building blocks referenced by the sequence
# tasks defined in this file (release, convert_docsite_notebooks, format, check, test).
[tool.poe.tasks]
_sort_imports = "ruff check --select I --fix ."
_format_code = "ruff format ."
_ruff_check = 'ruff check .'
_pyright = "pyright"
_convert_local_search_nb = 'jupyter nbconvert --output-dir=docsite/posts/query/notebooks/ --output="{notebook_name}_nb" --template=docsite/nbdocsite_template --to markdown examples_notebooks/local_search.ipynb'
_convert_global_search_nb = 'jupyter nbconvert --output-dir=docsite/posts/query/notebooks/ --output="{notebook_name}_nb" --template=docsite/nbdocsite_template --to markdown examples_notebooks/global_search.ipynb'
_semversioner_release = "semversioner release"
_semversioner_changelog = "semversioner changelog > CHANGELOG.md"
# Writes the output of `semversioner current-version` into project.version.
_semversioner_update_toml_version = "update-toml update --path project.version --value $(semversioner current-version)"
semversioner_add = "semversioner add-change"
coverage_report = 'coverage report --omit "**/tests/**" --show-missing'
check_format = 'ruff format . --check'
fix = "ruff check --fix ."
fix_unsafe = "ruff check --fix --unsafe-fixes ."
_test_all = "coverage run -m pytest ./tests"
test_unit = "pytest ./tests/unit"
test_integration = "pytest ./tests/integration"
test_smoke = "pytest ./tests/smoke"
test_notebook = "pytest ./tests/notebook"
test_verbs = "pytest ./tests/verbs"
index = "python -m graphrag index"
update = "python -m graphrag update"
init = "python -m graphrag init"
query = "python -m graphrag query"
prompt_tune = "python -m graphrag prompt-tune"
# Pass in a test pattern
test_only = "pytest -s -k"
serve_docs = "mkdocs serve"
build_docs = "mkdocs build"

# Release pipeline: cut the semversioner release, regenerate the changelog,
# then sync project.version.
# NOTE(review): per the poethepoet docs, 'return_non_zero' keeps running the
# remaining steps after a failure but still exits non-zero - confirm.
[[tool.poe.tasks.release]]
sequence = [
    '_semversioner_release',
    '_semversioner_changelog',
    '_semversioner_update_toml_version',
]
ignore_fail = 'return_non_zero'

# Converts the local-search and global-search example notebooks to markdown.
[[tool.poe.tasks.convert_docsite_notebooks]]
sequence = ['_convert_local_search_nb', '_convert_global_search_nb']
ignore_fail = 'return_non_zero'

# Formatting pipeline: sort imports first, then run the ruff formatter.
[[tool.poe.tasks.format]]
sequence = ['_sort_imports', '_format_code']
ignore_fail = 'return_non_zero'

# Static checks: formatting check, ruff lint, then pyright.
[[tool.poe.tasks.check]]
sequence = ['check_format', '_ruff_check', '_pyright']
ignore_fail = 'return_non_zero'

# Full test run under coverage, followed by the coverage report.
[[tool.poe.tasks.test]]
sequence = ['_test_all', 'coverage_report']
ignore_fail = 'return_non_zero'

# Keep all existing tool configurations
# Ruff base settings: target Python 3.10 syntax and also process notebooks.
[tool.ruff]
target-version = "py310"
extend-include = ["*.ipynb"]

# Formatter settings: preview style on, and code examples inside docstrings
# are formatted too (wrapped at 20 columns).
[tool.ruff.format]
preview = true
docstring-code-format = true
docstring-code-line-length = 20

# Linter settings: `select` turns rule families on, `ignore` turns individual
# rules back off. Commented-out entries at the end of `select` are families
# that are deliberately not enabled yet.
[tool.ruff.lint]
preview = true
select = [
    "E4",
    "E7",
    "E9",
    "W291",
    "YTT",
    "T10",
    "ICN",
    "INP",
    "Q",
    "RSE",
    "SLOT",
    "INT",
    "FLY",
    "LOG",
    "C90",
    "T20",
    "D",
    "RET",
    "PD",
    "N",
    "PIE",
    "SIM",
    "S",
    "G",
    "ERA",
    "ASYNC",
    "TID",
    "UP",
    "SLF",
    "BLE",
    "C4",
    "I",
    "F",
    "A",
    "ARG",
    "PTH",
    "RUF",
    "B",
    "TCH",
    "DTZ",
    "PYI",
    "PT",
    "EM",
    "TRY",
    "PERF",
    "CPY",
    # "FBT", # use named arguments for boolean flags
    # "TD", # todos
    # "FIX", # fixme
    # "FURB" # preview rules
    # ANN # Type annotations, re-enable when we get bandwidth
]
ignore = [
    # Ignore module names shadowing Python builtins
    "A005",
    # Conflicts with interface argument checking
    "ARG002",
    "ANN204",
    # TODO: Inspect these pandas rules for validity
    "PD002", # prevents inplace=True
    # TODO RE-Enable when we get bandwidth
    "PERF203", # Needs restructuring of errors, we should bail-out on first error
    "C901", # needs refactoring to remove cyclomatic complexity
    "B008", # Needs to restructure our cli params with Typer into constants
]

# Path-scoped rule exemptions for tests, index config modules and notebooks.
[tool.ruff.lint.per-file-ignores]
"tests/*" = ["S", "D", "ANN", "T201", "ASYNC", "ARG", "PTH", "TRY"]
"graphrag/index/config/*" = ["TCH"]
"*.ipynb" = ["T201", "S101", "PT015", "B011"]

# Builtin names that may be shadowed without triggering the flake8-builtins rules.
[tool.ruff.lint.flake8-builtins]
builtins-ignorelist = ["input", "id", "bytes"]

# Docstring rules follow the numpy convention.
[tool.ruff.lint.pydocstyle]
convention = "numpy"

# https://github.com/microsoft/pyright/blob/9f81564a4685ff5c55edd3959f9b39030f590b2f/docs/configuration.md#sample-pyprojecttoml-file
# Type-check the package, the tests and the example notebooks directory.
[tool.pyright]
include = ["graphrag", "tests", "examples_notebooks"]
exclude = ["**/node_modules", "**/__pycache__"]

# Pytest settings.
# NOTE(review): the asyncio_*, timeout and env_files keys are presumably read
# by the pytest-asyncio, pytest-timeout and pytest-dotenv plugins from the dev
# extras - confirm against each plugin's documentation.
[tool.pytest.ini_options]
asyncio_default_fixture_loop_scope = "function"
asyncio_mode = "auto"
timeout = 1000
env_files = [".env"]