mirror of
https://github.com/microsoft/graphrag.git
synced 2026-01-14 00:57:23 +08:00
Some checks are pending
gh-pages / build (push) Waiting to run
Python CI / python-ci (ubuntu-latest, 3.10) (push) Waiting to run
Python CI / python-ci (ubuntu-latest, 3.11) (push) Waiting to run
Python CI / python-ci (windows-latest, 3.10) (push) Waiting to run
Python CI / python-ci (windows-latest, 3.11) (push) Waiting to run
Python Integration Tests / python-ci (ubuntu-latest, 3.10) (push) Waiting to run
Python Integration Tests / python-ci (windows-latest, 3.10) (push) Waiting to run
Python Notebook Tests / python-ci (ubuntu-latest, 3.10) (push) Waiting to run
Python Notebook Tests / python-ci (windows-latest, 3.10) (push) Waiting to run
Python Publish (pypi) / Upload release to PyPI (push) Waiting to run
Python Smoke Tests / python-ci (ubuntu-latest, 3.10) (push) Waiting to run
Python Smoke Tests / python-ci (windows-latest, 3.10) (push) Waiting to run
Spellcheck / spellcheck (push) Waiting to run
* Initial plan for issue * Implement standard logging module and integrate with existing loggers Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Add test cases and improve documentation for standard logging Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Apply ruff formatting and add semversioner file for logging improvements Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Remove custom logger classes and refactor to use standard logging only Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Apply ruff formatting to resolve CI/CD test failures Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Add semversioner file and fix linting issues Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * ruff fixes * fix spelling error * Remove StandardProgressLogger and refactor to use standard logging Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Remove LoggerFactory and custom loggers, refactor to use standard logging Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Fix pyright error: use logger.info() instead of calling logger as function in cosmosdb_pipeline_storage.py Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * ruff fixes * Remove deprecated logger files that were marked as deprecated placeholders Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Replace custom get_logger with standard Python logging Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Fix linting issues found by ruff check --fix Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * apply ruff check fixes * add word to dictionary * Fix type checker error in ModelManager.__new__ method Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Refactor multiple logging.getLogger() calls to use single 
logger per file Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Remove progress_logger parameter from build_index() and logger parameter from generate_indexing_prompts() Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Remove logger parameter from run_pipeline and standardize logger naming Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Replace logger parameter with log_level parameter in CLI commands Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Fix import ordering in notebook files to pass poetry poe check Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Remove --logger parameter from smoke test command Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Fix Windows CI/CD issue with log file cleanup in tests Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Add StreamHandler to root logger in __main__.py for CLI logging Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Only add StreamHandler if root logger doesn't have existing StreamHandler Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Fix import ordering in notebook files to pass ruff checks Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Replace logging.StreamHandler with colorlog.StreamHandler for colorized log output Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Regenerate poetry.lock file after adding colorlog dependency Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Fix import ordering in notebook files to pass ruff checks Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * move printing of dataframes to debug level * remove colorlog for now * Refactor workflow callbacks to inherit from logging.Handler Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Fix 
linting issues in workflow callback handlers Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Fix pyright type errors in blob and file workflow callbacks Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Refactor pipeline logging to use pure logging.Handler subclasses Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Rename workflow callback classes to workflow logger classes and move to logger directory Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * update dictionary * apply ruff fixes * fix function name * simplify logger code * update * Remove error, warning, and log methods from WorkflowCallbacks and replace with standard logging Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * ruff fixes * Fix pyright errors by removing WorkflowCallbacks from strategy type signatures Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Remove ConsoleWorkflowLogger and apply consistent formatter to all handlers Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * apply ruff fixes * Refactor pipeline_logger.py to use standard FileHandler and remove FileWorkflowLogger Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Remove conditional azure import checks from blob_workflow_logger.py Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Fix pyright type checking errors in mock_provider.py and utils.py Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Run ruff check --fix to fix import ordering in notebooks Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Merge configure_logging and create_pipeline_logger into init_loggers function Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * Remove configure_logging and create_pipeline_logger functions, replace all usage with init_loggers Co-authored-by: 
jgbradley1 <654554+jgbradley1@users.noreply.github.com> * apply ruff fixes * cleanup unused code * Update init_loggers to accept GraphRagConfig instead of ReportingConfig Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * apply ruff check fixes * Fix test failures by providing valid GraphRagConfig with required model configurations Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * apply ruff fixes * remove logging_workflow_callback * cleanup logging messages * Add logging to track progress of pandas DataFrame apply operation in create_base_text_units Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * cleanup logger logic throughout codebase * update * more cleanup of old loggers * small logger cleanup * final code cleanup and added loggers to query * add verbose logging to query * minor code cleanup * Fix broken unit tests for chunk_text and standard_logging Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * apply ruff fixes * Fix test_chunk_text by mocking progress_ticker function instead of ProgressTicker class Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> * remove unnecessary logger * remove rich and fix type annotation * revert test formatting changes my by copilot * promote graphrag logs to root logger * add correct semversioner file * revert change to file * revert formatting changes that have no effect * fix changes after merge with main * revert unnecessary copilot changes * remove whitespace * cleanup docstring * simplify some logic with less code * update poetry lock file * ruff fixes --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: jgbradley1 <654554+jgbradley1@users.noreply.github.com> Co-authored-by: Josh Bradley <joshbradley@microsoft.com>
271 lines
7.0 KiB
TOML
271 lines
7.0 KiB
TOML
# Package metadata for the graphrag distribution.
[tool.poetry]
name = "graphrag"
# Maintainers: do not change the version here manually, use ./scripts/release.sh
version = "2.3.0"
description = "GraphRAG: A graph-based retrieval-augmented generation (RAG) system."
license = "MIT"
readme = "README.md"
packages = [{ include = "graphrag" }]
authors = [
    "Alonso Guevara Fernández <alonsog@microsoft.com>",
    "Andrés Morales Esquivel <andresmor@microsoft.com>",
    "Chris Trevino <chtrevin@microsoft.com>",
    "David Tittsworth <datittsw@microsoft.com>",
    "Dayenne de Souza <ddesouza@microsoft.com>",
    "Derek Worthen <deworthe@microsoft.com>",
    "Gaudy Blanco Meneses <gaudyb@microsoft.com>",
    "Ha Trinh <trinhha@microsoft.com>",
    "Jonathan Larson <jolarso@microsoft.com>",
    "Josh Bradley <joshbradley@microsoft.com>",
    "Kate Lytvynets <kalytv@microsoft.com>",
    "Kenny Zhang <zhangken@microsoft.com>",
    "Mónica Carvajal",
    "Nathan Evans <naevans@microsoft.com>",
    "Rodrigo Racanicci <rracanicci@microsoft.com>",
    "Sarah Smith <smithsarah@microsoft.com>",
]
|
|
|
|
# Project links published with the package metadata.
[tool.poetry.urls]
Source = "https://github.com/microsoft/graphrag"
|
|
|
|
# Command-line entry point: `graphrag` maps to the `app` object in graphrag.cli.main.
[tool.poetry.scripts]
graphrag = "graphrag.cli.main:app"
|
|
|
|
# Version derivation from VCS metadata (poetry-dynamic-versioning plugin).
[tool.poetry-dynamic-versioning]
enable = true
vcs = "git"
style = "pep440"
bump = true
# Template: emit the plain PEP 440 version when distance is 0,
# otherwise pass the distance through as the `dev` component.
format-jinja = """
{%- if distance == 0 -%}
{{ serialize_pep440(base, stage, revision) }}
{%- else -%}
{{ serialize_pep440(base, stage, revision, dev=distance) }}
{%- endif -%}
"""
|
|
|
|
# Runtime dependencies, grouped by purpose.
[tool.poetry.dependencies]
python = ">=3.10,<3.13"
environs = "^11.0.0"

# Vector Stores
azure-search-documents = "^11.5.2"
lancedb = "^0.17.0"

# Async IO
aiofiles = "^24.1.0"

# LLM
fnllm = { version = "^0.3.0", extras = ["azure", "openai"] }
json-repair = "^0.30.3"
openai = "^1.68.0"
nltk = "3.9.1"
tiktoken = "^0.9.0"

# Data-Science
numpy = "^1.25.2"
graspologic = "^3.4.1"
networkx = "^3.4.2"
pandas = "^2.2.3"
pyarrow = ">=17.0.0"
umap-learn = "^0.5.6"

# Configuration
pyyaml = "^6.0.2"
python-dotenv = "^1.0.1"

pydantic = "^2.10.3"
devtools = "^0.12.2"
typing-extensions = "^4.12.2"

# Azure
azure-cosmos = "^4.9.0"
azure-identity = "^1.19.0"
azure-storage-blob = "^12.24.0"

# Needed until graspologic fixes their dependency
future = "^1.0.0"
typer = "^0.16.0"
tqdm = "^4.67.1"

textblob = "^0.18.0.post0"
spacy = "^3.8.4"
|
|
|
|
# Development-only dependencies (kept in alphabetical order).
[tool.poetry.group.dev.dependencies]
coverage = "^7.6.9"
deptry = "^0.21.1"
ipykernel = "^6.29.5"
jupyter = "^1.1.1"
mkdocs-exclude-search = "^0.6.6"
mkdocs-jupyter = "^0.25.1"
mkdocs-material = "^9.5.48"
mkdocs-typer = "^0.0.3"
nbconvert = "^7.16.4"
poethepoet = "^0.31.1"
pyright = "^1.1.390"
pytest = "^8.3.4"
pytest-asyncio = "^0.24.0"
pytest-dotenv = "^0.5.2"
pytest-timeout = "^2.3.1"
ruff = "^0.8.2"
semversioner = "^2.0.5"
update-toml = "^0.2.1"
|
|
|
|
# PEP 517 build configuration; the backend is supplied by poetry-dynamic-versioning.
[build-system]
build-backend = "poetry_dynamic_versioning.backend"
requires = [
    "poetry-core>=1.0.0",
    "poetry-dynamic-versioning>=1.0.0,<2.0.0",
]
|
|
|
|
# Poe the Poet task definitions. Names starting with "_" are the building
# blocks referenced by the sequence tasks defined further down in this file.
[tool.poe.tasks]
# Formatting, linting and type checking
_sort_imports = "ruff check --select I --fix ."
_format_code = "ruff format ."
_ruff_check = "ruff check ."
_pyright = "pyright"

# Notebook conversion (literal strings because the commands contain double quotes)
_convert_local_search_nb = 'jupyter nbconvert --output-dir=docsite/posts/query/notebooks/ --output="{notebook_name}_nb" --template=docsite/nbdocsite_template --to markdown examples_notebooks/local_search.ipynb'
_convert_global_search_nb = 'jupyter nbconvert --output-dir=docsite/posts/query/notebooks/ --output="{notebook_name}_nb" --template=docsite/nbdocsite_template --to markdown examples_notebooks/global_search.ipynb'

# Release helpers
# NOTE(review): the changelog and update-toml commands below use shell
# redirection (">") and command substitution ("$(...)"). Poe documents plain
# string (cmd) tasks as running without a shell - confirm these behave as
# intended, or convert them to `shell` tasks.
_semversioner_release = "semversioner release"
_semversioner_changelog = "semversioner changelog > CHANGELOG.md"
_semversioner_update_toml_version = "update-toml update --path tool.poetry.version --value $(poetry run semversioner current-version)"
semversioner_add = "semversioner add-change"

# Checks and fixes
coverage_report = 'coverage report --omit "**/tests/**" --show-missing'
check_format = "ruff format . --check"
fix = "ruff check --fix ."
fix_unsafe = "ruff check --fix --unsafe-fixes ."

# Test suites
_test_all = "coverage run -m pytest ./tests"
test_unit = "pytest ./tests/unit"
test_integration = "pytest ./tests/integration"
test_smoke = "pytest ./tests/smoke"
test_notebook = "pytest ./tests/notebook"
test_verbs = "pytest ./tests/verbs"
# Pass in a test pattern
test_only = "pytest -s -k"

# graphrag CLI shortcuts
index = "python -m graphrag index"
update = "python -m graphrag update"
init = "python -m graphrag init"
query = "python -m graphrag query"
prompt_tune = "python -m graphrag prompt-tune"

# Documentation site
serve_docs = "mkdocs serve"
build_docs = "mkdocs build"
|
|
|
|
# Release sequence: cut the release, regenerate the changelog, sync the version.
[[tool.poe.tasks.release]]
ignore_fail = "return_non_zero"
sequence = [
    "_semversioner_release",
    "_semversioner_changelog",
    "_semversioner_update_toml_version",
]
|
|
|
|
# Runs both notebook-conversion tasks in order.
[[tool.poe.tasks.convert_docsite_notebooks]]
ignore_fail = "return_non_zero"
sequence = [
    "_convert_local_search_nb",
    "_convert_global_search_nb",
]
|
|
|
|
# Sorts imports, then formats the code.
[[tool.poe.tasks.format]]
ignore_fail = "return_non_zero"
sequence = [
    "_sort_imports",
    "_format_code",
]
|
|
|
|
# Runs the format check, the ruff lint check and pyright in order.
[[tool.poe.tasks.check]]
ignore_fail = "return_non_zero"
sequence = [
    "check_format",
    "_ruff_check",
    "_pyright",
]
|
|
|
|
# Runs the whole test suite under coverage, then prints the coverage report.
[[tool.poe.tasks.test]]
ignore_fail = "return_non_zero"
sequence = [
    "_test_all",
    "coverage_report",
]
|
|
|
|
# Ruff: target Python 3.10 syntax and also process Jupyter notebooks.
[tool.ruff]
extend-include = ["*.ipynb"]
target-version = "py310"
|
|
|
|
# Ruff formatter settings, including formatting of code inside docstrings.
[tool.ruff.format]
preview = true
docstring-code-format = true
# NOTE(review): 20 columns is very narrow for docstring code - confirm this is intended.
docstring-code-line-length = 20
|
|
|
|
# Ruff lint rule selection (preview rules enabled).
[tool.ruff.lint]
preview = true
select = [
    "E4",
    "E7",
    "E9",
    "W291",
    "YTT",
    "T10",
    "ICN",
    "INP",
    "Q",
    "RSE",
    "SLOT",
    "INT",
    "FLY",
    "LOG",
    "C90",
    "T20",
    "D",
    "RET",
    "PD",
    "N",
    "PIE",
    "SIM",
    "S",
    "G",
    "ERA",
    "ASYNC",
    "TID",
    "UP",
    "SLF",
    "BLE",
    "C4",
    "I",
    "F",
    "A",
    "ARG",
    "PTH",
    "RUF",
    "B",
    "TC", # flake8-type-checking; Ruff 0.8 renamed this prefix from the deprecated "TCH"
    "DTZ",
    "PYI",
    "PT",
    "EM",
    "TRY",
    "PERF",
    "CPY",
    # "FBT", # use named arguments for boolean flags
    # "TD", # todos
    # "FIX", # fixme
    # "FURB", # preview rules
    # "ANN", # Type annotations, re-enable when we get bandwidth
]
ignore = [
    # Ignore module names shadowing Python builtins
    "A005",
    # Conflicts with interface argument checking
    "ARG002",
    "ANN204",
    # TODO: Inspect these pandas rules for validity
    "PD002", # prevents inplace=True
    # TODO RE-Enable when we get bandwidth
    "PERF203", # Needs restructuring of errors, we should bail-out on first error
    "C901", # needs refactoring to remove cyclomatic complexity
    "B008", # Needs to restructure our cli params with Typer into constants
]
|
|
|
|
# Rule families switched off for specific path patterns.
[tool.ruff.lint.per-file-ignores]
"tests/*" = ["S", "D", "ANN", "T201", "ASYNC", "ARG", "PTH", "TRY"]
# "TC" is the flake8-type-checking prefix; Ruff 0.8 renamed it from the deprecated "TCH".
"graphrag/index/config/*" = ["TC"]
"*.ipynb" = ["T201"]
|
|
|
|
# Builtin names that the flake8-builtins ("A") rules should not flag.
[tool.ruff.lint.flake8-builtins]
builtins-ignorelist = [
    "input",
    "id",
    "bytes",
]
|
|
|
|
# Docstring ("D") rules follow the numpy convention.
[tool.ruff.lint.pydocstyle]
convention = "numpy"
|
|
|
|
# Pyright type-checker scope. Configuration reference:
# https://github.com/microsoft/pyright/blob/9f81564a4685ff5c55edd3959f9b39030f590b2f/docs/configuration.md#sample-pyprojecttoml-file
[tool.pyright]
include = [
    "graphrag",
    "tests",
    "examples_notebooks",
]
exclude = [
    "**/node_modules",
    "**/__pycache__",
]
|
|
|
|
# Pytest settings, including options read by the pytest-asyncio,
# pytest-timeout and pytest-dotenv plugins.
[tool.pytest.ini_options]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"
timeout = 1000 # seconds
env_files = [".env"]
|