Merge v3/main into chunker-factory

This commit is contained in:
Nathan Evans 2025-12-17 15:21:51 -08:00
commit 81240ab2e3
21 changed files with 27 additions and 32 deletions

View File

@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing 'JsonPipelineCache' model."""
"""A module containing 'JsonCache' model."""
import json
from typing import Any

View File

@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing the WorkflowCallbacks registry."""
"""A module containing 'WorkflowCallbacksManager' model."""
from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks
from graphrag.index.typing.pipeline_run_result import PipelineRunResult

View File

@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing load method definition."""
"""A module containing 'CSVFileReader' model."""
import logging
from io import BytesIO

View File

@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing create_input method definition."""
"""A module containing 'InputReaderFactory' model."""
import logging

View File

@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing 'PipelineCache' model."""
"""A module containing 'InputReader' model."""
from __future__ import annotations

View File

@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing load method definition."""
"""A module containing 'JSONFileReader' model."""
import json
import logging

View File

@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing load method definition."""
"""A module containing 'TextFileReader' model."""
import logging
from pathlib import Path

View File

@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing cluster_graph, apply_clustering methods definition."""
"""A module containing cluster_graph method definition."""
import logging

View File

@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing create_graph definition."""
"""A module containing compute_degree method definition."""
import networkx as nx
import pandas as pd

View File

@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing compute_edge_combined_degree methods definition."""
"""A module containing compute_edge_combined_degree method definition."""
from typing import cast

View File

@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing embed_text, load_strategy and create_row_from_embedding_data methods definition."""
"""A module containing embed_text method definition."""
import logging

View File

@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing run method definition."""
"""A module containing 'TextEmbeddingResult' model and run_embed_text method definition."""
import asyncio
import logging

View File

@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing entity_extract methods."""
"""A module containing extract_graph method."""
import logging
@@ -35,7 +35,7 @@ async def extract_graph(
nonlocal num_started
text = row[text_column]
id = row[id_column]
result = await run_extract_graph(
result = await _run_extract_graph(
text=text,
source_id=id,
entity_types=entity_types,
@@ -68,7 +68,7 @@ async def extract_graph(
return (entities, relationships)
async def run_extract_graph(
async def _run_extract_graph(
text: str,
source_id: str,
entity_types: list[str],

View File

@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing create_graph definition."""
"""A module containing graph_to_dataframes method definition."""
import networkx as nx
import pandas as pd

View File

@@ -1,6 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing the build_mixed_context method definition."""
"""A module containing build_mixed_context method definition."""
import pandas as pd

View File

@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing create_community_reports and load_strategy methods definition."""
"""A module containing summarize_communities method definition."""
import logging
from collections.abc import Callable

View File

@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing 'GraphExtractionResult' and 'GraphExtractor' models."""
"""A module containing 'SummarizationResult' and 'SummarizeExtractor' models."""
import json
from dataclasses import dataclass

View File

@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing the 'TokenTextSplitter' class."""
"""A module containing 'TokenTextSplitter' class and 'split_single_text_on_tokens' function."""
import logging
from abc import ABC

View File

@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""Apply a generic transform function to each row in a table."""
"""A module containing derive_from_rows, derive_from_rows_asyncio_threads, and derive_from_rows_asyncio methods."""
import asyncio
import inspect
@@ -55,9 +55,6 @@ async def derive_from_rows(
raise ValueError(msg)
"""A module containing the derive_from_rows_async method."""
async def derive_from_rows_asyncio_threads(
input: pd.DataFrame,
transform: Callable[[pd.Series], Awaitable[ItemType]],
@@ -88,9 +85,6 @@ async def derive_from_rows_asyncio_threads(
)
"""A module containing the derive_from_rows_async method."""
async def derive_from_rows_asyncio(
input: pd.DataFrame,
transform: Callable[[pd.Series], Awaitable[ItemType]],

View File

@@ -1,7 +1,7 @@
# Copyright (c) 2025 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing mock model provider definitions."""
"""A module containing 'MockChatLLM' and 'MockEmbeddingLLM' models."""
from collections.abc import AsyncGenerator, Generator
from typing import Any

View File

@@ -2,7 +2,7 @@
# Licensed under the MIT License
import unittest
from graphrag.index.operations.extract_graph.extract_graph import run_extract_graph
from graphrag.index.operations.extract_graph.extract_graph import _run_extract_graph
from graphrag.prompts.index.extract_graph import GRAPH_EXTRACTION_PROMPT
from tests.unit.indexing.verbs.helpers.mock_llm import create_mock_llm
@@ -22,7 +22,7 @@ SIMPLE_EXTRACTION_RESPONSE = """
class TestRunChain(unittest.IsolatedAsyncioTestCase):
async def test_run_extract_graph_single_document_correct_entities_returned(self):
entities_df, _ = await run_extract_graph(
entities_df, _ = await _run_extract_graph(
text="test_text",
source_id="1",
entity_types=["person"],
@@ -39,7 +39,7 @@ class TestRunChain(unittest.IsolatedAsyncioTestCase):
)
async def test_run_extract_graph_single_document_correct_edges_returned(self):
_, relationships_df = await run_extract_graph(
_, relationships_df = await _run_extract_graph(
text="test_text",
source_id="1",
entity_types=["person"],
@@ -61,7 +61,7 @@ class TestRunChain(unittest.IsolatedAsyncioTestCase):
}
async def test_run_extract_graph_single_document_source_ids_mapped(self):
entities_df, relationships_df = await run_extract_graph(
entities_df, relationships_df = await _run_extract_graph(
text="test_text",
source_id="1",
entity_types=["person"],