diff --git a/packages/graphrag-cache/graphrag_cache/json_cache.py b/packages/graphrag-cache/graphrag_cache/json_cache.py index 94d8e34a..dc78551c 100644 --- a/packages/graphrag-cache/graphrag_cache/json_cache.py +++ b/packages/graphrag-cache/graphrag_cache/json_cache.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing 'JsonPipelineCache' model.""" +"""A module containing 'JsonCache' model.""" import json from typing import Any diff --git a/packages/graphrag/graphrag/callbacks/workflow_callbacks_manager.py b/packages/graphrag/graphrag/callbacks/workflow_callbacks_manager.py index 6a030ec6..9d3803f1 100644 --- a/packages/graphrag/graphrag/callbacks/workflow_callbacks_manager.py +++ b/packages/graphrag/graphrag/callbacks/workflow_callbacks_manager.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing the WorkflowCallbacks registry.""" +"""A module containing 'WorkflowCallbacksManager' model.""" from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks from graphrag.index.typing.pipeline_run_result import PipelineRunResult diff --git a/packages/graphrag/graphrag/index/input/csv.py b/packages/graphrag/graphrag/index/input/csv.py index a70863e7..810ffdd0 100644 --- a/packages/graphrag/graphrag/index/input/csv.py +++ b/packages/graphrag/graphrag/index/input/csv.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing load method definition.""" +"""A module containing 'CSVFileReader' model.""" import logging from io import BytesIO diff --git a/packages/graphrag/graphrag/index/input/factory.py b/packages/graphrag/graphrag/index/input/factory.py index c938c6e4..bc3b8112 100644 --- a/packages/graphrag/graphrag/index/input/factory.py +++ b/packages/graphrag/graphrag/index/input/factory.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing create_input method definition.""" +"""A module containing 'InputReaderFactory' model.""" import logging diff --git a/packages/graphrag/graphrag/index/input/input_reader.py b/packages/graphrag/graphrag/index/input/input_reader.py index 98a713e5..18dc713d 100644 --- a/packages/graphrag/graphrag/index/input/input_reader.py +++ b/packages/graphrag/graphrag/index/input/input_reader.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing 'PipelineCache' model.""" +"""A module containing 'InputReader' model.""" from __future__ import annotations diff --git a/packages/graphrag/graphrag/index/input/json.py b/packages/graphrag/graphrag/index/input/json.py index cae7db5d..e9e1a7ab 100644 --- a/packages/graphrag/graphrag/index/input/json.py +++ b/packages/graphrag/graphrag/index/input/json.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing load method definition.""" +"""A module containing 'JSONFileReader' model.""" import json import logging diff --git a/packages/graphrag/graphrag/index/input/text.py b/packages/graphrag/graphrag/index/input/text.py index dac2c4c7..f9e2d777 100644 --- a/packages/graphrag/graphrag/index/input/text.py +++ b/packages/graphrag/graphrag/index/input/text.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing load method definition.""" +"""A module containing 'TextFileReader' model.""" import logging from pathlib import Path diff --git a/packages/graphrag/graphrag/index/operations/chunk_text/chunk_text.py b/packages/graphrag/graphrag/index/operations/chunk_text/chunk_text.py index fc50370e..cd63fec4 100644 --- a/packages/graphrag/graphrag/index/operations/chunk_text/chunk_text.py +++ b/packages/graphrag/graphrag/index/operations/chunk_text/chunk_text.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing _get_num_total, chunk, run_strategy and load_strategy methods definitions.""" +"""A module containing chunk_text method definitions.""" from typing import Any, cast @@ -54,7 +54,7 @@ def chunk_text( strategy: sentence ``` """ - strategy_exec = load_strategy(strategy) + strategy_exec = _load_strategy(strategy) num_total = _get_num_total(input, column) tick = progress_ticker(callbacks.progress, num_total) @@ -67,7 +67,7 @@ def chunk_text( input.apply( cast( "Any", - lambda x: run_strategy( + lambda x: _run_strategy( strategy_exec, x[column], config, @@ -79,7 +79,7 @@ def chunk_text( ) -def run_strategy( +def _run_strategy( strategy_exec: ChunkStrategy, input: ChunkInput, config: ChunkingConfig, @@ -111,7 +111,7 @@ def run_strategy( return results -def load_strategy(strategy: ChunkStrategyType) -> ChunkStrategy: +def _load_strategy(strategy: ChunkStrategyType) -> ChunkStrategy: """Load strategy method definition.""" match strategy: case ChunkStrategyType.tokens: diff --git a/packages/graphrag/graphrag/index/operations/chunk_text/strategies.py b/packages/graphrag/graphrag/index/operations/chunk_text/strategies.py index 9d48786e..85a1537e 100644 --- a/packages/graphrag/graphrag/index/operations/chunk_text/strategies.py +++ b/packages/graphrag/graphrag/index/operations/chunk_text/strategies.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing chunk strategies.""" +"""A module containing run_tokens and run_sentences methods.""" from collections.abc import Iterable diff --git a/packages/graphrag/graphrag/index/operations/cluster_graph.py b/packages/graphrag/graphrag/index/operations/cluster_graph.py index d7e1b0c7..ff4632ea 100644 --- a/packages/graphrag/graphrag/index/operations/cluster_graph.py +++ b/packages/graphrag/graphrag/index/operations/cluster_graph.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing cluster_graph, apply_clustering methods definition.""" +"""A module containing cluster_graph method definition.""" import logging diff --git a/packages/graphrag/graphrag/index/operations/compute_degree.py b/packages/graphrag/graphrag/index/operations/compute_degree.py index b720bf6d..8de95ad2 100644 --- a/packages/graphrag/graphrag/index/operations/compute_degree.py +++ b/packages/graphrag/graphrag/index/operations/compute_degree.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing create_graph definition.""" +"""A module containing compute_degree method definition.""" import networkx as nx import pandas as pd diff --git a/packages/graphrag/graphrag/index/operations/compute_edge_combined_degree.py b/packages/graphrag/graphrag/index/operations/compute_edge_combined_degree.py index f6c75b00..440fe392 100644 --- a/packages/graphrag/graphrag/index/operations/compute_edge_combined_degree.py +++ b/packages/graphrag/graphrag/index/operations/compute_edge_combined_degree.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing compute_edge_combined_degree methods definition.""" +"""A module containing compute_edge_combined_degree method definition.""" from typing import cast diff --git a/packages/graphrag/graphrag/index/operations/embed_text/embed_text.py b/packages/graphrag/graphrag/index/operations/embed_text/embed_text.py index d8144f8c..08f941f9 100644 --- a/packages/graphrag/graphrag/index/operations/embed_text/embed_text.py +++ b/packages/graphrag/graphrag/index/operations/embed_text/embed_text.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing embed_text, load_strategy and create_row_from_embedding_data methods definition.""" +"""A module containing embed_text method definition.""" import logging diff --git a/packages/graphrag/graphrag/index/operations/embed_text/run_embed_text.py b/packages/graphrag/graphrag/index/operations/embed_text/run_embed_text.py index fc3da5ee..cb8b2016 100644 --- a/packages/graphrag/graphrag/index/operations/embed_text/run_embed_text.py +++ b/packages/graphrag/graphrag/index/operations/embed_text/run_embed_text.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing run method definition.""" +"""A module containing 'TextEmbeddingResult' model and run_embed_text method definition.""" import asyncio import logging diff --git a/packages/graphrag/graphrag/index/operations/extract_graph/extract_graph.py b/packages/graphrag/graphrag/index/operations/extract_graph/extract_graph.py index 0d2ab114..96661c41 100644 --- a/packages/graphrag/graphrag/index/operations/extract_graph/extract_graph.py +++ b/packages/graphrag/graphrag/index/operations/extract_graph/extract_graph.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing entity_extract methods.""" +"""A module containing extract_graph method.""" import logging @@ -35,7 +35,7 @@ async def extract_graph( nonlocal num_started text = row[text_column] id = row[id_column] - result = await run_extract_graph( + result = await _run_extract_graph( text=text, source_id=id, entity_types=entity_types, @@ -68,7 +68,7 @@ async def extract_graph( return (entities, relationships) -async def run_extract_graph( +async def _run_extract_graph( text: str, source_id: str, entity_types: list[str], diff --git a/packages/graphrag/graphrag/index/operations/graph_to_dataframes.py b/packages/graphrag/graphrag/index/operations/graph_to_dataframes.py index dbc608f6..632eb125 100644 --- a/packages/graphrag/graphrag/index/operations/graph_to_dataframes.py +++ b/packages/graphrag/graphrag/index/operations/graph_to_dataframes.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing create_graph definition.""" +"""A module containing graph_to_dataframes method definition.""" import networkx as nx import pandas as pd diff --git a/packages/graphrag/graphrag/index/operations/summarize_communities/build_mixed_context.py b/packages/graphrag/graphrag/index/operations/summarize_communities/build_mixed_context.py index 0e0a4e39..6ae59a40 100644 --- a/packages/graphrag/graphrag/index/operations/summarize_communities/build_mixed_context.py +++ b/packages/graphrag/graphrag/index/operations/summarize_communities/build_mixed_context.py @@ -1,6 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing the build_mixed_context method definition.""" + +"""A module containing build_mixed_context method definition.""" import pandas as pd diff --git a/packages/graphrag/graphrag/index/operations/summarize_communities/summarize_communities.py b/packages/graphrag/graphrag/index/operations/summarize_communities/summarize_communities.py index f6028332..709a4e3b 100644 --- a/packages/graphrag/graphrag/index/operations/summarize_communities/summarize_communities.py +++ b/packages/graphrag/graphrag/index/operations/summarize_communities/summarize_communities.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing create_community_reports and load_strategy methods definition.""" +"""A module containing summarize_communities method definition.""" import logging from collections.abc import Callable diff --git a/packages/graphrag/graphrag/index/operations/summarize_descriptions/description_summary_extractor.py b/packages/graphrag/graphrag/index/operations/summarize_descriptions/description_summary_extractor.py index 4a999fd6..d31a1261 100644 --- a/packages/graphrag/graphrag/index/operations/summarize_descriptions/description_summary_extractor.py +++ b/packages/graphrag/graphrag/index/operations/summarize_descriptions/description_summary_extractor.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing 'GraphExtractionResult' and 'GraphExtractor' models.""" +"""A module containing 'SummarizationResult' and 'SummarizeExtractor' models.""" import json from dataclasses import dataclass diff --git a/packages/graphrag/graphrag/index/text_splitting/text_splitting.py b/packages/graphrag/graphrag/index/text_splitting/text_splitting.py index caa9153e..cb46510a 100644 --- a/packages/graphrag/graphrag/index/text_splitting/text_splitting.py +++ b/packages/graphrag/graphrag/index/text_splitting/text_splitting.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""A module containing the 'Tokenizer', 'TextSplitter', 'NoopTextSplitter' and 'TokenTextSplitter' models.""" +"""A module containing 'TokenChunkerOptions', 'TextSplitter', 'NoopTextSplitter', 'TokenTextSplitter', 'split_single_text_on_tokens', and 'split_multiple_texts_on_tokens'.""" import logging from abc import ABC, abstractmethod diff --git a/packages/graphrag/graphrag/index/utils/derive_from_rows.py b/packages/graphrag/graphrag/index/utils/derive_from_rows.py index 663d78c1..bb4e8a30 100644 --- a/packages/graphrag/graphrag/index/utils/derive_from_rows.py +++ b/packages/graphrag/graphrag/index/utils/derive_from_rows.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Microsoft Corporation. # Licensed under the MIT License -"""Apply a generic transform function to each row in a table.""" +"""A module containing derive_from_rows, derive_from_rows_asyncio_threads, and derive_from_rows_asyncio methods.""" import asyncio import inspect @@ -55,9 +55,6 @@ async def derive_from_rows( raise ValueError(msg) -"""A module containing the derive_from_rows_async method.""" - - async def derive_from_rows_asyncio_threads( input: pd.DataFrame, transform: Callable[[pd.Series], Awaitable[ItemType]], @@ -88,9 +85,6 @@ async def derive_from_rows_asyncio_threads( ) -"""A module containing the derive_from_rows_async method.""" - - async def derive_from_rows_asyncio( input: pd.DataFrame, transform: Callable[[pd.Series], Awaitable[ItemType]], diff --git a/tests/mock_provider.py b/tests/mock_provider.py index 524222dc..9ffb52fb 100644 --- a/tests/mock_provider.py +++ b/tests/mock_provider.py @@ -1,7 +1,7 @@ # Copyright (c) 2025 Microsoft Corporation. # Licensed under the MIT License -"""A module containing mock model provider definitions.""" +"""A module containing 'MockChatLLM' and 'MockEmbeddingLLM' models.""" from collections.abc import AsyncGenerator, Generator from typing import Any diff --git a/tests/unit/indexing/operations/chunk_text/test_chunk_text.py b/tests/unit/indexing/operations/chunk_text/test_chunk_text.py index 33e5b7cc..029d34fa 100644 --- a/tests/unit/indexing/operations/chunk_text/test_chunk_text.py +++ b/tests/unit/indexing/operations/chunk_text/test_chunk_text.py @@ -10,9 +10,9 @@ import pytest from graphrag.config.enums import ChunkStrategyType from graphrag.index.operations.chunk_text.chunk_text import ( _get_num_total, + _load_strategy, + _run_strategy, chunk_text, - load_strategy, - run_strategy, ) from graphrag.index.operations.chunk_text.typing import ( TextChunk, @@ -36,7 +36,7 @@ def test_get_num_total_array(): def test_load_strategy_tokens(): strategy_type = ChunkStrategyType.tokens - strategy_loaded = load_strategy(strategy_type) + strategy_loaded = _load_strategy(strategy_type) assert strategy_loaded.__name__ == "run_tokens" @@ -44,7 +44,7 @@ def test_load_strategy_tokens(): def test_load_strategy_sentence(): strategy_type = ChunkStrategyType.sentence - strategy_loaded = load_strategy(strategy_type) + strategy_loaded = _load_strategy(strategy_type) assert strategy_loaded.__name__ == "run_sentences" @@ -55,7 +55,7 @@ def test_load_strategy_none(): with pytest.raises( ValueError, match="Unknown strategy: " ): - load_strategy(strategy_type) # type: ignore + _load_strategy(strategy_type) # type: ignore def test_run_strategy_str(): @@ -71,7 +71,7 @@ def test_run_strategy_str(): ) ] - runned = run_strategy(strategy_mocked, input, config, tick) + runned = _run_strategy(strategy_mocked, input, config, tick) assert runned == ["text test for run strategy"] @@ -93,7 +93,7 @@ def test_run_strategy_arr_str(): "use for strategy", ] - runned = run_strategy(strategy_mocked, input, config, tick) + runned = _run_strategy(strategy_mocked, input, config, tick) assert runned == expected @@ -123,7 +123,7 @@ def test_run_strategy_arr_tuple(): ), ] - runned = run_strategy(strategy_mocked, input, config, tick) + runned = _run_strategy(strategy_mocked, input, config, tick) assert runned == expected @@ -153,12 +153,12 @@ def test_run_strategy_arr_tuple_same_doc(): ), ] - runned = run_strategy(strategy_mocked, input, config, tick) + runned = _run_strategy(strategy_mocked, input, config, tick) assert runned == expected -@mock.patch("graphrag.index.operations.chunk_text.chunk_text.load_strategy") -@mock.patch("graphrag.index.operations.chunk_text.chunk_text.run_strategy") +@mock.patch("graphrag.index.operations.chunk_text.chunk_text._load_strategy") +@mock.patch("graphrag.index.operations.chunk_text.chunk_text._run_strategy") @mock.patch("graphrag.index.operations.chunk_text.chunk_text.progress_ticker") def test_chunk_text(mock_progress_ticker, mock_run_strategy, mock_load_strategy): input_data = pd.DataFrame({"name": ["The Shining"]}) diff --git a/tests/unit/indexing/verbs/entities/extraction/strategies/graph_intelligence/test_gi_entity_extraction.py b/tests/unit/indexing/verbs/entities/extraction/strategies/graph_intelligence/test_gi_entity_extraction.py index 0751adb5..7d89d542 100644 --- a/tests/unit/indexing/verbs/entities/extraction/strategies/graph_intelligence/test_gi_entity_extraction.py +++ b/tests/unit/indexing/verbs/entities/extraction/strategies/graph_intelligence/test_gi_entity_extraction.py @@ -2,7 +2,7 @@ # Licensed under the MIT License import unittest -from graphrag.index.operations.extract_graph.extract_graph import run_extract_graph +from graphrag.index.operations.extract_graph.extract_graph import _run_extract_graph from graphrag.prompts.index.extract_graph import GRAPH_EXTRACTION_PROMPT from tests.unit.indexing.verbs.helpers.mock_llm import create_mock_llm @@ -22,7 +22,7 @@ SIMPLE_EXTRACTION_RESPONSE = """ class TestRunChain(unittest.IsolatedAsyncioTestCase): async def test_run_extract_graph_single_document_correct_entities_returned(self): - entities_df, _ = await run_extract_graph( + entities_df, _ = await _run_extract_graph( text="test_text", source_id="1", entity_types=["person"], @@ -39,7 +39,7 @@ class TestRunChain(unittest.IsolatedAsyncioTestCase): ) async def test_run_extract_graph_single_document_correct_edges_returned(self): - _, relationships_df = await run_extract_graph( + _, relationships_df = await _run_extract_graph( text="test_text", source_id="1", entity_types=["person"], @@ -61,7 +61,7 @@ class TestRunChain(unittest.IsolatedAsyncioTestCase): } async def test_run_extract_graph_single_document_source_ids_mapped(self): - entities_df, relationships_df = await run_extract_graph( + entities_df, relationships_df = await _run_extract_graph( text="test_text", source_id="1", entity_types=["person"],