mirror of
https://github.com/microsoft/graphrag.git
synced 2026-01-14 09:07:20 +08:00
Improve CLI speed with lazy imports (#1319)
This commit is contained in:
parent
9b4f24ebce
commit
22a57d14c7
@ -0,0 +1,4 @@
|
||||
{
|
||||
"type": "patch",
|
||||
"description": "move import statements out of init files"
|
||||
}
|
||||
@ -0,0 +1,4 @@
|
||||
{
|
||||
"type": "patch",
|
||||
"description": "fix autocompletion of existing files/directory paths."
|
||||
}
|
||||
@ -20,9 +20,9 @@ Before running auto tuning, ensure you have already initialized your workspace w
|
||||
You can run the main script from the command line with various options:
|
||||
|
||||
```bash
|
||||
graphrag prompt-tune [--root ROOT] [--domain DOMAIN] [--method METHOD] [--limit LIMIT] [--language LANGUAGE] \
|
||||
graphrag prompt-tune [--root ROOT] [--config CONFIG] [--domain DOMAIN] [--selection-method METHOD] [--limit LIMIT] [--language LANGUAGE] \
|
||||
[--max-tokens MAX_TOKENS] [--chunk-size CHUNK_SIZE] [--n-subset-max N_SUBSET_MAX] [--k K] \
|
||||
[--min-examples-required MIN_EXAMPLES_REQUIRED] [--no-entity-types] [--output OUTPUT]
|
||||
[--min-examples-required MIN_EXAMPLES_REQUIRED] [--discover-entity-types] [--output OUTPUT]
|
||||
```
|
||||
|
||||
## Command-Line Options
|
||||
@ -49,7 +49,7 @@ graphrag prompt-tune [--root ROOT] [--domain DOMAIN] [--method METHOD] [--limit
|
||||
|
||||
- `--min-examples-required` (optional): The minimum number of examples required for entity extraction prompts. Default is 2.
|
||||
|
||||
- `--no-entity-types` (optional): Use untyped entity extraction generation. We recommend using this when your data covers a lot of topics or it is highly randomized.
|
||||
- `--discover-entity-types` (optional): Allow the LLM to discover and extract entities automatically. We recommend using this when your data covers a lot of topics or it is highly randomized.
|
||||
|
||||
- `--output` (optional): The folder to save the generated prompts. Default is "prompts".
|
||||
|
||||
|
||||
@ -5,7 +5,7 @@ import os
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from graphrag.index import run_pipeline_with_config
|
||||
from graphrag.index.run import run_pipeline_with_config
|
||||
|
||||
pipeline_file = os.path.join(
|
||||
os.path.dirname(os.path.abspath(__file__)), "./pipeline.yml"
|
||||
|
||||
@ -5,8 +5,8 @@ import os
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from graphrag.index import run_pipeline, run_pipeline_with_config
|
||||
from graphrag.index.config import PipelineWorkflowReference
|
||||
from graphrag.index.config.workflow import PipelineWorkflowReference
|
||||
from graphrag.index.run import run_pipeline, run_pipeline_with_config
|
||||
|
||||
# our fake dataset
|
||||
dataset = pd.DataFrame([{"col1": 2, "col2": 4}, {"col1": 5, "col2": 10}])
|
||||
|
||||
@ -3,9 +3,10 @@
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from graphrag.index import run_pipeline, run_pipeline_with_config
|
||||
from graphrag.index.config import PipelineCSVInputConfig, PipelineWorkflowReference
|
||||
from graphrag.index.input import load_input
|
||||
from graphrag.index.config.input import PipelineCSVInputConfig
|
||||
from graphrag.index.config.workflow import PipelineWorkflowReference
|
||||
from graphrag.index.input.load_input import load_input
|
||||
from graphrag.index.run import run_pipeline, run_pipeline_with_config
|
||||
|
||||
sample_data_dir = os.path.join(
|
||||
os.path.dirname(os.path.abspath(__file__)), "../_sample_data/"
|
||||
|
||||
@ -3,6 +3,6 @@
|
||||
|
||||
"""The GraphRAG package."""
|
||||
|
||||
from .cli.main import app
|
||||
from graphrag.cli.main import app
|
||||
|
||||
app(prog_name="graphrag")
|
||||
|
||||
@ -8,7 +8,7 @@ Backwards compatibility is not guaranteed at this time.
|
||||
"""
|
||||
|
||||
from graphrag.api.index import build_index
|
||||
from graphrag.api.prompt_tune import DocSelectionType, generate_indexing_prompts
|
||||
from graphrag.api.prompt_tune import generate_indexing_prompts
|
||||
from graphrag.api.query import (
|
||||
drift_search,
|
||||
global_search,
|
||||
@ -16,6 +16,7 @@ from graphrag.api.query import (
|
||||
local_search,
|
||||
local_search_streaming,
|
||||
)
|
||||
from graphrag.prompt_tune.types import DocSelectionType
|
||||
|
||||
__all__ = [ # noqa: RUF022
|
||||
# index API
|
||||
|
||||
@ -10,13 +10,14 @@ Backwards compatibility is not guaranteed at this time.
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from graphrag.config import CacheType, GraphRagConfig
|
||||
from graphrag.config.enums import CacheType
|
||||
from graphrag.config.models.graph_rag_config import GraphRagConfig
|
||||
from graphrag.index.cache.noop_pipeline_cache import NoopPipelineCache
|
||||
from graphrag.index.create_pipeline_config import create_pipeline_config
|
||||
from graphrag.index.emit.types import TableEmitterType
|
||||
from graphrag.index.run import run_pipeline_with_config
|
||||
from graphrag.index.typing import PipelineRunResult
|
||||
from graphrag.logging import ProgressReporter
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
from graphrag.vector_stores.factory import VectorStoreType
|
||||
|
||||
|
||||
|
||||
@ -15,25 +15,32 @@ from datashaper import NoopVerbCallbacks
|
||||
from pydantic import PositiveInt, validate_call
|
||||
|
||||
from graphrag.config.models.graph_rag_config import GraphRagConfig
|
||||
from graphrag.index.llm import load_llm
|
||||
from graphrag.logging import PrintProgressReporter
|
||||
from graphrag.prompt_tune.generator import (
|
||||
MAX_TOKEN_COUNT,
|
||||
create_community_summarization_prompt,
|
||||
create_entity_extraction_prompt,
|
||||
create_entity_summarization_prompt,
|
||||
detect_language,
|
||||
from graphrag.index.llm.load_llm import load_llm
|
||||
from graphrag.logging.print_progress import PrintProgressReporter
|
||||
from graphrag.prompt_tune.defaults import MAX_TOKEN_COUNT
|
||||
from graphrag.prompt_tune.generator.community_report_rating import (
|
||||
generate_community_report_rating,
|
||||
)
|
||||
from graphrag.prompt_tune.generator.community_report_summarization import (
|
||||
create_community_summarization_prompt,
|
||||
)
|
||||
from graphrag.prompt_tune.generator.community_reporter_role import (
|
||||
generate_community_reporter_role,
|
||||
generate_domain,
|
||||
)
|
||||
from graphrag.prompt_tune.generator.domain import generate_domain
|
||||
from graphrag.prompt_tune.generator.entity_extraction_prompt import (
|
||||
create_entity_extraction_prompt,
|
||||
)
|
||||
from graphrag.prompt_tune.generator.entity_relationship import (
|
||||
generate_entity_relationship_examples,
|
||||
generate_entity_types,
|
||||
generate_persona,
|
||||
)
|
||||
from graphrag.prompt_tune.loader import (
|
||||
MIN_CHUNK_SIZE,
|
||||
load_docs_in_chunks,
|
||||
from graphrag.prompt_tune.generator.entity_summarization_prompt import (
|
||||
create_entity_summarization_prompt,
|
||||
)
|
||||
from graphrag.prompt_tune.generator.entity_types import generate_entity_types
|
||||
from graphrag.prompt_tune.generator.language import detect_language
|
||||
from graphrag.prompt_tune.generator.persona import generate_persona
|
||||
from graphrag.prompt_tune.loader.input import MIN_CHUNK_SIZE, load_docs_in_chunks
|
||||
from graphrag.prompt_tune.types import DocSelectionType
|
||||
|
||||
|
||||
|
||||
@ -24,12 +24,12 @@ from typing import Any
|
||||
import pandas as pd
|
||||
from pydantic import validate_call
|
||||
|
||||
from graphrag.config import GraphRagConfig
|
||||
from graphrag.config.models.graph_rag_config import GraphRagConfig
|
||||
from graphrag.index.config.embeddings import (
|
||||
community_full_content_embedding,
|
||||
entity_description_embedding,
|
||||
)
|
||||
from graphrag.logging import PrintProgressReporter
|
||||
from graphrag.logging.print_progress import PrintProgressReporter
|
||||
from graphrag.query.factories import (
|
||||
get_drift_search_engine,
|
||||
get_global_search_engine,
|
||||
@ -47,8 +47,8 @@ from graphrag.query.indexer_adapters import (
|
||||
from graphrag.query.structured_search.base import SearchResult # noqa: TCH001
|
||||
from graphrag.utils.cli import redact
|
||||
from graphrag.utils.embeddings import create_collection_name
|
||||
from graphrag.vector_stores import VectorStoreFactory, VectorStoreType
|
||||
from graphrag.vector_stores.base import BaseVectorStore
|
||||
from graphrag.vector_stores.factory import VectorStoreFactory, VectorStoreType
|
||||
|
||||
reporter = PrintProgressReporter("")
|
||||
|
||||
|
||||
@ -8,17 +8,16 @@ from typing import cast
|
||||
|
||||
from datashaper import WorkflowCallbacks
|
||||
|
||||
from graphrag.config import ReportingType
|
||||
from graphrag.index.config import (
|
||||
from graphrag.callbacks.blob_workflow_callbacks import BlobWorkflowCallbacks
|
||||
from graphrag.callbacks.console_workflow_callbacks import ConsoleWorkflowCallbacks
|
||||
from graphrag.callbacks.file_workflow_callbacks import FileWorkflowCallbacks
|
||||
from graphrag.config.enums import ReportingType
|
||||
from graphrag.index.config.reporting import (
|
||||
PipelineBlobReportingConfig,
|
||||
PipelineFileReportingConfig,
|
||||
PipelineReportingConfig,
|
||||
)
|
||||
|
||||
from .blob_workflow_callbacks import BlobWorkflowCallbacks
|
||||
from .console_workflow_callbacks import ConsoleWorkflowCallbacks
|
||||
from .file_workflow_callbacks import FileWorkflowCallbacks
|
||||
|
||||
|
||||
def create_pipeline_reporter(
|
||||
config: PipelineReportingConfig | None, root_dir: str | None
|
||||
|
||||
@ -3,10 +3,9 @@
|
||||
|
||||
"""GlobalSearch LLM Callbacks."""
|
||||
|
||||
from graphrag.callbacks.llm_callbacks import BaseLLMCallback
|
||||
from graphrag.query.structured_search.base import SearchResult
|
||||
|
||||
from .llm_callbacks import BaseLLMCallback
|
||||
|
||||
|
||||
class GlobalSearchLLMCallback(BaseLLMCallback):
|
||||
"""GlobalSearch LLM Callbacks."""
|
||||
|
||||
@ -7,7 +7,7 @@ from typing import Any
|
||||
|
||||
from datashaper import ExecutionNode, NoopWorkflowCallbacks, Progress, TableContainer
|
||||
|
||||
from graphrag.logging import ProgressReporter
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
|
||||
|
||||
class ProgressWorkflowCallbacks(NoopWorkflowCallbacks):
|
||||
|
||||
@ -11,15 +11,15 @@ import warnings
|
||||
from pathlib import Path
|
||||
|
||||
import graphrag.api as api
|
||||
from graphrag.config import (
|
||||
CacheType,
|
||||
enable_logging_with_config,
|
||||
load_config,
|
||||
resolve_paths,
|
||||
)
|
||||
from graphrag.config.enums import CacheType
|
||||
from graphrag.config.load_config import load_config
|
||||
from graphrag.config.logging import enable_logging_with_config
|
||||
from graphrag.config.resolve_path import resolve_paths
|
||||
from graphrag.index.emit.types import TableEmitterType
|
||||
from graphrag.index.validate_config import validate_config_names
|
||||
from graphrag.logging import ProgressReporter, ReporterType, create_progress_reporter
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
from graphrag.logging.factories import create_progress_reporter
|
||||
from graphrag.logging.types import ReporterType
|
||||
from graphrag.utils.cli import redact
|
||||
|
||||
# Ignore warnings from numba
|
||||
|
||||
@ -6,7 +6,8 @@
|
||||
from pathlib import Path
|
||||
|
||||
from graphrag.config.init_content import INIT_DOTENV, INIT_YAML
|
||||
from graphrag.logging import ReporterType, create_progress_reporter
|
||||
from graphrag.logging.factories import create_progress_reporter
|
||||
from graphrag.logging.types import ReporterType
|
||||
from graphrag.prompts.index.claim_extraction import CLAIM_EXTRACTION_PROMPT
|
||||
from graphrag.prompts.index.community_report import (
|
||||
COMMUNITY_REPORT_PROMPT,
|
||||
|
||||
@ -3,23 +3,24 @@
|
||||
|
||||
"""CLI entrypoint."""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import re
|
||||
from collections.abc import Callable
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Annotated
|
||||
|
||||
import typer
|
||||
|
||||
from graphrag.api import DocSelectionType
|
||||
from graphrag.index.emit.types import TableEmitterType
|
||||
from graphrag.logging import ReporterType
|
||||
from graphrag.prompt_tune.generator import MAX_TOKEN_COUNT
|
||||
from graphrag.prompt_tune.loader import MIN_CHUNK_SIZE
|
||||
|
||||
from .index import index_cli, update_cli
|
||||
from .initialize import initialize_project_at
|
||||
from .prompt_tune import prompt_tune
|
||||
from .query import run_drift_search, run_global_search, run_local_search
|
||||
from graphrag.logging.types import ReporterType
|
||||
from graphrag.prompt_tune.defaults import (
|
||||
MAX_TOKEN_COUNT,
|
||||
MIN_CHUNK_SIZE,
|
||||
N_SUBSET_MAX,
|
||||
K,
|
||||
)
|
||||
from graphrag.prompt_tune.types import DocSelectionType
|
||||
|
||||
INVALID_METHOD_ERROR = "Invalid method"
|
||||
|
||||
@ -29,6 +30,48 @@ app = typer.Typer(
|
||||
)
|
||||
|
||||
|
||||
# A workaround for typer's lack of support for proper autocompletion of file/directory paths
|
||||
# For more detail, watch
|
||||
# https://github.com/fastapi/typer/discussions/682
|
||||
# https://github.com/fastapi/typer/issues/951
|
||||
def path_autocomplete(
|
||||
file_okay: bool = True,
|
||||
dir_okay: bool = True,
|
||||
readable: bool = True,
|
||||
writable: bool = False,
|
||||
match_wildcard: str | None = None,
|
||||
) -> Callable[[str], list[str]]:
|
||||
"""Autocomplete file and directory paths."""
|
||||
|
||||
def wildcard_match(string: str, pattern: str) -> bool:
|
||||
regex = re.escape(pattern).replace(r"\?", ".").replace(r"\*", ".*")
|
||||
return re.fullmatch(regex, string) is not None
|
||||
|
||||
def completer(incomplete: str) -> list[str]:
|
||||
items = os.listdir()
|
||||
completions = []
|
||||
for item in items:
|
||||
if not file_okay and Path(item).is_file():
|
||||
continue
|
||||
if not dir_okay and Path(item).is_dir():
|
||||
continue
|
||||
if readable and not os.access(item, os.R_OK):
|
||||
continue
|
||||
if writable and not os.access(item, os.W_OK):
|
||||
continue
|
||||
completions.append(item)
|
||||
if match_wildcard:
|
||||
completions = filter(
|
||||
lambda i: wildcard_match(i, match_wildcard)
|
||||
if match_wildcard
|
||||
else False,
|
||||
completions,
|
||||
)
|
||||
return [i for i in completions if i.startswith(incomplete)]
|
||||
|
||||
return completer
|
||||
|
||||
|
||||
class SearchType(Enum):
|
||||
"""The type of search to run."""
|
||||
|
||||
@ -50,10 +93,15 @@ def _initialize_cli(
|
||||
dir_okay=True,
|
||||
writable=True,
|
||||
resolve_path=True,
|
||||
autocompletion=path_autocomplete(
|
||||
file_okay=False, dir_okay=True, writable=True, match_wildcard="*"
|
||||
),
|
||||
),
|
||||
],
|
||||
):
|
||||
"""Generate a default configuration file."""
|
||||
from graphrag.cli.initialize import initialize_project_at
|
||||
|
||||
initialize_project_at(path=root)
|
||||
|
||||
|
||||
@ -73,6 +121,9 @@ def _index_cli(
|
||||
dir_okay=True,
|
||||
writable=True,
|
||||
resolve_path=True,
|
||||
autocompletion=path_autocomplete(
|
||||
file_okay=False, dir_okay=True, writable=True, match_wildcard="*"
|
||||
),
|
||||
),
|
||||
] = Path(), # set default to current directory
|
||||
verbose: Annotated[
|
||||
@ -114,6 +165,8 @@ def _index_cli(
|
||||
] = None,
|
||||
):
|
||||
"""Build a knowledge graph index."""
|
||||
from graphrag.cli.index import index_cli
|
||||
|
||||
index_cli(
|
||||
root_dir=root,
|
||||
verbose=verbose,
|
||||
@ -181,6 +234,8 @@ def _update_cli(
|
||||
|
||||
Applies a default storage configuration (if not provided by config), saving the new index to the local file system in the `update_output` folder.
|
||||
"""
|
||||
from graphrag.cli.index import update_cli
|
||||
|
||||
update_cli(
|
||||
root_dir=root,
|
||||
verbose=verbose,
|
||||
@ -204,12 +259,21 @@ def _prompt_tune_cli(
|
||||
dir_okay=True,
|
||||
writable=True,
|
||||
resolve_path=True,
|
||||
autocompletion=path_autocomplete(
|
||||
file_okay=False, dir_okay=True, writable=True, match_wildcard="*"
|
||||
),
|
||||
),
|
||||
] = Path(), # set default to current directory
|
||||
config: Annotated[
|
||||
Path | None,
|
||||
typer.Option(
|
||||
help="The configuration to use.", exists=True, file_okay=True, readable=True
|
||||
help="The configuration to use.",
|
||||
exists=True,
|
||||
file_okay=True,
|
||||
readable=True,
|
||||
autocompletion=path_autocomplete(
|
||||
file_okay=True, dir_okay=False, match_wildcard="*"
|
||||
),
|
||||
),
|
||||
] = None,
|
||||
domain: Annotated[
|
||||
@ -226,13 +290,13 @@ def _prompt_tune_cli(
|
||||
typer.Option(
|
||||
help="The number of text chunks to embed when --selection-method=auto."
|
||||
),
|
||||
] = 300,
|
||||
] = N_SUBSET_MAX,
|
||||
k: Annotated[
|
||||
int,
|
||||
typer.Option(
|
||||
help="The maximum number of documents to select from each centroid when --selection-method=auto."
|
||||
),
|
||||
] = 15,
|
||||
] = K,
|
||||
limit: Annotated[
|
||||
int,
|
||||
typer.Option(
|
||||
@ -271,6 +335,10 @@ def _prompt_tune_cli(
|
||||
] = Path("prompts"),
|
||||
):
|
||||
"""Generate custom graphrag prompts with your own data (i.e. auto templating)."""
|
||||
import asyncio
|
||||
|
||||
from graphrag.cli.prompt_tune import prompt_tune
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(
|
||||
prompt_tune(
|
||||
@ -298,7 +366,13 @@ def _query_cli(
|
||||
config: Annotated[
|
||||
Path | None,
|
||||
typer.Option(
|
||||
help="The configuration to use.", exists=True, file_okay=True, readable=True
|
||||
help="The configuration to use.",
|
||||
exists=True,
|
||||
file_okay=True,
|
||||
readable=True,
|
||||
autocompletion=path_autocomplete(
|
||||
file_okay=True, dir_okay=False, match_wildcard="*"
|
||||
),
|
||||
),
|
||||
] = None,
|
||||
data: Annotated[
|
||||
@ -309,6 +383,9 @@ def _query_cli(
|
||||
dir_okay=True,
|
||||
readable=True,
|
||||
resolve_path=True,
|
||||
autocompletion=path_autocomplete(
|
||||
file_okay=False, dir_okay=True, match_wildcard="*"
|
||||
),
|
||||
),
|
||||
] = None,
|
||||
root: Annotated[
|
||||
@ -319,6 +396,9 @@ def _query_cli(
|
||||
dir_okay=True,
|
||||
writable=True,
|
||||
resolve_path=True,
|
||||
autocompletion=path_autocomplete(
|
||||
file_okay=False, dir_okay=True, match_wildcard="*"
|
||||
),
|
||||
),
|
||||
] = Path(), # set default to current directory
|
||||
community_level: Annotated[
|
||||
@ -342,6 +422,8 @@ def _query_cli(
|
||||
] = False,
|
||||
):
|
||||
"""Query a knowledge graph index."""
|
||||
from graphrag.cli.query import run_drift_search, run_global_search, run_local_search
|
||||
|
||||
match method:
|
||||
case SearchType.LOCAL:
|
||||
run_local_search(
|
||||
|
||||
@ -6,8 +6,8 @@
|
||||
from pathlib import Path
|
||||
|
||||
import graphrag.api as api
|
||||
from graphrag.config import load_config
|
||||
from graphrag.logging import PrintProgressReporter
|
||||
from graphrag.config.load_config import load_config
|
||||
from graphrag.logging.print_progress import PrintProgressReporter
|
||||
from graphrag.prompt_tune.generator.community_report_summarization import (
|
||||
COMMUNITY_SUMMARIZATION_FILENAME,
|
||||
)
|
||||
|
||||
@ -10,9 +10,11 @@ from pathlib import Path
|
||||
import pandas as pd
|
||||
|
||||
import graphrag.api as api
|
||||
from graphrag.config import GraphRagConfig, load_config, resolve_paths
|
||||
from graphrag.config.load_config import load_config
|
||||
from graphrag.config.models.graph_rag_config import GraphRagConfig
|
||||
from graphrag.config.resolve_path import resolve_paths
|
||||
from graphrag.index.create_pipeline_config import create_pipeline_config
|
||||
from graphrag.logging import PrintProgressReporter
|
||||
from graphrag.logging.print_progress import PrintProgressReporter
|
||||
from graphrag.utils.storage import _create_storage, _load_table_from_storage
|
||||
|
||||
reporter = PrintProgressReporter("")
|
||||
|
||||
@ -2,134 +2,3 @@
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""The Indexing Engine default config package root."""
|
||||
|
||||
from .config_file_loader import load_config_from_file, search_for_config_in_root_dir
|
||||
from .create_graphrag_config import (
|
||||
create_graphrag_config,
|
||||
)
|
||||
from .enums import (
|
||||
CacheType,
|
||||
InputFileType,
|
||||
InputType,
|
||||
LLMType,
|
||||
ReportingType,
|
||||
StorageType,
|
||||
TextEmbeddingTarget,
|
||||
)
|
||||
from .errors import (
|
||||
ApiKeyMissingError,
|
||||
AzureApiBaseMissingError,
|
||||
AzureDeploymentNameMissingError,
|
||||
)
|
||||
from .input_models import (
|
||||
CacheConfigInput,
|
||||
ChunkingConfigInput,
|
||||
ClaimExtractionConfigInput,
|
||||
ClusterGraphConfigInput,
|
||||
CommunityReportsConfigInput,
|
||||
EmbedGraphConfigInput,
|
||||
EntityExtractionConfigInput,
|
||||
GlobalSearchConfigInput,
|
||||
GraphRagConfigInput,
|
||||
InputConfigInput,
|
||||
LLMConfigInput,
|
||||
LLMParametersInput,
|
||||
LocalSearchConfigInput,
|
||||
ParallelizationParametersInput,
|
||||
ReportingConfigInput,
|
||||
SnapshotsConfigInput,
|
||||
StorageConfigInput,
|
||||
SummarizeDescriptionsConfigInput,
|
||||
TextEmbeddingConfigInput,
|
||||
UmapConfigInput,
|
||||
)
|
||||
from .load_config import load_config
|
||||
from .logging import enable_logging_with_config
|
||||
from .models import (
|
||||
CacheConfig,
|
||||
ChunkingConfig,
|
||||
ClaimExtractionConfig,
|
||||
ClusterGraphConfig,
|
||||
CommunityReportsConfig,
|
||||
DRIFTSearchConfig,
|
||||
EmbedGraphConfig,
|
||||
EntityExtractionConfig,
|
||||
GlobalSearchConfig,
|
||||
GraphRagConfig,
|
||||
InputConfig,
|
||||
LLMConfig,
|
||||
LLMParameters,
|
||||
LocalSearchConfig,
|
||||
ParallelizationParameters,
|
||||
ReportingConfig,
|
||||
SnapshotsConfig,
|
||||
StorageConfig,
|
||||
SummarizeDescriptionsConfig,
|
||||
TextEmbeddingConfig,
|
||||
UmapConfig,
|
||||
)
|
||||
from .read_dotenv import read_dotenv
|
||||
from .resolve_path import resolve_path, resolve_paths
|
||||
|
||||
__all__ = [
|
||||
"ApiKeyMissingError",
|
||||
"AzureApiBaseMissingError",
|
||||
"AzureDeploymentNameMissingError",
|
||||
"CacheConfig",
|
||||
"CacheConfigInput",
|
||||
"CacheType",
|
||||
"ChunkingConfig",
|
||||
"ChunkingConfigInput",
|
||||
"ClaimExtractionConfig",
|
||||
"ClaimExtractionConfigInput",
|
||||
"ClusterGraphConfig",
|
||||
"ClusterGraphConfigInput",
|
||||
"CommunityReportsConfig",
|
||||
"CommunityReportsConfigInput",
|
||||
"DRIFTSearchConfig",
|
||||
"EmbedGraphConfig",
|
||||
"EmbedGraphConfigInput",
|
||||
"EntityExtractionConfig",
|
||||
"EntityExtractionConfigInput",
|
||||
"GlobalSearchConfig",
|
||||
"GlobalSearchConfigInput",
|
||||
"GraphRagConfig",
|
||||
"GraphRagConfigInput",
|
||||
"InputConfig",
|
||||
"InputConfigInput",
|
||||
"InputFileType",
|
||||
"InputType",
|
||||
"LLMConfig",
|
||||
"LLMConfigInput",
|
||||
"LLMParameters",
|
||||
"LLMParametersInput",
|
||||
"LLMType",
|
||||
"LocalSearchConfig",
|
||||
"LocalSearchConfigInput",
|
||||
"ParallelizationParameters",
|
||||
"ParallelizationParametersInput",
|
||||
"ReportingConfig",
|
||||
"ReportingConfigInput",
|
||||
"ReportingType",
|
||||
"SnapshotsConfig",
|
||||
"SnapshotsConfigInput",
|
||||
"StorageConfig",
|
||||
"StorageConfigInput",
|
||||
"StorageType",
|
||||
"StorageType",
|
||||
"SummarizeDescriptionsConfig",
|
||||
"SummarizeDescriptionsConfigInput",
|
||||
"TextEmbeddingConfig",
|
||||
"TextEmbeddingConfigInput",
|
||||
"TextEmbeddingTarget",
|
||||
"UmapConfig",
|
||||
"UmapConfigInput",
|
||||
"create_graphrag_config",
|
||||
"enable_logging_with_config",
|
||||
"load_config",
|
||||
"load_config_from_file",
|
||||
"read_dotenv",
|
||||
"resolve_path",
|
||||
"resolve_paths",
|
||||
"search_for_config_in_root_dir",
|
||||
]
|
||||
|
||||
@ -9,8 +9,8 @@ from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
from .create_graphrag_config import create_graphrag_config
|
||||
from .models.graph_rag_config import GraphRagConfig
|
||||
from graphrag.config.create_graphrag_config import create_graphrag_config
|
||||
from graphrag.config.models.graph_rag_config import GraphRagConfig
|
||||
|
||||
_default_config_files = ["settings.yaml", "settings.yml", "settings.json"]
|
||||
|
||||
|
||||
@ -13,8 +13,7 @@ from environs import Env
|
||||
from pydantic import TypeAdapter
|
||||
|
||||
import graphrag.config.defaults as defs
|
||||
|
||||
from .enums import (
|
||||
from graphrag.config.enums import (
|
||||
CacheType,
|
||||
InputFileType,
|
||||
InputType,
|
||||
@ -23,39 +22,37 @@ from .enums import (
|
||||
StorageType,
|
||||
TextEmbeddingTarget,
|
||||
)
|
||||
from .environment_reader import EnvironmentReader
|
||||
from .errors import (
|
||||
from graphrag.config.environment_reader import EnvironmentReader
|
||||
from graphrag.config.errors import (
|
||||
ApiKeyMissingError,
|
||||
AzureApiBaseMissingError,
|
||||
AzureDeploymentNameMissingError,
|
||||
)
|
||||
from .input_models import (
|
||||
GraphRagConfigInput,
|
||||
LLMConfigInput,
|
||||
)
|
||||
from .models import (
|
||||
CacheConfig,
|
||||
ChunkingConfig,
|
||||
ClaimExtractionConfig,
|
||||
ClusterGraphConfig,
|
||||
CommunityReportsConfig,
|
||||
DRIFTSearchConfig,
|
||||
EmbedGraphConfig,
|
||||
EntityExtractionConfig,
|
||||
GlobalSearchConfig,
|
||||
GraphRagConfig,
|
||||
InputConfig,
|
||||
LLMParameters,
|
||||
LocalSearchConfig,
|
||||
ParallelizationParameters,
|
||||
ReportingConfig,
|
||||
SnapshotsConfig,
|
||||
StorageConfig,
|
||||
from graphrag.config.input_models.graphrag_config_input import GraphRagConfigInput
|
||||
from graphrag.config.input_models.llm_config_input import LLMConfigInput
|
||||
from graphrag.config.models.cache_config import CacheConfig
|
||||
from graphrag.config.models.chunking_config import ChunkingConfig
|
||||
from graphrag.config.models.claim_extraction_config import ClaimExtractionConfig
|
||||
from graphrag.config.models.cluster_graph_config import ClusterGraphConfig
|
||||
from graphrag.config.models.community_reports_config import CommunityReportsConfig
|
||||
from graphrag.config.models.drift_search_config import DRIFTSearchConfig
|
||||
from graphrag.config.models.embed_graph_config import EmbedGraphConfig
|
||||
from graphrag.config.models.entity_extraction_config import EntityExtractionConfig
|
||||
from graphrag.config.models.global_search_config import GlobalSearchConfig
|
||||
from graphrag.config.models.graph_rag_config import GraphRagConfig
|
||||
from graphrag.config.models.input_config import InputConfig
|
||||
from graphrag.config.models.llm_parameters import LLMParameters
|
||||
from graphrag.config.models.local_search_config import LocalSearchConfig
|
||||
from graphrag.config.models.parallelization_parameters import ParallelizationParameters
|
||||
from graphrag.config.models.reporting_config import ReportingConfig
|
||||
from graphrag.config.models.snapshots_config import SnapshotsConfig
|
||||
from graphrag.config.models.storage_config import StorageConfig
|
||||
from graphrag.config.models.summarize_descriptions_config import (
|
||||
SummarizeDescriptionsConfig,
|
||||
TextEmbeddingConfig,
|
||||
UmapConfig,
|
||||
)
|
||||
from .read_dotenv import read_dotenv
|
||||
from graphrag.config.models.text_embedding_config import TextEmbeddingConfig
|
||||
from graphrag.config.models.umap_config import UmapConfig
|
||||
from graphrag.config.read_dotenv import read_dotenv
|
||||
|
||||
InputModelValidator = TypeAdapter(GraphRagConfigInput)
|
||||
|
||||
|
||||
@ -7,9 +7,7 @@ from pathlib import Path
|
||||
|
||||
from datashaper import AsyncType
|
||||
|
||||
from graphrag.vector_stores import VectorStoreType
|
||||
|
||||
from .enums import (
|
||||
from graphrag.config.enums import (
|
||||
CacheType,
|
||||
InputFileType,
|
||||
InputType,
|
||||
@ -18,6 +16,7 @@ from .enums import (
|
||||
StorageType,
|
||||
TextEmbeddingTarget,
|
||||
)
|
||||
from graphrag.vector_stores.factory import VectorStoreType
|
||||
|
||||
ASYNC_MODE = AsyncType.Threaded
|
||||
ENCODING_MODEL = "cl100k_base"
|
||||
|
||||
@ -2,49 +2,3 @@
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""Interfaces for Default Config parameterization."""
|
||||
|
||||
from .cache_config_input import CacheConfigInput
|
||||
from .chunking_config_input import ChunkingConfigInput
|
||||
from .claim_extraction_config_input import ClaimExtractionConfigInput
|
||||
from .cluster_graph_config_input import ClusterGraphConfigInput
|
||||
from .community_reports_config_input import CommunityReportsConfigInput
|
||||
from .embed_graph_config_input import EmbedGraphConfigInput
|
||||
from .entity_extraction_config_input import EntityExtractionConfigInput
|
||||
from .global_search_config_input import GlobalSearchConfigInput
|
||||
from .graphrag_config_input import GraphRagConfigInput
|
||||
from .input_config_input import InputConfigInput
|
||||
from .llm_config_input import LLMConfigInput
|
||||
from .llm_parameters_input import LLMParametersInput
|
||||
from .local_search_config_input import LocalSearchConfigInput
|
||||
from .parallelization_parameters_input import ParallelizationParametersInput
|
||||
from .reporting_config_input import ReportingConfigInput
|
||||
from .snapshots_config_input import SnapshotsConfigInput
|
||||
from .storage_config_input import StorageConfigInput
|
||||
from .summarize_descriptions_config_input import (
|
||||
SummarizeDescriptionsConfigInput,
|
||||
)
|
||||
from .text_embedding_config_input import TextEmbeddingConfigInput
|
||||
from .umap_config_input import UmapConfigInput
|
||||
|
||||
__all__ = [
|
||||
"CacheConfigInput",
|
||||
"ChunkingConfigInput",
|
||||
"ClaimExtractionConfigInput",
|
||||
"ClusterGraphConfigInput",
|
||||
"CommunityReportsConfigInput",
|
||||
"EmbedGraphConfigInput",
|
||||
"EntityExtractionConfigInput",
|
||||
"GlobalSearchConfigInput",
|
||||
"GraphRagConfigInput",
|
||||
"InputConfigInput",
|
||||
"LLMConfigInput",
|
||||
"LLMParametersInput",
|
||||
"LocalSearchConfigInput",
|
||||
"ParallelizationParametersInput",
|
||||
"ReportingConfigInput",
|
||||
"SnapshotsConfigInput",
|
||||
"StorageConfigInput",
|
||||
"SummarizeDescriptionsConfigInput",
|
||||
"TextEmbeddingConfigInput",
|
||||
"UmapConfigInput",
|
||||
]
|
||||
|
||||
@ -5,7 +5,7 @@
|
||||
|
||||
from typing_extensions import NotRequired
|
||||
|
||||
from .llm_config_input import LLMConfigInput
|
||||
from graphrag.config.input_models.llm_config_input import LLMConfigInput
|
||||
|
||||
|
||||
class ClaimExtractionConfigInput(LLMConfigInput):
|
||||
|
||||
@ -5,7 +5,7 @@
|
||||
|
||||
from typing_extensions import NotRequired
|
||||
|
||||
from .llm_config_input import LLMConfigInput
|
||||
from graphrag.config.input_models.llm_config_input import LLMConfigInput
|
||||
|
||||
|
||||
class CommunityReportsConfigInput(LLMConfigInput):
|
||||
|
||||
@ -5,7 +5,7 @@
|
||||
|
||||
from typing_extensions import NotRequired
|
||||
|
||||
from .llm_config_input import LLMConfigInput
|
||||
from graphrag.config.input_models.llm_config_input import LLMConfigInput
|
||||
|
||||
|
||||
class EntityExtractionConfigInput(LLMConfigInput):
|
||||
|
||||
@ -5,25 +5,39 @@
|
||||
|
||||
from typing_extensions import NotRequired
|
||||
|
||||
from .cache_config_input import CacheConfigInput
|
||||
from .chunking_config_input import ChunkingConfigInput
|
||||
from .claim_extraction_config_input import ClaimExtractionConfigInput
|
||||
from .cluster_graph_config_input import ClusterGraphConfigInput
|
||||
from .community_reports_config_input import CommunityReportsConfigInput
|
||||
from .embed_graph_config_input import EmbedGraphConfigInput
|
||||
from .entity_extraction_config_input import EntityExtractionConfigInput
|
||||
from .global_search_config_input import GlobalSearchConfigInput
|
||||
from .input_config_input import InputConfigInput
|
||||
from .llm_config_input import LLMConfigInput
|
||||
from .local_search_config_input import LocalSearchConfigInput
|
||||
from .reporting_config_input import ReportingConfigInput
|
||||
from .snapshots_config_input import SnapshotsConfigInput
|
||||
from .storage_config_input import StorageConfigInput
|
||||
from .summarize_descriptions_config_input import (
|
||||
from graphrag.config.input_models.cache_config_input import CacheConfigInput
|
||||
from graphrag.config.input_models.chunking_config_input import ChunkingConfigInput
|
||||
from graphrag.config.input_models.claim_extraction_config_input import (
|
||||
ClaimExtractionConfigInput,
|
||||
)
|
||||
from graphrag.config.input_models.cluster_graph_config_input import (
|
||||
ClusterGraphConfigInput,
|
||||
)
|
||||
from graphrag.config.input_models.community_reports_config_input import (
|
||||
CommunityReportsConfigInput,
|
||||
)
|
||||
from graphrag.config.input_models.embed_graph_config_input import EmbedGraphConfigInput
|
||||
from graphrag.config.input_models.entity_extraction_config_input import (
|
||||
EntityExtractionConfigInput,
|
||||
)
|
||||
from graphrag.config.input_models.global_search_config_input import (
|
||||
GlobalSearchConfigInput,
|
||||
)
|
||||
from graphrag.config.input_models.input_config_input import InputConfigInput
|
||||
from graphrag.config.input_models.llm_config_input import LLMConfigInput
|
||||
from graphrag.config.input_models.local_search_config_input import (
|
||||
LocalSearchConfigInput,
|
||||
)
|
||||
from graphrag.config.input_models.reporting_config_input import ReportingConfigInput
|
||||
from graphrag.config.input_models.snapshots_config_input import SnapshotsConfigInput
|
||||
from graphrag.config.input_models.storage_config_input import StorageConfigInput
|
||||
from graphrag.config.input_models.summarize_descriptions_config_input import (
|
||||
SummarizeDescriptionsConfigInput,
|
||||
)
|
||||
from .text_embedding_config_input import TextEmbeddingConfigInput
|
||||
from .umap_config_input import UmapConfigInput
|
||||
from graphrag.config.input_models.text_embedding_config_input import (
|
||||
TextEmbeddingConfigInput,
|
||||
)
|
||||
from graphrag.config.input_models.umap_config_input import UmapConfigInput
|
||||
|
||||
|
||||
class GraphRagConfigInput(LLMConfigInput):
|
||||
|
||||
@ -6,8 +6,10 @@
|
||||
from datashaper import AsyncType
|
||||
from typing_extensions import NotRequired, TypedDict
|
||||
|
||||
from .llm_parameters_input import LLMParametersInput
|
||||
from .parallelization_parameters_input import ParallelizationParametersInput
|
||||
from graphrag.config.input_models.llm_parameters_input import LLMParametersInput
|
||||
from graphrag.config.input_models.parallelization_parameters_input import (
|
||||
ParallelizationParametersInput,
|
||||
)
|
||||
|
||||
|
||||
class LLMConfigInput(TypedDict):
|
||||
|
||||
@ -5,7 +5,7 @@
|
||||
|
||||
from typing_extensions import NotRequired
|
||||
|
||||
from .llm_config_input import LLMConfigInput
|
||||
from graphrag.config.input_models.llm_config_input import LLMConfigInput
|
||||
|
||||
|
||||
class SummarizeDescriptionsConfigInput(LLMConfigInput):
|
||||
|
||||
@ -8,8 +8,7 @@ from typing_extensions import NotRequired
|
||||
from graphrag.config.enums import (
|
||||
TextEmbeddingTarget,
|
||||
)
|
||||
|
||||
from .llm_config_input import LLMConfigInput
|
||||
from graphrag.config.input_models.llm_config_input import LLMConfigInput
|
||||
|
||||
|
||||
class TextEmbeddingConfigInput(LLMConfigInput):
|
||||
|
||||
@ -5,9 +5,12 @@
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from .config_file_loader import load_config_from_file, search_for_config_in_root_dir
|
||||
from .create_graphrag_config import create_graphrag_config
|
||||
from .models.graph_rag_config import GraphRagConfig
|
||||
from graphrag.config.config_file_loader import (
|
||||
load_config_from_file,
|
||||
search_for_config_in_root_dir,
|
||||
)
|
||||
from graphrag.config.create_graphrag_config import create_graphrag_config
|
||||
from graphrag.config.models.graph_rag_config import GraphRagConfig
|
||||
|
||||
|
||||
def load_config(
|
||||
|
||||
@ -6,8 +6,8 @@
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from .enums import ReportingType
|
||||
from .models.graph_rag_config import GraphRagConfig
|
||||
from graphrag.config.enums import ReportingType
|
||||
from graphrag.config.models.graph_rag_config import GraphRagConfig
|
||||
|
||||
|
||||
def enable_logging(log_filepath: str | Path, verbose: bool = False) -> None:
|
||||
|
||||
@ -2,49 +2,3 @@
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""Interfaces for Default Config parameterization."""
|
||||
|
||||
from .cache_config import CacheConfig
|
||||
from .chunking_config import ChunkingConfig
|
||||
from .claim_extraction_config import ClaimExtractionConfig
|
||||
from .cluster_graph_config import ClusterGraphConfig
|
||||
from .community_reports_config import CommunityReportsConfig
|
||||
from .drift_search_config import DRIFTSearchConfig
|
||||
from .embed_graph_config import EmbedGraphConfig
|
||||
from .entity_extraction_config import EntityExtractionConfig
|
||||
from .global_search_config import GlobalSearchConfig
|
||||
from .graph_rag_config import GraphRagConfig
|
||||
from .input_config import InputConfig
|
||||
from .llm_config import LLMConfig
|
||||
from .llm_parameters import LLMParameters
|
||||
from .local_search_config import LocalSearchConfig
|
||||
from .parallelization_parameters import ParallelizationParameters
|
||||
from .reporting_config import ReportingConfig
|
||||
from .snapshots_config import SnapshotsConfig
|
||||
from .storage_config import StorageConfig
|
||||
from .summarize_descriptions_config import SummarizeDescriptionsConfig
|
||||
from .text_embedding_config import TextEmbeddingConfig
|
||||
from .umap_config import UmapConfig
|
||||
|
||||
__all__ = [
|
||||
"CacheConfig",
|
||||
"ChunkingConfig",
|
||||
"ClaimExtractionConfig",
|
||||
"ClusterGraphConfig",
|
||||
"CommunityReportsConfig",
|
||||
"DRIFTSearchConfig",
|
||||
"EmbedGraphConfig",
|
||||
"EntityExtractionConfig",
|
||||
"GlobalSearchConfig",
|
||||
"GraphRagConfig",
|
||||
"InputConfig",
|
||||
"LLMConfig",
|
||||
"LLMParameters",
|
||||
"LocalSearchConfig",
|
||||
"ParallelizationParameters",
|
||||
"ReportingConfig",
|
||||
"SnapshotsConfig",
|
||||
"StorageConfig",
|
||||
"SummarizeDescriptionsConfig",
|
||||
"TextEmbeddingConfig",
|
||||
"UmapConfig",
|
||||
]
|
||||
|
||||
@ -8,8 +8,7 @@ from pathlib import Path
|
||||
from pydantic import Field
|
||||
|
||||
import graphrag.config.defaults as defs
|
||||
|
||||
from .llm_config import LLMConfig
|
||||
from graphrag.config.models.llm_config import LLMConfig
|
||||
|
||||
|
||||
class ClaimExtractionConfig(LLMConfig):
|
||||
|
||||
@ -8,8 +8,7 @@ from pathlib import Path
|
||||
from pydantic import Field
|
||||
|
||||
import graphrag.config.defaults as defs
|
||||
|
||||
from .llm_config import LLMConfig
|
||||
from graphrag.config.models.llm_config import LLMConfig
|
||||
|
||||
|
||||
class CommunityReportsConfig(LLMConfig):
|
||||
|
||||
@ -8,8 +8,7 @@ from pathlib import Path
|
||||
from pydantic import Field
|
||||
|
||||
import graphrag.config.defaults as defs
|
||||
|
||||
from .llm_config import LLMConfig
|
||||
from graphrag.config.models.llm_config import LLMConfig
|
||||
|
||||
|
||||
class EntityExtractionConfig(LLMConfig):
|
||||
|
||||
@ -7,27 +7,26 @@ from devtools import pformat
|
||||
from pydantic import Field
|
||||
|
||||
import graphrag.config.defaults as defs
|
||||
|
||||
from .cache_config import CacheConfig
|
||||
from .chunking_config import ChunkingConfig
|
||||
from .claim_extraction_config import ClaimExtractionConfig
|
||||
from .cluster_graph_config import ClusterGraphConfig
|
||||
from .community_reports_config import CommunityReportsConfig
|
||||
from .drift_search_config import DRIFTSearchConfig
|
||||
from .embed_graph_config import EmbedGraphConfig
|
||||
from .entity_extraction_config import EntityExtractionConfig
|
||||
from .global_search_config import GlobalSearchConfig
|
||||
from .input_config import InputConfig
|
||||
from .llm_config import LLMConfig
|
||||
from .local_search_config import LocalSearchConfig
|
||||
from .reporting_config import ReportingConfig
|
||||
from .snapshots_config import SnapshotsConfig
|
||||
from .storage_config import StorageConfig
|
||||
from .summarize_descriptions_config import (
|
||||
from graphrag.config.models.cache_config import CacheConfig
|
||||
from graphrag.config.models.chunking_config import ChunkingConfig
|
||||
from graphrag.config.models.claim_extraction_config import ClaimExtractionConfig
|
||||
from graphrag.config.models.cluster_graph_config import ClusterGraphConfig
|
||||
from graphrag.config.models.community_reports_config import CommunityReportsConfig
|
||||
from graphrag.config.models.drift_search_config import DRIFTSearchConfig
|
||||
from graphrag.config.models.embed_graph_config import EmbedGraphConfig
|
||||
from graphrag.config.models.entity_extraction_config import EntityExtractionConfig
|
||||
from graphrag.config.models.global_search_config import GlobalSearchConfig
|
||||
from graphrag.config.models.input_config import InputConfig
|
||||
from graphrag.config.models.llm_config import LLMConfig
|
||||
from graphrag.config.models.local_search_config import LocalSearchConfig
|
||||
from graphrag.config.models.reporting_config import ReportingConfig
|
||||
from graphrag.config.models.snapshots_config import SnapshotsConfig
|
||||
from graphrag.config.models.storage_config import StorageConfig
|
||||
from graphrag.config.models.summarize_descriptions_config import (
|
||||
SummarizeDescriptionsConfig,
|
||||
)
|
||||
from .text_embedding_config import TextEmbeddingConfig
|
||||
from .umap_config import UmapConfig
|
||||
from graphrag.config.models.text_embedding_config import TextEmbeddingConfig
|
||||
from graphrag.config.models.umap_config import UmapConfig
|
||||
|
||||
|
||||
class GraphRagConfig(LLMConfig):
|
||||
|
||||
@ -7,9 +7,8 @@ from datashaper import AsyncType
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
import graphrag.config.defaults as defs
|
||||
|
||||
from .llm_parameters import LLMParameters
|
||||
from .parallelization_parameters import ParallelizationParameters
|
||||
from graphrag.config.models.llm_parameters import LLMParameters
|
||||
from graphrag.config.models.parallelization_parameters import ParallelizationParameters
|
||||
|
||||
|
||||
class LLMConfig(BaseModel):
|
||||
|
||||
@ -8,8 +8,7 @@ from pathlib import Path
|
||||
from pydantic import Field
|
||||
|
||||
import graphrag.config.defaults as defs
|
||||
|
||||
from .llm_config import LLMConfig
|
||||
from graphrag.config.models.llm_config import LLMConfig
|
||||
|
||||
|
||||
class SummarizeDescriptionsConfig(LLMConfig):
|
||||
|
||||
@ -7,8 +7,7 @@ from pydantic import Field
|
||||
|
||||
import graphrag.config.defaults as defs
|
||||
from graphrag.config.enums import TextEmbeddingTarget
|
||||
|
||||
from .llm_config import LLMConfig
|
||||
from graphrag.config.models.llm_config import LLMConfig
|
||||
|
||||
|
||||
class TextEmbeddingConfig(LLMConfig):
|
||||
|
||||
@ -7,8 +7,8 @@ import re
|
||||
from pathlib import Path
|
||||
from string import Template
|
||||
|
||||
from .enums import ReportingType, StorageType
|
||||
from .models.graph_rag_config import GraphRagConfig
|
||||
from graphrag.config.enums import ReportingType, StorageType
|
||||
from graphrag.config.models.graph_rag_config import GraphRagConfig
|
||||
|
||||
|
||||
def _resolve_timestamp_path_with_value(path: str | Path, timestamp_value: str) -> Path:
|
||||
|
||||
@ -1,78 +1,4 @@
|
||||
# Copyright (c) 2024 Microsoft Corporation.
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""The Indexing Engine package root."""
|
||||
|
||||
from .cache import PipelineCache
|
||||
from .config import (
|
||||
PipelineBlobCacheConfig,
|
||||
PipelineBlobReportingConfig,
|
||||
PipelineBlobStorageConfig,
|
||||
PipelineCacheConfig,
|
||||
PipelineCacheConfigTypes,
|
||||
PipelineConfig,
|
||||
PipelineConsoleReportingConfig,
|
||||
PipelineCSVInputConfig,
|
||||
PipelineFileCacheConfig,
|
||||
PipelineFileReportingConfig,
|
||||
PipelineFileStorageConfig,
|
||||
PipelineInputConfig,
|
||||
PipelineInputConfigTypes,
|
||||
PipelineMemoryCacheConfig,
|
||||
PipelineMemoryStorageConfig,
|
||||
PipelineNoneCacheConfig,
|
||||
PipelineReportingConfig,
|
||||
PipelineReportingConfigTypes,
|
||||
PipelineStorageConfig,
|
||||
PipelineStorageConfigTypes,
|
||||
PipelineTextInputConfig,
|
||||
PipelineWorkflowConfig,
|
||||
PipelineWorkflowReference,
|
||||
PipelineWorkflowStep,
|
||||
)
|
||||
from .create_pipeline_config import create_pipeline_config
|
||||
from .errors import (
|
||||
NoWorkflowsDefinedError,
|
||||
UndefinedWorkflowError,
|
||||
UnknownWorkflowError,
|
||||
)
|
||||
from .load_pipeline_config import load_pipeline_config
|
||||
from .run import run_pipeline, run_pipeline_with_config
|
||||
from .storage import PipelineStorage
|
||||
|
||||
__all__ = [
|
||||
"NoWorkflowsDefinedError",
|
||||
"PipelineBlobCacheConfig",
|
||||
"PipelineBlobCacheConfig",
|
||||
"PipelineBlobReportingConfig",
|
||||
"PipelineBlobStorageConfig",
|
||||
"PipelineCSVInputConfig",
|
||||
"PipelineCache",
|
||||
"PipelineCacheConfig",
|
||||
"PipelineCacheConfigTypes",
|
||||
"PipelineConfig",
|
||||
"PipelineConsoleReportingConfig",
|
||||
"PipelineFileCacheConfig",
|
||||
"PipelineFileReportingConfig",
|
||||
"PipelineFileStorageConfig",
|
||||
"PipelineInputConfig",
|
||||
"PipelineInputConfigTypes",
|
||||
"PipelineMemoryCacheConfig",
|
||||
"PipelineMemoryStorageConfig",
|
||||
"PipelineNoneCacheConfig",
|
||||
"PipelineReportingConfig",
|
||||
"PipelineReportingConfigTypes",
|
||||
"PipelineStorage",
|
||||
"PipelineStorageConfig",
|
||||
"PipelineStorageConfigTypes",
|
||||
"PipelineTextInputConfig",
|
||||
"PipelineWorkflowConfig",
|
||||
"PipelineWorkflowReference",
|
||||
"PipelineWorkflowStep",
|
||||
"UndefinedWorkflowError",
|
||||
"UnknownWorkflowError",
|
||||
"create_pipeline_config",
|
||||
"load_pipeline_config",
|
||||
"run_pipeline",
|
||||
"run_pipeline_with_config",
|
||||
]
|
||||
"""The indexing engine package root."""
|
||||
|
||||
14
graphrag/index/cache/__init__.py
vendored
14
graphrag/index/cache/__init__.py
vendored
@ -2,17 +2,3 @@
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""The Indexing Engine cache package root."""
|
||||
|
||||
from .json_pipeline_cache import JsonPipelineCache
|
||||
from .load_cache import load_cache
|
||||
from .memory_pipeline_cache import InMemoryCache
|
||||
from .noop_pipeline_cache import NoopPipelineCache
|
||||
from .pipeline_cache import PipelineCache
|
||||
|
||||
__all__ = [
|
||||
"InMemoryCache",
|
||||
"JsonPipelineCache",
|
||||
"NoopPipelineCache",
|
||||
"PipelineCache",
|
||||
"load_cache",
|
||||
]
|
||||
|
||||
5
graphrag/index/cache/json_pipeline_cache.py
vendored
5
graphrag/index/cache/json_pipeline_cache.py
vendored
@ -6,9 +6,8 @@
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from graphrag.index.storage import PipelineStorage
|
||||
|
||||
from .pipeline_cache import PipelineCache
|
||||
from graphrag.index.cache.pipeline_cache import PipelineCache
|
||||
from graphrag.index.storage.pipeline_storage import PipelineStorage
|
||||
|
||||
|
||||
class JsonPipelineCache(PipelineCache):
|
||||
|
||||
11
graphrag/index/cache/load_cache.py
vendored
11
graphrag/index/cache/load_cache.py
vendored
@ -12,16 +12,17 @@ from graphrag.index.config.cache import (
|
||||
PipelineBlobCacheConfig,
|
||||
PipelineFileCacheConfig,
|
||||
)
|
||||
from graphrag.index.storage import BlobPipelineStorage, FilePipelineStorage
|
||||
from graphrag.index.storage.blob_pipeline_storage import BlobPipelineStorage
|
||||
from graphrag.index.storage.file_pipeline_storage import FilePipelineStorage
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from graphrag.index.config import (
|
||||
from graphrag.index.config.cache import (
|
||||
PipelineCacheConfig,
|
||||
)
|
||||
|
||||
from .json_pipeline_cache import JsonPipelineCache
|
||||
from .memory_pipeline_cache import create_memory_cache
|
||||
from .noop_pipeline_cache import NoopPipelineCache
|
||||
from graphrag.index.cache.json_pipeline_cache import JsonPipelineCache
|
||||
from graphrag.index.cache.memory_pipeline_cache import create_memory_cache
|
||||
from graphrag.index.cache.noop_pipeline_cache import NoopPipelineCache
|
||||
|
||||
|
||||
def load_cache(config: PipelineCacheConfig | None, root_dir: str | None):
|
||||
|
||||
@ -5,7 +5,7 @@
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .pipeline_cache import PipelineCache
|
||||
from graphrag.index.cache.pipeline_cache import PipelineCache
|
||||
|
||||
|
||||
class InMemoryCache(PipelineCache):
|
||||
|
||||
2
graphrag/index/cache/noop_pipeline_cache.py
vendored
2
graphrag/index/cache/noop_pipeline_cache.py
vendored
@ -5,7 +5,7 @@
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .pipeline_cache import PipelineCache
|
||||
from graphrag.index.cache.pipeline_cache import PipelineCache
|
||||
|
||||
|
||||
class NoopPipelineCache(PipelineCache):
|
||||
|
||||
@ -2,90 +2,3 @@
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""The Indexing Engine config typing package root."""
|
||||
|
||||
from .cache import (
|
||||
PipelineBlobCacheConfig,
|
||||
PipelineCacheConfig,
|
||||
PipelineCacheConfigTypes,
|
||||
PipelineFileCacheConfig,
|
||||
PipelineMemoryCacheConfig,
|
||||
PipelineNoneCacheConfig,
|
||||
)
|
||||
from .embeddings import (
|
||||
all_embeddings,
|
||||
community_full_content_embedding,
|
||||
community_summary_embedding,
|
||||
community_title_embedding,
|
||||
document_text_embedding,
|
||||
entity_description_embedding,
|
||||
entity_title_embedding,
|
||||
relationship_description_embedding,
|
||||
required_embeddings,
|
||||
text_unit_text_embedding,
|
||||
)
|
||||
from .input import (
|
||||
PipelineCSVInputConfig,
|
||||
PipelineInputConfig,
|
||||
PipelineInputConfigTypes,
|
||||
PipelineTextInputConfig,
|
||||
)
|
||||
from .pipeline import PipelineConfig
|
||||
from .reporting import (
|
||||
PipelineBlobReportingConfig,
|
||||
PipelineConsoleReportingConfig,
|
||||
PipelineFileReportingConfig,
|
||||
PipelineReportingConfig,
|
||||
PipelineReportingConfigTypes,
|
||||
)
|
||||
from .storage import (
|
||||
PipelineBlobStorageConfig,
|
||||
PipelineFileStorageConfig,
|
||||
PipelineMemoryStorageConfig,
|
||||
PipelineStorageConfig,
|
||||
PipelineStorageConfigTypes,
|
||||
)
|
||||
from .workflow import (
|
||||
PipelineWorkflowConfig,
|
||||
PipelineWorkflowReference,
|
||||
PipelineWorkflowStep,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"PipelineBlobCacheConfig",
|
||||
"PipelineBlobReportingConfig",
|
||||
"PipelineBlobStorageConfig",
|
||||
"PipelineCSVInputConfig",
|
||||
"PipelineCacheConfig",
|
||||
"PipelineCacheConfigTypes",
|
||||
"PipelineCacheConfigTypes",
|
||||
"PipelineCacheConfigTypes",
|
||||
"PipelineConfig",
|
||||
"PipelineConsoleReportingConfig",
|
||||
"PipelineFileCacheConfig",
|
||||
"PipelineFileReportingConfig",
|
||||
"PipelineFileStorageConfig",
|
||||
"PipelineInputConfig",
|
||||
"PipelineInputConfigTypes",
|
||||
"PipelineMemoryCacheConfig",
|
||||
"PipelineMemoryCacheConfig",
|
||||
"PipelineMemoryStorageConfig",
|
||||
"PipelineNoneCacheConfig",
|
||||
"PipelineReportingConfig",
|
||||
"PipelineReportingConfigTypes",
|
||||
"PipelineStorageConfig",
|
||||
"PipelineStorageConfigTypes",
|
||||
"PipelineTextInputConfig",
|
||||
"PipelineWorkflowConfig",
|
||||
"PipelineWorkflowReference",
|
||||
"PipelineWorkflowStep",
|
||||
"all_embeddings",
|
||||
"community_full_content_embedding",
|
||||
"community_summary_embedding",
|
||||
"community_title_embedding",
|
||||
"document_text_embedding",
|
||||
"entity_description_embedding",
|
||||
"entity_title_embedding",
|
||||
"relationship_description_embedding",
|
||||
"required_embeddings",
|
||||
"text_unit_text_embedding",
|
||||
]
|
||||
|
||||
@ -11,8 +11,7 @@ from pydantic import BaseModel
|
||||
from pydantic import Field as pydantic_Field
|
||||
|
||||
from graphrag.config.enums import InputFileType, InputType
|
||||
|
||||
from .workflow import PipelineWorkflowStep
|
||||
from graphrag.index.config.workflow import PipelineWorkflowStep
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
@ -9,11 +9,11 @@ from devtools import pformat
|
||||
from pydantic import BaseModel
|
||||
from pydantic import Field as pydantic_Field
|
||||
|
||||
from .cache import PipelineCacheConfigTypes
|
||||
from .input import PipelineInputConfigTypes
|
||||
from .reporting import PipelineReportingConfigTypes
|
||||
from .storage import PipelineStorageConfigTypes
|
||||
from .workflow import PipelineWorkflowReference
|
||||
from graphrag.index.config.cache import PipelineCacheConfigTypes
|
||||
from graphrag.index.config.input import PipelineInputConfigTypes
|
||||
from graphrag.index.config.reporting import PipelineReportingConfigTypes
|
||||
from graphrag.index.config.storage import PipelineStorageConfigTypes
|
||||
from graphrag.index.config.workflow import PipelineWorkflowReference
|
||||
|
||||
|
||||
class PipelineConfig(BaseModel):
|
||||
|
||||
@ -7,8 +7,8 @@
|
||||
from dataclasses import dataclass as dc_dataclass
|
||||
from dataclasses import field
|
||||
|
||||
from .cache import PipelineCache
|
||||
from .storage.pipeline_storage import PipelineStorage
|
||||
from graphrag.index.cache.pipeline_cache import PipelineCache
|
||||
from graphrag.index.storage.pipeline_storage import PipelineStorage
|
||||
|
||||
|
||||
@dc_dataclass
|
||||
|
||||
@ -14,7 +14,9 @@ from graphrag.config.enums import (
|
||||
StorageType,
|
||||
TextEmbeddingTarget,
|
||||
)
|
||||
from graphrag.config.models import GraphRagConfig, StorageConfig, TextEmbeddingConfig
|
||||
from graphrag.config.models.graph_rag_config import GraphRagConfig
|
||||
from graphrag.config.models.storage_config import StorageConfig
|
||||
from graphrag.config.models.text_embedding_config import TextEmbeddingConfig
|
||||
from graphrag.index.config.cache import (
|
||||
PipelineBlobCacheConfig,
|
||||
PipelineCacheConfigTypes,
|
||||
|
||||
@ -2,20 +2,3 @@
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""Definitions for emitting pipeline artifacts to storage."""
|
||||
|
||||
from .csv_table_emitter import CSVTableEmitter
|
||||
from .factories import create_table_emitter, create_table_emitters
|
||||
from .json_table_emitter import JsonTableEmitter
|
||||
from .parquet_table_emitter import ParquetTableEmitter
|
||||
from .table_emitter import TableEmitter
|
||||
from .types import TableEmitterType
|
||||
|
||||
__all__ = [
|
||||
"CSVTableEmitter",
|
||||
"JsonTableEmitter",
|
||||
"ParquetTableEmitter",
|
||||
"TableEmitter",
|
||||
"TableEmitterType",
|
||||
"create_table_emitter",
|
||||
"create_table_emitters",
|
||||
]
|
||||
|
||||
@ -7,9 +7,8 @@ import logging
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from graphrag.index.storage import PipelineStorage
|
||||
|
||||
from .table_emitter import TableEmitter
|
||||
from graphrag.index.emit.table_emitter import TableEmitter
|
||||
from graphrag.index.storage.pipeline_storage import PipelineStorage
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@ -3,15 +3,14 @@
|
||||
|
||||
"""Table Emitter Factories."""
|
||||
|
||||
from graphrag.index.storage import PipelineStorage
|
||||
from graphrag.index.emit.csv_table_emitter import CSVTableEmitter
|
||||
from graphrag.index.emit.json_table_emitter import JsonTableEmitter
|
||||
from graphrag.index.emit.parquet_table_emitter import ParquetTableEmitter
|
||||
from graphrag.index.emit.table_emitter import TableEmitter
|
||||
from graphrag.index.emit.types import TableEmitterType
|
||||
from graphrag.index.storage.pipeline_storage import PipelineStorage
|
||||
from graphrag.index.typing import ErrorHandlerFn
|
||||
|
||||
from .csv_table_emitter import CSVTableEmitter
|
||||
from .json_table_emitter import JsonTableEmitter
|
||||
from .parquet_table_emitter import ParquetTableEmitter
|
||||
from .table_emitter import TableEmitter
|
||||
from .types import TableEmitterType
|
||||
|
||||
|
||||
def create_table_emitter(
|
||||
emitter_type: TableEmitterType, storage: PipelineStorage, on_error: ErrorHandlerFn
|
||||
|
||||
@ -7,9 +7,8 @@ import logging
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from graphrag.index.storage import PipelineStorage
|
||||
|
||||
from .table_emitter import TableEmitter
|
||||
from graphrag.index.emit.table_emitter import TableEmitter
|
||||
from graphrag.index.storage.pipeline_storage import PipelineStorage
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@ -9,11 +9,10 @@ import traceback
|
||||
import pandas as pd
|
||||
from pyarrow.lib import ArrowInvalid, ArrowTypeError
|
||||
|
||||
from graphrag.index.storage import PipelineStorage
|
||||
from graphrag.index.emit.table_emitter import TableEmitter
|
||||
from graphrag.index.storage.pipeline_storage import PipelineStorage
|
||||
from graphrag.index.typing import ErrorHandlerFn
|
||||
|
||||
from .table_emitter import TableEmitter
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
||||
@ -11,7 +11,7 @@ from datashaper import (
|
||||
VerbCallbacks,
|
||||
)
|
||||
|
||||
from graphrag.index.cache import PipelineCache
|
||||
from graphrag.index.cache.pipeline_cache import PipelineCache
|
||||
from graphrag.index.operations.cluster_graph import cluster_graph
|
||||
from graphrag.index.operations.embed_graph import embed_graph
|
||||
from graphrag.index.operations.extract_entities import extract_entities
|
||||
@ -22,7 +22,7 @@ from graphrag.index.operations.snapshot_rows import snapshot_rows
|
||||
from graphrag.index.operations.summarize_descriptions import (
|
||||
summarize_descriptions,
|
||||
)
|
||||
from graphrag.index.storage import PipelineStorage
|
||||
from graphrag.index.storage.pipeline_storage import PipelineStorage
|
||||
|
||||
|
||||
async def create_base_entity_graph(
|
||||
|
||||
@ -16,8 +16,8 @@ from datashaper import (
|
||||
|
||||
from graphrag.index.operations.chunk_text import chunk_text
|
||||
from graphrag.index.operations.snapshot import snapshot
|
||||
from graphrag.index.storage import PipelineStorage
|
||||
from graphrag.index.utils import gen_md5_hash
|
||||
from graphrag.index.storage.pipeline_storage import PipelineStorage
|
||||
from graphrag.index.utils.hashing import gen_md5_hash
|
||||
|
||||
|
||||
async def create_base_text_units(
|
||||
|
||||
@ -11,7 +11,7 @@ from datashaper import (
|
||||
VerbCallbacks,
|
||||
)
|
||||
|
||||
from graphrag.index.cache import PipelineCache
|
||||
from graphrag.index.cache.pipeline_cache import PipelineCache
|
||||
from graphrag.index.graph.extractors.community_reports.schemas import (
|
||||
CLAIM_DESCRIPTION,
|
||||
CLAIM_DETAILS,
|
||||
|
||||
@ -12,7 +12,7 @@ from datashaper import (
|
||||
VerbCallbacks,
|
||||
)
|
||||
|
||||
from graphrag.index.cache import PipelineCache
|
||||
from graphrag.index.cache.pipeline_cache import PipelineCache
|
||||
from graphrag.index.operations.extract_covariates import (
|
||||
extract_covariates,
|
||||
)
|
||||
|
||||
@ -13,7 +13,7 @@ from datashaper import (
|
||||
from graphrag.index.operations.layout_graph import layout_graph
|
||||
from graphrag.index.operations.snapshot import snapshot
|
||||
from graphrag.index.operations.unpack_graph import unpack_graph
|
||||
from graphrag.index.storage import PipelineStorage
|
||||
from graphrag.index.storage.pipeline_storage import PipelineStorage
|
||||
|
||||
|
||||
async def create_final_nodes(
|
||||
|
||||
@ -10,7 +10,7 @@ from datashaper import (
|
||||
VerbCallbacks,
|
||||
)
|
||||
|
||||
from graphrag.index.cache import PipelineCache
|
||||
from graphrag.index.cache.pipeline_cache import PipelineCache
|
||||
from graphrag.index.config.embeddings import (
|
||||
community_full_content_embedding,
|
||||
community_summary_embedding,
|
||||
@ -23,7 +23,7 @@ from graphrag.index.config.embeddings import (
|
||||
)
|
||||
from graphrag.index.operations.embed_text import embed_text
|
||||
from graphrag.index.operations.snapshot import snapshot
|
||||
from graphrag.index.storage import PipelineStorage
|
||||
from graphrag.index.storage.pipeline_storage import PipelineStorage
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@ -3,6 +3,6 @@
|
||||
|
||||
"""The Indexing Engine graph embedding package root."""
|
||||
|
||||
from .embedding import NodeEmbeddings, embed_nod2vec
|
||||
from graphrag.index.graph.embedding.embedding import NodeEmbeddings, embed_nod2vec
|
||||
|
||||
__all__ = ["NodeEmbeddings", "embed_nod2vec"]
|
||||
|
||||
@ -5,7 +5,6 @@
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
import graspologic as gc
|
||||
import networkx as nx
|
||||
import numpy as np
|
||||
|
||||
@ -28,6 +27,9 @@ def embed_nod2vec(
|
||||
random_seed: int = 86,
|
||||
) -> NodeEmbeddings:
|
||||
"""Generate node embeddings using Node2Vec."""
|
||||
# NOTE: This import is done here to reduce the initial import time of the graphrag package
|
||||
import graspologic as gc
|
||||
|
||||
# generate embedding
|
||||
lcc_tensors = gc.embed.node2vec_embed( # type: ignore
|
||||
graph=graph,
|
||||
|
||||
@ -3,11 +3,11 @@
|
||||
|
||||
"""The Indexing Engine graph extractors package root."""
|
||||
|
||||
from .claims import ClaimExtractor
|
||||
from .community_reports import (
|
||||
from graphrag.index.graph.extractors.claims import ClaimExtractor
|
||||
from graphrag.index.graph.extractors.community_reports import (
|
||||
CommunityReportsExtractor,
|
||||
)
|
||||
from .graph import GraphExtractionResult, GraphExtractor
|
||||
from graphrag.index.graph.extractors.graph import GraphExtractionResult, GraphExtractor
|
||||
|
||||
__all__ = [
|
||||
"ClaimExtractor",
|
||||
|
||||
@ -3,6 +3,6 @@
|
||||
|
||||
"""The Indexing Engine graph extractors claims package root."""
|
||||
|
||||
from .claim_extractor import ClaimExtractor
|
||||
from graphrag.index.graph.extractors.claims.claim_extractor import ClaimExtractor
|
||||
|
||||
__all__ = ["ClaimExtractor"]
|
||||
|
||||
@ -4,12 +4,17 @@
|
||||
"""The Indexing Engine community reports package root."""
|
||||
|
||||
import graphrag.index.graph.extractors.community_reports.schemas as schemas
|
||||
|
||||
from .build_mixed_context import build_mixed_context
|
||||
from .community_reports_extractor import CommunityReportsExtractor
|
||||
from .prep_community_report_context import prep_community_report_context
|
||||
from .sort_context import sort_context
|
||||
from .utils import (
|
||||
from graphrag.index.graph.extractors.community_reports.build_mixed_context import (
|
||||
build_mixed_context,
|
||||
)
|
||||
from graphrag.index.graph.extractors.community_reports.community_reports_extractor import (
|
||||
CommunityReportsExtractor,
|
||||
)
|
||||
from graphrag.index.graph.extractors.community_reports.prep_community_report_context import (
|
||||
prep_community_report_context,
|
||||
)
|
||||
from graphrag.index.graph.extractors.community_reports.sort_context import sort_context
|
||||
from graphrag.index.graph.extractors.community_reports.utils import (
|
||||
filter_claims_to_nodes,
|
||||
filter_edges_to_nodes,
|
||||
filter_nodes_to_level,
|
||||
|
||||
@ -5,10 +5,9 @@
|
||||
import pandas as pd
|
||||
|
||||
import graphrag.index.graph.extractors.community_reports.schemas as schemas
|
||||
from graphrag.index.graph.extractors.community_reports.sort_context import sort_context
|
||||
from graphrag.query.llm.text_utils import num_tokens
|
||||
|
||||
from .sort_context import sort_context
|
||||
|
||||
|
||||
def build_mixed_context(context: list[dict], max_tokens: int) -> str:
|
||||
"""
|
||||
|
||||
@ -9,7 +9,7 @@ from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
from graphrag.index.typing import ErrorHandlerFn
|
||||
from graphrag.index.utils import dict_has_keys_with_types
|
||||
from graphrag.index.utils.dicts import dict_has_keys_with_types
|
||||
from graphrag.llm import CompletionLLM
|
||||
from graphrag.prompts.index.community_report import COMMUNITY_REPORT_PROMPT
|
||||
|
||||
|
||||
@ -9,6 +9,11 @@ from typing import cast
|
||||
import pandas as pd
|
||||
|
||||
import graphrag.index.graph.extractors.community_reports.schemas as schemas
|
||||
from graphrag.index.graph.extractors.community_reports.build_mixed_context import (
|
||||
build_mixed_context,
|
||||
)
|
||||
from graphrag.index.graph.extractors.community_reports.sort_context import sort_context
|
||||
from graphrag.index.graph.extractors.community_reports.utils import set_context_size
|
||||
from graphrag.index.utils.dataframes import (
|
||||
antijoin,
|
||||
drop_columns,
|
||||
@ -19,10 +24,6 @@ from graphrag.index.utils.dataframes import (
|
||||
where_column_equals,
|
||||
)
|
||||
|
||||
from .build_mixed_context import build_mixed_context
|
||||
from .sort_context import sort_context
|
||||
from .utils import set_context_size
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
|
||||
"""The Indexing Engine unipartite graph package root."""
|
||||
|
||||
from .graph_extractor import (
|
||||
from graphrag.index.graph.extractors.graph.graph_extractor import (
|
||||
DEFAULT_ENTITY_TYPES,
|
||||
GraphExtractionResult,
|
||||
GraphExtractor,
|
||||
|
||||
@ -15,7 +15,7 @@ import tiktoken
|
||||
|
||||
import graphrag.config.defaults as defs
|
||||
from graphrag.index.typing import ErrorHandlerFn
|
||||
from graphrag.index.utils import clean_str
|
||||
from graphrag.index.utils.string import clean_str
|
||||
from graphrag.llm import CompletionLLM
|
||||
from graphrag.prompts.index.entity_extraction import (
|
||||
CONTINUE_PROMPT,
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
|
||||
"""The Indexing Engine unipartite graph package root."""
|
||||
|
||||
from .description_summary_extractor import (
|
||||
from graphrag.index.graph.extractors.summarize.description_summary_extractor import (
|
||||
SummarizationResult,
|
||||
SummarizeExtractor,
|
||||
)
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
|
||||
"""The Indexing Engine graph utils package root."""
|
||||
|
||||
from .normalize_node_names import normalize_node_names
|
||||
from .stable_lcc import stable_largest_connected_component
|
||||
from graphrag.index.graph.utils.normalize_node_names import normalize_node_names
|
||||
from graphrag.index.graph.utils.stable_lcc import stable_largest_connected_component
|
||||
|
||||
__all__ = ["normalize_node_names", "stable_largest_connected_component"]
|
||||
|
||||
@ -6,13 +6,15 @@
|
||||
from typing import Any, cast
|
||||
|
||||
import networkx as nx
|
||||
from graspologic.utils import largest_connected_component
|
||||
|
||||
from .normalize_node_names import normalize_node_names
|
||||
from graphrag.index.graph.utils.normalize_node_names import normalize_node_names
|
||||
|
||||
|
||||
def stable_largest_connected_component(graph: nx.Graph) -> nx.Graph:
|
||||
"""Return the largest connected component of the graph, with nodes and edges sorted in a stable way."""
|
||||
# NOTE: The import is done here to reduce the initial import time of the module
|
||||
from graspologic.utils import largest_connected_component
|
||||
|
||||
graph = graph.copy()
|
||||
graph = cast(nx.Graph, largest_connected_component(graph))
|
||||
graph = normalize_node_names(graph)
|
||||
|
||||
@ -3,8 +3,11 @@
|
||||
|
||||
"""The Indexing Engine graph visualization package root."""
|
||||
|
||||
from .compute_umap_positions import compute_umap_positions, get_zero_positions
|
||||
from .typing import GraphLayout, NodePosition
|
||||
from graphrag.index.graph.visualization.compute_umap_positions import (
|
||||
compute_umap_positions,
|
||||
get_zero_positions,
|
||||
)
|
||||
from graphrag.index.graph.visualization.typing import GraphLayout, NodePosition
|
||||
|
||||
__all__ = [
|
||||
"GraphLayout",
|
||||
|
||||
@ -3,13 +3,11 @@
|
||||
|
||||
"""A module containing compute_umap_positions and visualize_embedding method definition."""
|
||||
|
||||
import graspologic as gc
|
||||
import matplotlib.pyplot as plt
|
||||
import networkx as nx
|
||||
import numpy as np
|
||||
import umap
|
||||
|
||||
from .typing import NodePosition
|
||||
from graphrag.index.graph.visualization.typing import NodePosition
|
||||
|
||||
|
||||
def get_zero_positions(
|
||||
@ -61,6 +59,9 @@ def compute_umap_positions(
|
||||
random_state: int = 86,
|
||||
) -> list[NodePosition]:
|
||||
"""Project embedding vectors down to 2D/3D using UMAP."""
|
||||
# NOTE: This import is done here to reduce the initial import time of the graphrag package
|
||||
import umap
|
||||
|
||||
embedding_positions = umap.UMAP(
|
||||
min_dist=min_dist,
|
||||
n_neighbors=n_neighbors,
|
||||
@ -105,6 +106,9 @@ def visualize_embedding(
|
||||
umap_positions: list[dict],
|
||||
):
|
||||
"""Project embedding down to 2D using UMAP and visualize."""
|
||||
# NOTE: This import is done here to reduce the initial import time of the graphrag package
|
||||
import graspologic as gc
|
||||
|
||||
# rendering
|
||||
plt.clf()
|
||||
figure = plt.gcf()
|
||||
|
||||
@ -2,7 +2,3 @@
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""The Indexing Engine input package root."""
|
||||
|
||||
from .load_input import load_input
|
||||
|
||||
__all__ = ["load_input"]
|
||||
|
||||
@ -10,10 +10,10 @@ from typing import cast
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from graphrag.index.config import PipelineCSVInputConfig, PipelineInputConfig
|
||||
from graphrag.index.storage import PipelineStorage
|
||||
from graphrag.index.utils import gen_md5_hash
|
||||
from graphrag.logging import ProgressReporter
|
||||
from graphrag.index.config.input import PipelineCSVInputConfig, PipelineInputConfig
|
||||
from graphrag.index.storage.pipeline_storage import PipelineStorage
|
||||
from graphrag.index.utils.hashing import gen_md5_hash
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@ -10,18 +10,17 @@ from typing import cast
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from graphrag.config import InputConfig, InputType
|
||||
from graphrag.index.config import PipelineInputConfig
|
||||
from graphrag.index.storage import (
|
||||
BlobPipelineStorage,
|
||||
FilePipelineStorage,
|
||||
)
|
||||
from graphrag.logging import NullProgressReporter, ProgressReporter
|
||||
|
||||
from .csv import input_type as csv
|
||||
from .csv import load as load_csv
|
||||
from .text import input_type as text
|
||||
from .text import load as load_text
|
||||
from graphrag.config.enums import InputType
|
||||
from graphrag.config.models.input_config import InputConfig
|
||||
from graphrag.index.config.input import PipelineInputConfig
|
||||
from graphrag.index.input.csv import input_type as csv
|
||||
from graphrag.index.input.csv import load as load_csv
|
||||
from graphrag.index.input.text import input_type as text
|
||||
from graphrag.index.input.text import load as load_text
|
||||
from graphrag.index.storage.blob_pipeline_storage import BlobPipelineStorage
|
||||
from graphrag.index.storage.file_pipeline_storage import FilePipelineStorage
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
from graphrag.logging.null_progress import NullProgressReporter
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
loaders: dict[str, Callable[..., Awaitable[pd.DataFrame]]] = {
|
||||
|
||||
@ -10,10 +10,10 @@ from typing import Any
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from graphrag.index.config import PipelineInputConfig
|
||||
from graphrag.index.storage import PipelineStorage
|
||||
from graphrag.index.utils import gen_md5_hash
|
||||
from graphrag.logging import ProgressReporter
|
||||
from graphrag.index.config.input import PipelineInputConfig
|
||||
from graphrag.index.storage.pipeline_storage import PipelineStorage
|
||||
from graphrag.index.utils.hashing import gen_md5_hash
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
|
||||
DEFAULT_FILE_PATTERN = re.compile(
|
||||
r".*[\\/](?P<source>[^\\/]+)[\\/](?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})_(?P<author>[^_]+)_\d+\.txt"
|
||||
|
||||
@ -2,13 +2,3 @@
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""The Indexing Engine LLM package root."""
|
||||
|
||||
from .load_llm import load_llm, load_llm_embeddings
|
||||
from .types import TextListSplitter, TextSplitter
|
||||
|
||||
__all__ = [
|
||||
"TextListSplitter",
|
||||
"TextSplitter",
|
||||
"load_llm",
|
||||
"load_llm_embeddings",
|
||||
]
|
||||
|
||||
@ -27,7 +27,7 @@ from graphrag.llm import (
|
||||
if TYPE_CHECKING:
|
||||
from datashaper import VerbCallbacks
|
||||
|
||||
from graphrag.index.cache import PipelineCache
|
||||
from graphrag.index.cache.pipeline_cache import PipelineCache
|
||||
from graphrag.index.typing import ErrorHandlerFn
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
@ -9,10 +9,9 @@ from pathlib import Path
|
||||
import yaml
|
||||
from pyaml_env import parse_config as parse_config_with_env
|
||||
|
||||
from graphrag.config import create_graphrag_config, read_dotenv
|
||||
from graphrag.index.config import PipelineConfig
|
||||
|
||||
from .create_pipeline_config import create_pipeline_config
|
||||
from graphrag.config.create_graphrag_config import create_graphrag_config, read_dotenv
|
||||
from graphrag.index.config.pipeline import PipelineConfig
|
||||
from graphrag.index.create_pipeline_config import create_pipeline_config
|
||||
|
||||
|
||||
def load_pipeline_config(config_or_path: str | PipelineConfig) -> PipelineConfig:
|
||||
|
||||
@ -3,6 +3,10 @@
|
||||
|
||||
"""The Indexing Engine text chunk package root."""
|
||||
|
||||
from .chunk_text import ChunkStrategy, ChunkStrategyType, chunk_text
|
||||
from graphrag.index.operations.chunk_text.chunk_text import (
|
||||
ChunkStrategy,
|
||||
ChunkStrategyType,
|
||||
chunk_text,
|
||||
)
|
||||
|
||||
__all__ = ["ChunkStrategy", "ChunkStrategyType", "chunk_text"]
|
||||
|
||||
@ -12,7 +12,11 @@ from datashaper import (
|
||||
progress_ticker,
|
||||
)
|
||||
|
||||
from .typing import ChunkInput, ChunkStrategy, ChunkStrategyType
|
||||
from graphrag.index.operations.chunk_text.typing import (
|
||||
ChunkInput,
|
||||
ChunkStrategy,
|
||||
ChunkStrategyType,
|
||||
)
|
||||
|
||||
|
||||
def chunk_text(
|
||||
@ -117,14 +121,13 @@ def load_strategy(strategy: ChunkStrategyType) -> ChunkStrategy:
|
||||
"""Load strategy method definition."""
|
||||
match strategy:
|
||||
case ChunkStrategyType.tokens:
|
||||
from .strategies import run_tokens
|
||||
from graphrag.index.operations.chunk_text.strategies import run_tokens
|
||||
|
||||
return run_tokens
|
||||
case ChunkStrategyType.sentence:
|
||||
# NLTK
|
||||
from graphrag.index.bootstrap import bootstrap
|
||||
|
||||
from .strategies import run_sentences
|
||||
from graphrag.index.operations.chunk_text.strategies import run_sentences
|
||||
|
||||
bootstrap()
|
||||
return run_sentences
|
||||
|
||||
@ -11,9 +11,8 @@ import tiktoken
|
||||
from datashaper import ProgressTicker
|
||||
|
||||
import graphrag.config.defaults as defs
|
||||
from graphrag.index.text_splitting import Tokenizer
|
||||
|
||||
from .typing import TextChunk
|
||||
from graphrag.index.operations.chunk_text.typing import TextChunk
|
||||
from graphrag.index.text_splitting.text_splitting import Tokenizer
|
||||
|
||||
|
||||
def run_tokens(
|
||||
|
||||
@ -11,10 +11,9 @@ from typing import Any, cast
|
||||
import networkx as nx
|
||||
import pandas as pd
|
||||
from datashaper import VerbCallbacks, progress_iterable
|
||||
from graspologic.partition import hierarchical_leiden
|
||||
|
||||
from graphrag.index.graph.utils import stable_largest_connected_component
|
||||
from graphrag.index.utils import gen_uuid
|
||||
from graphrag.index.utils.uuid import gen_uuid
|
||||
|
||||
Communities = list[tuple[int, str, list[str]]]
|
||||
|
||||
@ -187,6 +186,9 @@ def _compute_leiden_communities(
|
||||
seed=0xDEADBEEF,
|
||||
) -> dict[int, dict[str, int]]:
|
||||
"""Return Leiden root communities."""
|
||||
# NOTE: This import is done here to reduce the initial import time of the graphrag package
|
||||
from graspologic.partition import hierarchical_leiden
|
||||
|
||||
if use_lcc:
|
||||
graph = stable_largest_connected_component(graph)
|
||||
|
||||
|
||||
@ -3,7 +3,10 @@
|
||||
|
||||
"""The Indexing Engine graph embed package root."""
|
||||
|
||||
from .embed_graph import EmbedGraphStrategyType, embed_graph
|
||||
from .typing import NodeEmbeddings
|
||||
from graphrag.index.operations.embed_graph.embed_graph import (
|
||||
EmbedGraphStrategyType,
|
||||
embed_graph,
|
||||
)
|
||||
from graphrag.index.operations.embed_graph.typing import NodeEmbeddings
|
||||
|
||||
__all__ = ["EmbedGraphStrategyType", "NodeEmbeddings", "embed_graph"]
|
||||
|
||||
@ -12,9 +12,8 @@ from datashaper import VerbCallbacks, derive_from_rows
|
||||
|
||||
from graphrag.index.graph.embedding import embed_nod2vec
|
||||
from graphrag.index.graph.utils import stable_largest_connected_component
|
||||
from graphrag.index.utils import load_graph
|
||||
|
||||
from .typing import NodeEmbeddings
|
||||
from graphrag.index.operations.embed_graph.typing import NodeEmbeddings
|
||||
from graphrag.index.utils.load_graph import load_graph
|
||||
|
||||
|
||||
class EmbedGraphStrategyType(str, Enum):
|
||||
|
||||
@ -3,6 +3,9 @@
|
||||
|
||||
"""The Indexing Engine text embed package root."""
|
||||
|
||||
from .embed_text import TextEmbedStrategyType, embed_text
|
||||
from graphrag.index.operations.embed_text.embed_text import (
|
||||
TextEmbedStrategyType,
|
||||
embed_text,
|
||||
)
|
||||
|
||||
__all__ = ["TextEmbedStrategyType", "embed_text"]
|
||||
|
||||
@ -11,15 +11,11 @@ import numpy as np
|
||||
import pandas as pd
|
||||
from datashaper import VerbCallbacks
|
||||
|
||||
from graphrag.index.cache import PipelineCache
|
||||
from graphrag.index.cache.pipeline_cache import PipelineCache
|
||||
from graphrag.index.operations.embed_text.strategies.typing import TextEmbeddingStrategy
|
||||
from graphrag.utils.embeddings import create_collection_name
|
||||
from graphrag.vector_stores import (
|
||||
BaseVectorStore,
|
||||
VectorStoreDocument,
|
||||
VectorStoreFactory,
|
||||
)
|
||||
|
||||
from .strategies.typing import TextEmbeddingStrategy
|
||||
from graphrag.vector_stores.base import BaseVectorStore, VectorStoreDocument
|
||||
from graphrag.vector_stores.factory import VectorStoreFactory
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
@ -242,11 +238,15 @@ def load_strategy(strategy: TextEmbedStrategyType) -> TextEmbeddingStrategy:
|
||||
"""Load strategy method definition."""
|
||||
match strategy:
|
||||
case TextEmbedStrategyType.openai:
|
||||
from .strategies.openai import run as run_openai
|
||||
from graphrag.index.operations.embed_text.strategies.openai import (
|
||||
run as run_openai,
|
||||
)
|
||||
|
||||
return run_openai
|
||||
case TextEmbedStrategyType.mock:
|
||||
from .strategies.mock import run as run_mock
|
||||
from graphrag.index.operations.embed_text.strategies.mock import (
|
||||
run as run_mock,
|
||||
)
|
||||
|
||||
return run_mock
|
||||
case _:
|
||||
|
||||
@ -9,9 +9,8 @@ from typing import Any
|
||||
|
||||
from datashaper import ProgressTicker, VerbCallbacks, progress_ticker
|
||||
|
||||
from graphrag.index.cache import PipelineCache
|
||||
|
||||
from .typing import TextEmbeddingResult
|
||||
from graphrag.index.cache.pipeline_cache import PipelineCache
|
||||
from graphrag.index.operations.embed_text.strategies.typing import TextEmbeddingResult
|
||||
|
||||
|
||||
async def run( # noqa RUF029 async is required for interface
|
||||
|
||||
@ -11,14 +11,13 @@ import numpy as np
|
||||
from datashaper import ProgressTicker, VerbCallbacks, progress_ticker
|
||||
|
||||
import graphrag.config.defaults as defs
|
||||
from graphrag.index.cache import PipelineCache
|
||||
from graphrag.index.llm import load_llm_embeddings
|
||||
from graphrag.index.text_splitting import TokenTextSplitter
|
||||
from graphrag.index.utils import is_null
|
||||
from graphrag.index.cache.pipeline_cache import PipelineCache
|
||||
from graphrag.index.llm.load_llm import load_llm_embeddings
|
||||
from graphrag.index.operations.embed_text.strategies.typing import TextEmbeddingResult
|
||||
from graphrag.index.text_splitting.text_splitting import TokenTextSplitter
|
||||
from graphrag.index.utils.is_null import is_null
|
||||
from graphrag.llm import EmbeddingLLM, OpenAIConfiguration
|
||||
|
||||
from .typing import TextEmbeddingResult
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
||||
@ -8,7 +8,7 @@ from dataclasses import dataclass
|
||||
|
||||
from datashaper import VerbCallbacks
|
||||
|
||||
from graphrag.index.cache import PipelineCache
|
||||
from graphrag.index.cache.pipeline_cache import PipelineCache
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@ -3,6 +3,9 @@
|
||||
|
||||
"""The Indexing Engine text extract claims package root."""
|
||||
|
||||
from .extract_covariates import ExtractClaimsStrategyType, extract_covariates
|
||||
from graphrag.index.operations.extract_covariates.extract_covariates import (
|
||||
ExtractClaimsStrategyType,
|
||||
extract_covariates,
|
||||
)
|
||||
|
||||
__all__ = ["ExtractClaimsStrategyType", "extract_covariates"]
|
||||
|
||||
@ -14,9 +14,12 @@ from datashaper import (
|
||||
derive_from_rows,
|
||||
)
|
||||
|
||||
from graphrag.index.cache import PipelineCache
|
||||
|
||||
from .typing import Covariate, CovariateExtractStrategy, ExtractClaimsStrategyType
|
||||
from graphrag.index.cache.pipeline_cache import PipelineCache
|
||||
from graphrag.index.operations.extract_covariates.typing import (
|
||||
Covariate,
|
||||
CovariateExtractStrategy,
|
||||
ExtractClaimsStrategyType,
|
||||
)
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
@ -72,7 +75,9 @@ def load_strategy(strategy_type: ExtractClaimsStrategyType) -> CovariateExtractS
|
||||
"""Load strategy method definition."""
|
||||
match strategy_type:
|
||||
case ExtractClaimsStrategyType.graph_intelligence:
|
||||
from .strategies import run_graph_intelligence
|
||||
from graphrag.index.operations.extract_covariates.strategies import (
|
||||
run_graph_intelligence,
|
||||
)
|
||||
|
||||
return run_graph_intelligence
|
||||
case _:
|
||||
|
||||
@ -9,15 +9,14 @@ from typing import Any
|
||||
from datashaper import VerbCallbacks
|
||||
|
||||
import graphrag.config.defaults as defs
|
||||
from graphrag.index.cache import PipelineCache
|
||||
from graphrag.index.cache.pipeline_cache import PipelineCache
|
||||
from graphrag.index.graph.extractors.claims import ClaimExtractor
|
||||
from graphrag.index.llm import load_llm
|
||||
from graphrag.llm import CompletionLLM
|
||||
|
||||
from .typing import (
|
||||
from graphrag.index.llm.load_llm import load_llm
|
||||
from graphrag.index.operations.extract_covariates.typing import (
|
||||
Covariate,
|
||||
CovariateExtractionResult,
|
||||
)
|
||||
from graphrag.llm import CompletionLLM
|
||||
|
||||
|
||||
async def run_graph_intelligence(
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user