Print sort context

2026-01-14 00:57:23 +08:00 · 2025-04-14 16:45:06 -06:00 · 2025-04-14 16:45:06 -06:00 · bbd9db9701
commit bbd9db9701
parent 53268406fe
2 changed files with 15 additions and 1 deletions
--- a/graphrag/index/graph/extractors/community_reports/prep_community_report_context.py
+++ b/graphrag/index/graph/extractors/community_reports/prep_community_report_context.py
@@ -48,6 +48,9 @@ def prep_community_report_context(
    valid_context_df = _within_context(level_context_df)
    invalid_context_df = _exceeding_context(level_context_df)
    log.info(f"Found {len(valid_context_df)} valid context records at level {level}")
    log.info(f"Found {len(invalid_context_df)} invalid context records at level {level}")
    # there is no report to substitute with, so we just trim the local context of the invalid context records
    # this case should only happen at the bottom level of the community hierarchy where there are no sub-communities
    if invalid_context_df.empty:
--- a/graphrag/index/graph/extractors/community_reports/sort_context.py
+++ b/graphrag/index/graph/extractors/community_reports/sort_context.py
@@ -7,11 +7,14 @@ import pandas as pd
 import graphrag.index.graph.extractors.community_reports.schemas as schemas
 from graphrag.query.llm.text_utils import num_tokens
 import logging
 log = logging.getLogger(__name__)
 def sort_context(
    local_context: list[dict],
    sub_community_reports: list[dict] | None = None,
-    max_tokens: int | None = None,
+    max_tokens: int | None = 8000,
    node_id_column: str = schemas.NODE_ID,
    node_name_column: str = schemas.NODE_NAME,
    node_details_column: str = schemas.NODE_DETAILS,
@@ -29,6 +32,14 @@ def sort_context(
    If max tokens is provided, we will return the context string that fits within the token limit.
    """
    log.info(
        f"Sorting local context with {len(local_context)} records and sub-community reports with {len(sub_community_reports) if sub_community_reports else 0} records"
    )
    log.info(
        f"Sorting context {local_context} with max tokens: {max_tokens} and sub-community reports: {sub_community_reports}"
    )
    def _get_context_string(
        entities: list[dict],
        edges: list[dict],