Print sort context

This commit is contained in:
Alonso Guevara 2025-04-14 16:45:06 -06:00
parent 53268406fe
commit bbd9db9701
2 changed files with 15 additions and 1 deletions

View File

@@ -48,6 +48,9 @@ def prep_community_report_context(
valid_context_df = _within_context(level_context_df) valid_context_df = _within_context(level_context_df)
invalid_context_df = _exceeding_context(level_context_df) invalid_context_df = _exceeding_context(level_context_df)
log.info(f"Found {len(valid_context_df)} valid context records at level {level}")
log.info(f"Found {len(invalid_context_df)} invalid context records at level {level}")
# there is no report to substitute with, so we just trim the local context of the invalid context records # there is no report to substitute with, so we just trim the local context of the invalid context records
# this case should only happen at the bottom level of the community hierarchy where there are no sub-communities # this case should only happen at the bottom level of the community hierarchy where there are no sub-communities
if invalid_context_df.empty: if invalid_context_df.empty:

View File

@@ -7,11 +7,14 @@ import pandas as pd
import graphrag.index.graph.extractors.community_reports.schemas as schemas import graphrag.index.graph.extractors.community_reports.schemas as schemas
from graphrag.query.llm.text_utils import num_tokens from graphrag.query.llm.text_utils import num_tokens
import logging
log = logging.getLogger(__name__)
def sort_context( def sort_context(
local_context: list[dict], local_context: list[dict],
sub_community_reports: list[dict] | None = None, sub_community_reports: list[dict] | None = None,
max_tokens: int | None = None, max_tokens: int | None = 8000,
node_id_column: str = schemas.NODE_ID, node_id_column: str = schemas.NODE_ID,
node_name_column: str = schemas.NODE_NAME, node_name_column: str = schemas.NODE_NAME,
node_details_column: str = schemas.NODE_DETAILS, node_details_column: str = schemas.NODE_DETAILS,
@@ -29,6 +32,14 @@ def sort_context(
If max tokens is provided, we will return the context string that fits within the token limit. If max tokens is provided, we will return the context string that fits within the token limit.
""" """
log.info(
f"Sorting local context with {len(local_context)} records and sub-community reports with {len(sub_community_reports) if sub_community_reports else 0} records"
)
log.info(
f"Sorting context {local_context} with max tokens: {max_tokens} and sub-community reports: {sub_community_reports}"
)
def _get_context_string( def _get_context_string(
entities: list[dict], entities: list[dict],
edges: list[dict], edges: list[dict],