Print sort context

This commit is contained in:
Alonso Guevara 2025-04-14 16:45:06 -06:00
parent 53268406fe
commit bbd9db9701
2 changed files with 15 additions and 1 deletions

View File

@@ -48,6 +48,9 @@ def prep_community_report_context(
valid_context_df = _within_context(level_context_df)
invalid_context_df = _exceeding_context(level_context_df)
log.info(f"Found {len(valid_context_df)} valid context records at level {level}")
log.info(f"Found {len(invalid_context_df)} invalid context records at level {level}")
# there is no report to substitute with, so we just trim the local context of the invalid context records
# this case should only happen at the bottom level of the community hierarchy where there are no sub-communities
if invalid_context_df.empty:

View File

@@ -7,11 +7,14 @@ import pandas as pd
import graphrag.index.graph.extractors.community_reports.schemas as schemas
from graphrag.query.llm.text_utils import num_tokens
import logging
log = logging.getLogger(__name__)
def sort_context(
local_context: list[dict],
sub_community_reports: list[dict] | None = None,
max_tokens: int | None = None,
max_tokens: int | None = 8000,
node_id_column: str = schemas.NODE_ID,
node_name_column: str = schemas.NODE_NAME,
node_details_column: str = schemas.NODE_DETAILS,
@@ -29,6 +32,14 @@ def sort_context(
If max tokens is provided, we will return the context string that fits within the token limit.
"""
log.info(
f"Sorting local context with {len(local_context)} records and sub-community reports with {len(sub_community_reports) if sub_community_reports else 0} records"
)
log.info(
f"Sorting context {local_context} with max tokens: {max_tokens} and sub-community reports: {sub_community_reports}"
)
def _get_context_string(
entities: list[dict],
edges: list[dict],