diff --git a/graphrag/index/graph/extractors/community_reports/prep_community_report_context.py b/graphrag/index/graph/extractors/community_reports/prep_community_report_context.py index 2ec42220..96898421 100644 --- a/graphrag/index/graph/extractors/community_reports/prep_community_report_context.py +++ b/graphrag/index/graph/extractors/community_reports/prep_community_report_context.py @@ -48,6 +48,9 @@ def prep_community_report_context( valid_context_df = _within_context(level_context_df) invalid_context_df = _exceeding_context(level_context_df) + log.info(f"Found {len(valid_context_df)} valid context records at level {level}") + log.info(f"Found {len(invalid_context_df)} invalid context records at level {level}") + # there is no report to substitute with, so we just trim the local context of the invalid context records # this case should only happen at the bottom level of the community hierarchy where there are no sub-communities if invalid_context_df.empty: diff --git a/graphrag/index/graph/extractors/community_reports/sort_context.py b/graphrag/index/graph/extractors/community_reports/sort_context.py index 811cb7e9..e6c4a480 100644 --- a/graphrag/index/graph/extractors/community_reports/sort_context.py +++ b/graphrag/index/graph/extractors/community_reports/sort_context.py @@ -7,11 +7,14 @@ import pandas as pd import graphrag.index.graph.extractors.community_reports.schemas as schemas from graphrag.query.llm.text_utils import num_tokens +import logging +log = logging.getLogger(__name__) + def sort_context( local_context: list[dict], sub_community_reports: list[dict] | None = None, - max_tokens: int | None = None, + max_tokens: int | None = 8000, node_id_column: str = schemas.NODE_ID, node_name_column: str = schemas.NODE_NAME, node_details_column: str = schemas.NODE_DETAILS, @@ -29,6 +32,14 @@ def sort_context( If max tokens is provided, we will return the context string that fits within the token limit. """ + log.info( + f"Sorting local context with {len(local_context)} records and sub-community reports with {len(sub_community_reports) if sub_community_reports else 0} records" + ) + + log.info( + f"Sorting context {local_context} with max tokens: {max_tokens} and sub-community reports: {sub_community_reports}" + ) + def _get_context_string( entities: list[dict], edges: list[dict],