mirror of
https://github.com/microsoft/graphrag.git
synced 2026-01-14 00:57:23 +08:00
Print sort context
This commit is contained in:
parent
53268406fe
commit
bbd9db9701
@ -48,6 +48,9 @@ def prep_community_report_context(
|
|||||||
valid_context_df = _within_context(level_context_df)
|
valid_context_df = _within_context(level_context_df)
|
||||||
invalid_context_df = _exceeding_context(level_context_df)
|
invalid_context_df = _exceeding_context(level_context_df)
|
||||||
|
|
||||||
|
log.info(f"Found {len(valid_context_df)} valid context records at level {level}")
|
||||||
|
log.info(f"Found {len(invalid_context_df)} invalid context records at level {level}")
|
||||||
|
|
||||||
# there is no report to substitute with, so we just trim the local context of the invalid context records
|
# there is no report to substitute with, so we just trim the local context of the invalid context records
|
||||||
# this case should only happen at the bottom level of the community hierarchy where there are no sub-communities
|
# this case should only happen at the bottom level of the community hierarchy where there are no sub-communities
|
||||||
if invalid_context_df.empty:
|
if invalid_context_df.empty:
|
||||||
|
|||||||
@ -7,11 +7,14 @@ import pandas as pd
|
|||||||
import graphrag.index.graph.extractors.community_reports.schemas as schemas
|
import graphrag.index.graph.extractors.community_reports.schemas as schemas
|
||||||
from graphrag.query.llm.text_utils import num_tokens
|
from graphrag.query.llm.text_utils import num_tokens
|
||||||
|
|
||||||
|
import logging
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def sort_context(
|
def sort_context(
|
||||||
local_context: list[dict],
|
local_context: list[dict],
|
||||||
sub_community_reports: list[dict] | None = None,
|
sub_community_reports: list[dict] | None = None,
|
||||||
max_tokens: int | None = None,
|
max_tokens: int | None = 8000,
|
||||||
node_id_column: str = schemas.NODE_ID,
|
node_id_column: str = schemas.NODE_ID,
|
||||||
node_name_column: str = schemas.NODE_NAME,
|
node_name_column: str = schemas.NODE_NAME,
|
||||||
node_details_column: str = schemas.NODE_DETAILS,
|
node_details_column: str = schemas.NODE_DETAILS,
|
||||||
@ -29,6 +32,14 @@ def sort_context(
|
|||||||
If max tokens is provided, we will return the context string that fits within the token limit.
|
If max tokens is provided, we will return the context string that fits within the token limit.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
log.info(
|
||||||
|
f"Sorting local context with {len(local_context)} records and sub-community reports with {len(sub_community_reports) if sub_community_reports else 0} records"
|
||||||
|
)
|
||||||
|
|
||||||
|
log.info(
|
||||||
|
f"Sorting context {local_context} with max tokens: {max_tokens} and sub-community reports: {sub_community_reports}"
|
||||||
|
)
|
||||||
|
|
||||||
def _get_context_string(
|
def _get_context_string(
|
||||||
entities: list[dict],
|
entities: list[dict],
|
||||||
edges: list[dict],
|
edges: list[dict],
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user