mirror of
https://github.com/microsoft/graphrag.git
synced 2026-01-14 00:57:23 +08:00
Fix DynamicCommunitySelection type mismatch for children IDs
Convert children IDs to strings before checking membership in self.reports to handle cases where children list contains integers while reports dict uses string keys. Fixes #2004 Signed-off-by: majiayu000 <1835304752@qq.com>
This commit is contained in:
parent
fdb7e3835b
commit
6cb803c878
@ -0,0 +1,4 @@
|
||||
{
|
||||
"type": "patch",
|
||||
"description": "Fix DynamicCommunitySelection type mismatch where children IDs as integers were not matching string report keys"
|
||||
}
|
||||
@ -123,12 +123,14 @@ class DynamicCommunitySelection:
|
||||
# TODO check why some sub_communities are NOT in report_df
|
||||
if community in self.communities:
|
||||
for child in self.communities[community].children:
|
||||
if child in self.reports:
|
||||
communities_to_rate.append(child)
|
||||
# Ensure child is a string to match self.reports keys
|
||||
child_id = str(child)
|
||||
if child_id in self.reports:
|
||||
communities_to_rate.append(child_id)
|
||||
else:
|
||||
logger.debug(
|
||||
"dynamic community selection: cannot find community %s in reports",
|
||||
child,
|
||||
child_id,
|
||||
)
|
||||
# remove parent node if the current node is deemed relevant
|
||||
if not self.keep_parent and community in self.communities:
|
||||
|
||||
@ -0,0 +1,205 @@
|
||||
# Copyright (c) 2024 Microsoft Corporation.
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""Tests for dynamic community selection with type handling."""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from graphrag.data_model.community import Community
|
||||
from graphrag.data_model.community_report import CommunityReport
|
||||
from graphrag.query.context_builder.dynamic_community_selection import (
|
||||
DynamicCommunitySelection,
|
||||
)
|
||||
|
||||
|
||||
def create_mock_tokenizer() -> MagicMock:
|
||||
"""Create a mock tokenizer."""
|
||||
tokenizer = MagicMock()
|
||||
tokenizer.encode.return_value = [1, 2, 3]
|
||||
return tokenizer
|
||||
|
||||
|
||||
def create_mock_model() -> MagicMock:
|
||||
"""Create a mock chat model."""
|
||||
return MagicMock()
|
||||
|
||||
|
||||
def test_dynamic_community_selection_handles_int_children():
|
||||
"""Test that DynamicCommunitySelection correctly handles children IDs as integers.
|
||||
|
||||
This tests the fix for issue #2004 where children IDs could be integers
|
||||
while self.reports keys are strings, causing child communities to be skipped.
|
||||
"""
|
||||
# Create communities with integer children (simulating the bug scenario)
|
||||
# Note: Even though the type annotation says list[str], actual data may have ints
|
||||
communities = [
|
||||
Community(
|
||||
id="comm-0",
|
||||
short_id="0",
|
||||
title="Root Community",
|
||||
level="0",
|
||||
parent="",
|
||||
children=[1, 2], # type: ignore[list-item] # Integer children - testing bug fix
|
||||
),
|
||||
Community(
|
||||
id="comm-1",
|
||||
short_id="1",
|
||||
title="Child Community 1",
|
||||
level="1",
|
||||
parent="0",
|
||||
children=[],
|
||||
),
|
||||
Community(
|
||||
id="comm-2",
|
||||
short_id="2",
|
||||
title="Child Community 2",
|
||||
level="1",
|
||||
parent="0",
|
||||
children=[],
|
||||
),
|
||||
]
|
||||
|
||||
# Create community reports with string community_id
|
||||
reports = [
|
||||
CommunityReport(
|
||||
id="report-0",
|
||||
short_id="0",
|
||||
title="Report 0",
|
||||
community_id="0",
|
||||
summary="Root community summary",
|
||||
full_content="Root community full content",
|
||||
rank=1.0,
|
||||
),
|
||||
CommunityReport(
|
||||
id="report-1",
|
||||
short_id="1",
|
||||
title="Report 1",
|
||||
community_id="1",
|
||||
summary="Child 1 summary",
|
||||
full_content="Child 1 full content",
|
||||
rank=1.0,
|
||||
),
|
||||
CommunityReport(
|
||||
id="report-2",
|
||||
short_id="2",
|
||||
title="Report 2",
|
||||
community_id="2",
|
||||
summary="Child 2 summary",
|
||||
full_content="Child 2 full content",
|
||||
rank=1.0,
|
||||
),
|
||||
]
|
||||
|
||||
model = create_mock_model()
|
||||
tokenizer = create_mock_tokenizer()
|
||||
|
||||
selector = DynamicCommunitySelection(
|
||||
community_reports=reports,
|
||||
communities=communities,
|
||||
model=model,
|
||||
tokenizer=tokenizer,
|
||||
threshold=1,
|
||||
keep_parent=False,
|
||||
max_level=2,
|
||||
)
|
||||
|
||||
# Verify that reports are keyed by string
|
||||
assert "0" in selector.reports
|
||||
assert "1" in selector.reports
|
||||
assert "2" in selector.reports
|
||||
|
||||
# Verify that communities are keyed by string short_id
|
||||
assert "0" in selector.communities
|
||||
assert "1" in selector.communities
|
||||
assert "2" in selector.communities
|
||||
|
||||
# Verify that the children are properly accessible
|
||||
# Before the fix, int children would fail the `in self.reports` check
|
||||
root_community = selector.communities["0"]
|
||||
for child in root_community.children:
|
||||
child_id = str(child)
|
||||
# This should now work with the fix
|
||||
assert child_id in selector.reports, (
|
||||
f"Child {child} (as '{child_id}') should be found in reports"
|
||||
)
|
||||
|
||||
|
||||
def test_dynamic_community_selection_handles_str_children():
|
||||
"""Test that DynamicCommunitySelection works correctly with string children IDs."""
|
||||
communities = [
|
||||
Community(
|
||||
id="comm-0",
|
||||
short_id="0",
|
||||
title="Root Community",
|
||||
level="0",
|
||||
parent="",
|
||||
children=["1", "2"], # String children - expected type
|
||||
),
|
||||
Community(
|
||||
id="comm-1",
|
||||
short_id="1",
|
||||
title="Child Community 1",
|
||||
level="1",
|
||||
parent="0",
|
||||
children=[],
|
||||
),
|
||||
Community(
|
||||
id="comm-2",
|
||||
short_id="2",
|
||||
title="Child Community 2",
|
||||
level="1",
|
||||
parent="0",
|
||||
children=[],
|
||||
),
|
||||
]
|
||||
|
||||
reports = [
|
||||
CommunityReport(
|
||||
id="report-0",
|
||||
short_id="0",
|
||||
title="Report 0",
|
||||
community_id="0",
|
||||
summary="Root community summary",
|
||||
full_content="Root community full content",
|
||||
rank=1.0,
|
||||
),
|
||||
CommunityReport(
|
||||
id="report-1",
|
||||
short_id="1",
|
||||
title="Report 1",
|
||||
community_id="1",
|
||||
summary="Child 1 summary",
|
||||
full_content="Child 1 full content",
|
||||
rank=1.0,
|
||||
),
|
||||
CommunityReport(
|
||||
id="report-2",
|
||||
short_id="2",
|
||||
title="Report 2",
|
||||
community_id="2",
|
||||
summary="Child 2 summary",
|
||||
full_content="Child 2 full content",
|
||||
rank=1.0,
|
||||
),
|
||||
]
|
||||
|
||||
model = create_mock_model()
|
||||
tokenizer = create_mock_tokenizer()
|
||||
|
||||
selector = DynamicCommunitySelection(
|
||||
community_reports=reports,
|
||||
communities=communities,
|
||||
model=model,
|
||||
tokenizer=tokenizer,
|
||||
threshold=1,
|
||||
keep_parent=False,
|
||||
max_level=2,
|
||||
)
|
||||
|
||||
# Verify that children can be found in reports
|
||||
root_community = selector.communities["0"]
|
||||
for child in root_community.children:
|
||||
child_id = str(child)
|
||||
assert child_id in selector.reports, (
|
||||
f"Child {child} (as '{child_id}') should be found in reports"
|
||||
)
|
||||
Loading…
Reference in New Issue
Block a user