mirror of
https://github.com/microsoft/graphrag.git
synced 2026-01-14 09:07:20 +08:00
Remove duplicated entries from relationships and nodes (#1333)
This commit is contained in:
parent
083de12bcf
commit
83026bdb26
@ -0,0 +1,4 @@
|
||||
{
|
||||
"type": "patch",
|
||||
"description": "Remove duplicated relationships and nodes"
|
||||
}
|
||||
@ -69,4 +69,5 @@ async def create_final_nodes(
|
||||
)
|
||||
joined.rename(columns={"label": "title", "cluster": "community"}, inplace=True)
|
||||
|
||||
return joined
|
||||
# TODO: Find duplication source
|
||||
return joined.drop_duplicates(subset=["title", "community"])
|
||||
|
||||
@ -66,4 +66,5 @@ async def create_final_relationships(
|
||||
"text_unit_ids"
|
||||
].str.split(",")
|
||||
|
||||
return edge_combined_degree
|
||||
# TODO: Find duplication source
|
||||
return edge_combined_degree.drop_duplicates(subset=["source", "target"])
|
||||
|
||||
Loading…
Reference in New Issue
Block a user