Remove duplicated entries from relationships and nodes (#1333)

This commit is contained in:
Alonso Guevara 2024-10-28 22:56:07 -06:00 committed by GitHub
parent 083de12bcf
commit 83026bdb26
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 8 additions and 2 deletions

View File

@ -0,0 +1,4 @@
{
"type": "patch",
"description": "Remove duplicated relationships and nodes"
}

View File

@ -69,4 +69,5 @@ async def create_final_nodes(
)
joined.rename(columns={"label": "title", "cluster": "community"}, inplace=True)
return joined
# TODO: Find duplication source
return joined.drop_duplicates(subset=["title", "community"])

View File

@ -66,4 +66,5 @@ async def create_final_relationships(
"text_unit_ids"
].str.split(",")
return edge_combined_degree
# TODO: Find duplication source
return edge_combined_degree.drop_duplicates(subset=["source", "target"])