Use stable ids for community reports

This commit is contained in:
Nathan Evans 2026-01-12 14:40:30 -08:00
parent 710fdad6f0
commit a4d1278c2a

View File

@ -3,11 +3,10 @@
"""All the steps to transform final entities."""
from uuid import uuid4
import pandas as pd
from graphrag.data_model.schemas import COMMUNITY_REPORTS_FINAL_COLUMNS
from graphrag.index.utils.hashing import gen_sha512_hash
def finalize_community_reports(
@ -25,7 +24,9 @@ def finalize_community_reports(
community_reports["community"] = community_reports["community"].astype(int)
community_reports["human_readable_id"] = community_reports["community"]
community_reports["id"] = [uuid4().hex for _ in range(len(community_reports))]
community_reports["id"] = community_reports.apply(
lambda row: gen_sha512_hash(row, ["full_content"]), axis=1
)
return community_reports.loc[
:,