diff --git a/examples_notebooks/index_migration_to_v2/index.html b/examples_notebooks/index_migration_to_v2/index.html
index 109c4d67..d90ea3e3 100644
--- a/examples_notebooks/index_migration_to_v2/index.html
+++ b/examples_notebooks/index_migration_to_v2/index.html
@@ -1915,11 +1915,11 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
# This is the directory that has your settings.yaml
-PROJECT_DIRECTORY = "<your project directory"
+PROJECT_DIRECTORY = "<your project directory>"
# This is the directory that has your settings.yaml
-PROJECT_DIRECTORY = "
-
+PROJECT_DIRECTORY = ""
+
@@ -2016,7 +2016,7 @@ File ~/work/graphrag/graphrag/graphrag/config/load_c
42 for file in _default_config_files:
43 if (root / file).is_file():
-FileNotFoundError: Invalid config path: /home/runner/work/graphrag/graphrag/docs/examples_notebooks/<your project directory is not a directory
+FileNotFoundError: Invalid config path: /home/runner/work/graphrag/graphrag/docs/examples_notebooks/<your project directory> is not a directory
@@ -2099,6 +2099,30 @@ File ~/work/graphrag/graphrag/graphrag/config/load_c
final_nodes.loc[:, ["id", "degree", "x", "y"]].groupby("id").first().reset_index()
)
final_entities = final_entities.merge(graph_props, on="id", how="left")
+# we're also persisting the frequency column
+final_entities["frequency"] = final_entities["text_unit_ids"].count()
+
+
+# we added children to communities to eliminate query-time reconstruction
+parent_grouped = final_communities.groupby("parent").agg(
+ children=("community", "unique")
+)
+final_communities = final_communities.merge(
+ parent_grouped,
+ left_on="community",
+ right_on="parent",
+ how="left",
+)
+
+# add children to the reports as well
+final_community_reports = final_community_reports.merge(
+ parent_grouped,
+ left_on="community",
+ right_on="parent",
+ how="left",
+)
+
+# copy children into the reports as well
# we renamed all the output files for better clarity now that we don't have workflow naming constraints from DataShaper
await write_table_to_storage(final_documents, "documents", storage)
@@ -2144,6 +2168,30 @@ graph_props = (
final_nodes.loc[:, ["id", "degree", "x", "y"]].groupby("id").first().reset_index()
)
final_entities = final_entities.merge(graph_props, on="id", how="left")
+# we're also persisting the frequency column
+final_entities["frequency"] = final_entities["text_unit_ids"].count()
+
+
+# we added children to communities to eliminate query-time reconstruction
+parent_grouped = final_communities.groupby("parent").agg(
+ children=("community", "unique")
+)
+final_communities = final_communities.merge(
+ parent_grouped,
+ left_on="community",
+ right_on="parent",
+ how="left",
+)
+
+# add children to the reports as well
+final_community_reports = final_community_reports.merge(
+ parent_grouped,
+ left_on="community",
+ right_on="parent",
+ how="left",
+)
+
+# copy children into the reports as well
# we renamed all the output files for better clarity now that we don't have workflow naming constraints from DataShaper
await write_table_to_storage(final_documents, "documents", storage)
@@ -2194,7 +2242,7 @@ Cell In[5], line 7
-
+