diff --git a/examples_notebooks/index_migration_to_v2/index.html b/examples_notebooks/index_migration_to_v2/index.html index 109c4d67..d90ea3e3 100644 --- a/examples_notebooks/index_migration_to_v2/index.html +++ b/examples_notebooks/index_migration_to_v2/index.html @@ -1915,11 +1915,11 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
# This is the directory that has your settings.yaml
-PROJECT_DIRECTORY = "<your project directory"
+PROJECT_DIRECTORY = "<your project directory>"
 
# This is the directory that has your settings.yaml -PROJECT_DIRECTORY = " -
+PROJECT_DIRECTORY = "" + @@ -2016,7 +2016,7 @@ File ~/work/graphrag/graphrag/graphrag/config/load_c 42 for file in _default_config_files: 43 if (root / file).is_file(): -FileNotFoundError: Invalid config path: /home/runner/work/graphrag/graphrag/docs/examples_notebooks/<your project directory is not a directory +FileNotFoundError: Invalid config path: /home/runner/work/graphrag/graphrag/docs/examples_notebooks/<your project directory> is not a directory @@ -2099,6 +2099,30 @@ File ~/work/graphrag/graphrag/graphrag/config/load_c final_nodes.loc[:, ["id", "degree", "x", "y"]].groupby("id").first().reset_index() ) final_entities = final_entities.merge(graph_props, on="id", how="left") +# we're also persisting the frequency column +final_entities["frequency"] = final_entities["text_unit_ids"].count() + + +# we added children to communities to eliminate query-time reconstruction +parent_grouped = final_communities.groupby("parent").agg( + children=("community", "unique") +) +final_communities = final_communities.merge( + parent_grouped, + left_on="community", + right_on="parent", + how="left", +) + +# add children to the reports as well +final_community_reports = final_community_reports.merge( + parent_grouped, + left_on="community", + right_on="parent", + how="left", +) + +# copy children into the reports as well # we renamed all the output files for better clarity now that we don't have workflow naming constraints from DataShaper await write_table_to_storage(final_documents, "documents", storage) @@ -2144,6 +2168,30 @@ graph_props = ( final_nodes.loc[:, ["id", "degree", "x", "y"]].groupby("id").first().reset_index() ) final_entities = final_entities.merge(graph_props, on="id", how="left") +# we're also persisting the frequency column +final_entities["frequency"] = final_entities["text_unit_ids"].count() + + +# we added children to communities to eliminate query-time reconstruction +parent_grouped = final_communities.groupby("parent").agg( + children=("community", 
"unique") +) +final_communities = final_communities.merge( + parent_grouped, + left_on="community", + right_on="parent", + how="left", +) + +# add children to the reports as well +final_community_reports = final_community_reports.merge( + parent_grouped, + left_on="community", + right_on="parent", + how="left", +) + +# copy children into the reports as well # we renamed all the output files for better clarity now that we don't have workflow naming constraints from DataShaper await write_table_to_storage(final_documents, "documents", storage) @@ -2194,7 +2242,7 @@ Cell In[5], line 7 - +