From 979141458136a0fe09a1a835b52fe548b7376d18 Mon Sep 17 00:00:00 2001 From: natoverse Date: Fri, 14 Feb 2025 01:05:46 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20microsof?= =?UTF-8?q?t/graphrag@981fd31963d61ed212451b1ad0f5933962a159bb=20?= =?UTF-8?q?=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../index_migration_to_v2/index.html | 58 +++++++++++++++++-- 1 file changed, 53 insertions(+), 5 deletions(-) diff --git a/examples_notebooks/index_migration_to_v2/index.html b/examples_notebooks/index_migration_to_v2/index.html index 109c4d67..d90ea3e3 100644 --- a/examples_notebooks/index_migration_to_v2/index.html +++ b/examples_notebooks/index_migration_to_v2/index.html @@ -1915,11 +1915,11 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
# This is the directory that has your settings.yaml
-PROJECT_DIRECTORY = "<your project directory"
+PROJECT_DIRECTORY = "<your project directory>"
 
# This is the directory that has your settings.yaml -PROJECT_DIRECTORY = "<your project directory" -
+PROJECT_DIRECTORY = "<your project directory>" + @@ -2016,7 +2016,7 @@ File ~/work/graphrag/graphrag/graphrag/config/load_c 42 for file in _default_config_files: 43 if (root / file).is_file(): -FileNotFoundError: Invalid config path: /home/runner/work/graphrag/graphrag/docs/examples_notebooks/<your project directory is not a directory +FileNotFoundError: Invalid config path: /home/runner/work/graphrag/graphrag/docs/examples_notebooks/<your project directory> is not a directory @@ -2099,6 +2099,30 @@ File ~/work/graphrag/graphrag/graphrag/config/load_c final_nodes.loc[:, ["id", "degree", "x", "y"]].groupby("id").first().reset_index() ) final_entities = final_entities.merge(graph_props, on="id", how="left") +# we're also persisting the frequency column +final_entities["frequency"] = final_entities["text_unit_ids"].count() + + +# we added children to communities to eliminate query-time reconstruction +parent_grouped = final_communities.groupby("parent").agg( + children=("community", "unique") +) +final_communities = final_communities.merge( + parent_grouped, + left_on="community", + right_on="parent", + how="left", +) + +# add children to the reports as well +final_community_reports = final_community_reports.merge( + parent_grouped, + left_on="community", + right_on="parent", + how="left", +) + +# copy children into the reports as well # we renamed all the output files for better clarity now that we don't have workflow naming constraints from DataShaper await write_table_to_storage(final_documents, "documents", storage) @@ -2144,6 +2168,30 @@ graph_props = ( final_nodes.loc[:, ["id", "degree", "x", "y"]].groupby("id").first().reset_index() ) final_entities = final_entities.merge(graph_props, on="id", how="left") +# we're also persisting the frequency column +final_entities["frequency"] = final_entities["text_unit_ids"].count() + + +# we added children to communities to eliminate query-time reconstruction +parent_grouped = final_communities.groupby("parent").agg( + children=("community", 
"unique") +) +final_communities = final_communities.merge( + parent_grouped, + left_on="community", + right_on="parent", + how="left", +) + +# add children to the reports as well +final_community_reports = final_community_reports.merge( + parent_grouped, + left_on="community", + right_on="parent", + how="left", +) + +# copy children into the reports as well # we renamed all the output files for better clarity now that we don't have workflow naming constraints from DataShaper await write_table_to_storage(final_documents, "documents", storage) @@ -2194,7 +2242,7 @@ Cell In[5], line 7 - +