diff --git a/examples_notebooks/api_overview/index.html b/examples_notebooks/api_overview/index.html
index 81252a52..24a72c08 100644
--- a/examples_notebooks/api_overview/index.html
+++ b/examples_notebooks/api_overview/index.html
@@ -1992,11 +1992,13 @@ from graphrag.index.typing import PipelineRunResult
import yaml
 
-settings = yaml.safe_load(open("<project_directory>/settings.yaml"))  # noqa: PTH123, SIM115
+PROJECT_DIRECTORY = "<project_directory>"
+settings = yaml.safe_load(open(f"{PROJECT_DIRECTORY}/settings.yaml"))  # noqa: PTH123, SIM115
 
@@ -2011,9 +2013,10 @@ settings = yaml.safe_load(open("<project_directory>/settings.yaml"))  # noqa: PT
 ---------------------------------------------------------------------------
 FileNotFoundError                         Traceback (most recent call last)
-Cell In[3], line 3
+Cell In[3], line 4
       1 import yaml
-----> 3 settings = yaml.safe_load(open("<project_directory>/settings.yaml"))  # noqa: PTH123, SIM115
+      3 PROJECT_DIRECTORY = "<project_directory>"
+----> 4 settings = yaml.safe_load(open(f"{PROJECT_DIRECTORY}/settings.yaml"))  # noqa: PTH123, SIM115
 
 File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/IPython/core/interactiveshell.py:324, in _modified_open(file, *args, **kwargs)
     317 if file in {0, 1, 2}:
@@ -2073,15 +2076,11 @@ File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7
 
 
from graphrag.config.create_graphrag_config import create_graphrag_config
 
-graphrag_config = create_graphrag_config(
-    values=settings, root_dir="<project_directory>"
-)
+graphrag_config = create_graphrag_config(values=settings, root_dir=PROJECT_DIRECTORY)
 
@@ -2096,11 +2095,9 @@ graphrag_config = create_graphrag_config(
 ---------------------------------------------------------------------------
 NameError                                 Traceback (most recent call last)
-Cell In[4], line 4
+Cell In[4], line 3
       1 from graphrag.config.create_graphrag_config import create_graphrag_config
-      3 graphrag_config = create_graphrag_config(
-----> 4     values=settings, root_dir="<project_directory>"
-      5 )
+----> 3 graphrag_config = create_graphrag_config(values=settings, root_dir=PROJECT_DIRECTORY)
 
 NameError: name 'settings' is not defined
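
Taken together, the two hunks above land on the following pattern; this is a minimal sketch assuming the placeholder "<project_directory>" is replaced with a real path:

    import yaml

    from graphrag.config.create_graphrag_config import create_graphrag_config

    PROJECT_DIRECTORY = "<project_directory>"  # placeholder, as in the notebook

    # Load settings.yaml once, then build the typed config from the parsed values.
    settings = yaml.safe_load(open(f"{PROJECT_DIRECTORY}/settings.yaml"))  # noqa: PTH123, SIM115
    graphrag_config = create_graphrag_config(values=settings, root_dir=PROJECT_DIRECTORY)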
@@ -2219,20 +2216,14 @@ Cell In[5], line 1
import pandas as pd
 
-final_nodes = pd.read_parquet("<project_directory>/output/create_final_nodes.parquet")
-final_entities = pd.read_parquet(
-    "<project_directory>/output/create_final_entities.parquet"
-)
-final_communities = pd.read_parquet(
-    "<project_directory>/output/create_final_communities.parquet"
-)
+final_entities = pd.read_parquet(f"{PROJECT_DIRECTORY}/output/entities.parquet")
+final_communities = pd.read_parquet(f"{PROJECT_DIRECTORY}/output/communities.parquet")
 final_community_reports = pd.read_parquet(
-    "<project_directory>/output/create_final_community_reports.parquet"
+    f"{PROJECT_DIRECTORY}/output/community_reports.parquet"
 )
 
 response, context = await api.global_search(
     config=graphrag_config,
-    nodes=final_nodes,
     entities=final_entities,
     communities=final_communities,
     community_reports=final_community_reports,
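
For orientation, the call shape after this change, sketched from the hunk above: the nodes dataframe is gone and only the three renamed parquet outputs are loaded. Arguments not shown in the hunk are elided, and the api import is an assumption (the hunk shows only api.global_search):

    import pandas as pd

    import graphrag.api as api  # assumed import; not shown in the diff

    final_entities = pd.read_parquet(f"{PROJECT_DIRECTORY}/output/entities.parquet")
    final_communities = pd.read_parquet(f"{PROJECT_DIRECTORY}/output/communities.parquet")
    final_community_reports = pd.read_parquet(
        f"{PROJECT_DIRECTORY}/output/community_reports.parquet"
    )

    # nodes=final_nodes is no longer passed; the remaining arguments are unchanged.
    response, context = await api.global_search(
        config=graphrag_config,
        entities=final_entities,
        communities=final_communities,
        community_reports=final_community_reports,
        dynamic_community_selection=False,
        response_type="Multiple Paragraphs",
        query="Who is Scrooge and what are his main relationships?",
    )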
@@ -2282,13 +2267,11 @@ response, context = await api.global_search(
 FileNotFoundError                         Traceback (most recent call last)
 Cell In[6], line 3
       1 import pandas as pd
-----> 3 final_nodes = pd.read_parquet("<project_directory>/output/create_final_nodes.parquet")
-      4 final_entities = pd.read_parquet(
-      5     "<project_directory>/output/create_final_entities.parquet"
-      6 )
-      7 final_communities = pd.read_parquet(
-      8     "<project_directory>/output/create_final_communities.parquet"
-      9 )
+----> 3 final_entities = pd.read_parquet(f"{PROJECT_DIRECTORY}/output/entities.parquet")
+      4 final_communities = pd.read_parquet(f"{PROJECT_DIRECTORY}/output/communities.parquet")
+      5 final_community_reports = pd.read_parquet(
+      6     f"{PROJECT_DIRECTORY}/output/community_reports.parquet"
+      7 )
 
 File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/pandas/io/parquet.py:667, in read_parquet(path, engine, columns, storage_options, use_nullable_dtypes, dtype_backend, filesystem, filters, **kwargs)
     664     use_nullable_dtypes = False
@@ -2348,7 +2331,7 @@ File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7
     883 handles.append(handle)
     885 # Convert BytesIO or file objects passed with an encoding
-FileNotFoundError: [Errno 2] No such file or directory: '<project_directory>/output/create_final_nodes.parquet'
+FileNotFoundError: [Errno 2] No such file or directory: '<project_directory>/output/entities.parquet'
diff --git a/examples_notebooks/drift_search/index.html b/examples_notebooks/drift_search/index.html
index cc3d3530..9ae3a4ed 100644
--- a/examples_notebooks/drift_search/index.html
+++ b/examples_notebooks/drift_search/index.html
@@ -1889,22 +1889,22 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
 INPUT_DIR = "./inputs/operation dulce"
 LANCEDB_URI = f"{INPUT_DIR}/lancedb"
 
-COMMUNITY_REPORT_TABLE = "create_final_community_reports"
-ENTITY_TABLE = "create_final_nodes"
-ENTITY_EMBEDDING_TABLE = "create_final_entities"
-RELATIONSHIP_TABLE = "create_final_relationships"
-COVARIATE_TABLE = "create_final_covariates"
-TEXT_UNIT_TABLE = "create_final_text_units"
+COMMUNITY_REPORT_TABLE = "community_reports"
+COMMUNITY_TABLE = "communities"
+ENTITY_TABLE = "entities"
+RELATIONSHIP_TABLE = "relationships"
+COVARIATE_TABLE = "covariates"
+TEXT_UNIT_TABLE = "text_units"
 COMMUNITY_LEVEL = 2
 
 # read nodes table to get community and degree data
 entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
-entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet")
+community_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_TABLE}.parquet")
 
 print(f"Entity df columns: {entity_df.columns}")
 
-entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)
+entities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL)
 
 # load description embeddings to an in-memory lancedb vectorstore
 # to connect to a remote db, specify url and port values.
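
The drift-search hunk ends at the vector-store comment; for context, the step it refers to looks roughly like this. The collection name and the connect signature are assumptions, not shown in the diff:

    from graphrag.vector_stores.lancedb import LanceDBVectorStore

    # Assumed collection name; the diff shows only the import and LANCEDB_URI.
    description_embedding_store = LanceDBVectorStore(
        collection_name="default-entity-description",
    )
    description_embedding_store.connect(db_uri=LANCEDB_URI)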
diff --git a/examples_notebooks/global_search/index.html b/examples_notebooks/global_search/index.html
index 41af6755..ec8b8d28 100644
--- a/examples_notebooks/global_search/index.html
+++ b/examples_notebooks/global_search/index.html
@@ -2161,10 +2161,9 @@ token_encoder = tiktoken.encoding_for_model(llm_model)
# parquet files generated from indexing pipeline
 INPUT_DIR = "./inputs/operation dulce"
-COMMUNITY_TABLE = "create_final_communities"
-COMMUNITY_REPORT_TABLE = "create_final_community_reports"
-ENTITY_TABLE = "create_final_nodes"
-ENTITY_EMBEDDING_TABLE = "create_final_entities"
+COMMUNITY_TABLE = "communities"
+COMMUNITY_REPORT_TABLE = "community_reports"
+ENTITY_TABLE = "entities"
 
 # community level in the Leiden community hierarchy from which we will load the community reports
 # higher value means we use reports from more fine-grained communities (at the cost of higher computation cost)
@@ -2207,11 +2205,10 @@ COMMUNITY_LEVEL = 2
community_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_TABLE}.parquet")
 entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
 report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
-entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet")
 
-communities = read_indexer_communities(community_df, entity_df, report_df)
-reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)
-entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)
+communities = read_indexer_communities(community_df, report_df)
+reports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL)
+entities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL)
 
 print(f"Total report count: {len(report_df)}")
 print(
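
Distilling the global-search hunks: all three loaders now key off community_df in place of the old nodes and entity-embedding frames. A sketch of the new shapes follows; the import path is an assumption, since the hunks show only the bare function names:

    import pandas as pd

    # Assumed import path for the three loaders.
    from graphrag.query.indexer_adapters import (
        read_indexer_communities,
        read_indexer_entities,
        read_indexer_reports,
    )

    INPUT_DIR = "./inputs/operation dulce"
    COMMUNITY_LEVEL = 2

    community_df = pd.read_parquet(f"{INPUT_DIR}/communities.parquet")
    entity_df = pd.read_parquet(f"{INPUT_DIR}/entities.parquet")
    report_df = pd.read_parquet(f"{INPUT_DIR}/community_reports.parquet")

    # New signatures: community_df replaces entity_df (communities, reports)
    # and entity_embedding_df (entities).
    communities = read_indexer_communities(community_df, report_df)
    reports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL)
    entities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL)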
diff --git a/examples_notebooks/global_search_with_dynamic_community_selection/index.html b/examples_notebooks/global_search_with_dynamic_community_selection/index.html
index 54d315cf..6e8f03a8 100644
--- a/examples_notebooks/global_search_with_dynamic_community_selection/index.html
+++ b/examples_notebooks/global_search_with_dynamic_community_selection/index.html
@@ -2055,10 +2055,9 @@ token_encoder = tiktoken.encoding_for_model(llm_model)
# parquet files generated from indexing pipeline
 INPUT_DIR = "./inputs/operation dulce"
-COMMUNITY_TABLE = "create_final_communities"
-COMMUNITY_REPORT_TABLE = "create_final_community_reports"
-ENTITY_TABLE = "create_final_nodes"
-ENTITY_EMBEDDING_TABLE = "create_final_entities"
+COMMUNITY_TABLE = "communities"
+COMMUNITY_REPORT_TABLE = "community_reports"
+ENTITY_TABLE = "entities"
 
 # we don't fix a specific community level but instead use an agent to dynamically
 # search through all the community reports to check if they are relevant.
@@ -2101,17 +2099,16 @@ COMMUNITY_LEVEL = None
community_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_TABLE}.parquet")
 entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
 report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
-entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet")
 
-communities = read_indexer_communities(community_df, entity_df, report_df)
+communities = read_indexer_communities(community_df, report_df)
 reports = read_indexer_reports(
     report_df,
-    entity_df,
+    community_df,
     community_level=COMMUNITY_LEVEL,
     dynamic_community_selection=True,
 )
 entities = read_indexer_entities(
-    entity_df, entity_embedding_df, community_level=COMMUNITY_LEVEL
+    entity_df, community_df, community_level=COMMUNITY_LEVEL
 )
 
 print(f"Total report count: {len(report_df)}")
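
The dynamic-selection notebook applies the same substitution, differing only in the level handling: COMMUNITY_LEVEL is None and the report loader gets dynamic_community_selection=True. Reusing the names from the sketch above:

    COMMUNITY_LEVEL = None  # no fixed Leiden level; an agent filters reports for relevance

    communities = read_indexer_communities(community_df, report_df)
    reports = read_indexer_reports(
        report_df,
        community_df,
        community_level=COMMUNITY_LEVEL,
        dynamic_community_selection=True,
    )
    entities = read_indexer_entities(entity_df, community_df, community_level=COMMUNITY_LEVEL)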
diff --git a/examples_notebooks/index_migration/index.html b/examples_notebooks/index_migration_to_v1/index.html
similarity index 98%
rename from examples_notebooks/index_migration/index.html
rename to examples_notebooks/index_migration_to_v1/index.html
index 4eaaa3ba..a1c0a3f6 100644
--- a/examples_notebooks/index_migration/index.html
+++ b/examples_notebooks/index_migration_to_v1/index.html
@@ -16,7 +16,7 @@
-    Index migration - GraphRAG
+    Index migration to v1 - GraphRAG
@@ -106,7 +106,7 @@
-    Index migration
+    Index migration to v1
@@ -1352,9 +1352,9 @@
-    Index Migration
+    Index Migration (pre-v1 to v1)
@@ -1378,7 +1378,7 @@
-    Index migration
+    Index migration to v1
+    Index migration to v2
\ No newline at end of file
diff --git a/examples_notebooks/local_search/index.html b/examples_notebooks/local_search/index.html
index 1f1a8da0..8d4ce9fa 100644
--- a/examples_notebooks/local_search/index.html
+++ b/examples_notebooks/local_search/index.html
@@ -2263,23 +2263,23 @@ from graphrag.vector_stores.lancedb import LanceDBVectorStore
INPUT_DIR = "./inputs/operation dulce"
 LANCEDB_URI = f"{INPUT_DIR}/lancedb"
 
-COMMUNITY_REPORT_TABLE = "create_final_community_reports"
-ENTITY_TABLE = "create_final_nodes"
-ENTITY_EMBEDDING_TABLE = "create_final_entities"
-RELATIONSHIP_TABLE = "create_final_relationships"
-COVARIATE_TABLE = "create_final_covariates"
-TEXT_UNIT_TABLE = "create_final_text_units"
+COMMUNITY_REPORT_TABLE = "community_reports"
+ENTITY_TABLE = "entities"
+COMMUNITY_TABLE = "communities"
+RELATIONSHIP_TABLE = "relationships"
+COVARIATE_TABLE = "covariates"
+TEXT_UNIT_TABLE = "text_units"
 COMMUNITY_LEVEL = 2
 
@@ -2318,9 +2318,9 @@ COMMUNITY_LEVEL = 2
# read nodes table to get community and degree data
 entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
-entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet")
+community_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_TABLE}.parquet")
 
-entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)
+entities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL)
 
 # load description embeddings to an in-memory lancedb vectorstore
 # to connect to a remote db, specify url and port values.
@@ -2693,13 +2707,13 @@ covariates = {"claims": claims}
report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
-reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)
+reports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL)
 
 print(f"Report records: {len(report_df)}")
 report_df.head()
 
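
Net effect for the local-search notebook, condensed into one sketch (constants exactly as in the hunks above; the loader import path is an assumption, as before):

    import pandas as pd

    from graphrag.query.indexer_adapters import (  # assumed import path
        read_indexer_entities,
        read_indexer_reports,
    )

    INPUT_DIR = "./inputs/operation dulce"
    COMMUNITY_REPORT_TABLE = "community_reports"
    ENTITY_TABLE = "entities"
    COMMUNITY_TABLE = "communities"
    COMMUNITY_LEVEL = 2

    entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
    community_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_TABLE}.parquet")
    report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")

    # community_df replaces entity_embedding_df (entities) and entity_df (reports).
    entities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL)
    reports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL)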