mirror of
https://github.com/microsoft/graphrag.git
synced 2026-01-14 09:07:20 +08:00
Deploying to gh-pages from @ microsoft/graphrag@c02ab0984a 🚀
This commit is contained in:
parent
0612b6ddb9
commit
7896e8e8cc
@ -1992,11 +1992,13 @@ from graphrag.index.typing import PipelineRunResult</div>
|
||||
</div>
|
||||
<div class="highlight-ipynb hl-python"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">yaml</span>
|
||||
|
||||
<span class="n">settings</span> <span class="o">=</span> <span class="n">yaml</span><span class="o">.</span><span class="n">safe_load</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="s2">"<project_directory>/settings.yaml"</span><span class="p">))</span> <span class="c1"># noqa: PTH123, SIM115</span>
|
||||
<span class="n">PROJECT_DIRECTORY</span> <span class="o">=</span> <span class="s2">"<project_directory>"</span>
|
||||
<span class="n">settings</span> <span class="o">=</span> <span class="n">yaml</span><span class="o">.</span><span class="n">safe_load</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">PROJECT_DIRECTORY</span><span class="si">}</span><span class="s2">/settings.yaml"</span><span class="p">))</span> <span class="c1"># noqa: PTH123, SIM115</span>
|
||||
</pre></div>
|
||||
<div class="clipboard-copy-txt" id="cell-3">import yaml
|
||||
|
||||
settings = yaml.safe_load(open("<project_directory>/settings.yaml")) # noqa: PTH123, SIM115</project_directory></div>
|
||||
PROJECT_DIRECTORY = "<project_directory>"
|
||||
settings = yaml.safe_load(open(f"{PROJECT_DIRECTORY}/settings.yaml")) # noqa: PTH123, SIM115</project_directory></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -2011,9 +2013,10 @@ settings = yaml.safe_load(open("<project_directory>/settings.yaml")) # noqa: PT
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">FileNotFoundError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[3], line 3</span>
|
||||
Cell <span class="ansi-green-fg">In[3], line 4</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 1</span> <span class="ansi-bold" style="color: rgb(0,135,0)">import</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,0,255)">yaml</span>
|
||||
<span class="ansi-green-fg">----> 3</span> settings <span style="color: rgb(98,98,98)">=</span> yaml<span style="color: rgb(98,98,98)">.</span>safe_load(<span class="ansi-yellow-bg" style="color: rgb(0,135,0)">open</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)"><project_directory>/settings.yaml</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg">)</span>) <span style="color: rgb(95,135,135)"># noqa: PTH123, SIM115</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 3</span> PROJECT_DIRECTORY <span style="color: rgb(98,98,98)">=</span> <span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)"><project_directory></span><span style="color: rgb(175,0,0)">"</span>
|
||||
<span class="ansi-green-fg">----> 4</span> settings <span style="color: rgb(98,98,98)">=</span> yaml<span style="color: rgb(98,98,98)">.</span>safe_load(<span class="ansi-yellow-bg" style="color: rgb(0,135,0)">open</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">f</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(175,95,135)">{</span><span class="ansi-yellow-bg">PROJECT_DIRECTORY</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(175,95,135)">}</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">/settings.yaml</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg">)</span>) <span style="color: rgb(95,135,135)"># noqa: PTH123, SIM115</span>
|
||||
|
||||
File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/IPython/core/interactiveshell.py:324</span>, in <span class="ansi-cyan-fg">_modified_open</span><span class="ansi-blue-fg">(file, *args, **kwargs)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 317</span> <span class="ansi-bold" style="color: rgb(0,135,0)">if</span> file <span class="ansi-bold" style="color: rgb(175,0,255)">in</span> {<span style="color: rgb(98,98,98)">0</span>, <span style="color: rgb(98,98,98)">1</span>, <span style="color: rgb(98,98,98)">2</span>}:
|
||||
@ -2073,15 +2076,11 @@ File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7
|
||||
</div>
|
||||
<div class="highlight-ipynb hl-python"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">graphrag.config.create_graphrag_config</span><span class="w"> </span><span class="kn">import</span> <span class="n">create_graphrag_config</span>
|
||||
|
||||
<span class="n">graphrag_config</span> <span class="o">=</span> <span class="n">create_graphrag_config</span><span class="p">(</span>
|
||||
<span class="n">values</span><span class="o">=</span><span class="n">settings</span><span class="p">,</span> <span class="n">root_dir</span><span class="o">=</span><span class="s2">"<project_directory>"</span>
|
||||
<span class="p">)</span>
|
||||
<span class="n">graphrag_config</span> <span class="o">=</span> <span class="n">create_graphrag_config</span><span class="p">(</span><span class="n">values</span><span class="o">=</span><span class="n">settings</span><span class="p">,</span> <span class="n">root_dir</span><span class="o">=</span><span class="n">PROJECT_DIRECTORY</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
<div class="clipboard-copy-txt" id="cell-4">from graphrag.config.create_graphrag_config import create_graphrag_config
|
||||
|
||||
graphrag_config = create_graphrag_config(
|
||||
values=settings, root_dir="<project_directory>"
|
||||
)</project_directory></div>
|
||||
graphrag_config = create_graphrag_config(values=settings, root_dir=PROJECT_DIRECTORY)</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -2096,11 +2095,9 @@ graphrag_config = create_graphrag_config(
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">NameError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[4], line 4</span>
|
||||
Cell <span class="ansi-green-fg">In[4], line 3</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 1</span> <span class="ansi-bold" style="color: rgb(0,135,0)">from</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,0,255)">graphrag</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">config</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">create_graphrag_config</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,135,0)">import</span> create_graphrag_config
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 3</span> graphrag_config <span style="color: rgb(98,98,98)">=</span> create_graphrag_config(
|
||||
<span class="ansi-green-fg">----> 4</span> values<span style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">settings</span>, root_dir<span style="color: rgb(98,98,98)">=</span><span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)"><project_directory></span><span style="color: rgb(175,0,0)">"</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 5</span> )
|
||||
<span class="ansi-green-fg">----> 3</span> graphrag_config <span style="color: rgb(98,98,98)">=</span> create_graphrag_config(values<span style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">settings</span>, root_dir<span style="color: rgb(98,98,98)">=</span>PROJECT_DIRECTORY)
|
||||
|
||||
<span class="ansi-red-fg">NameError</span>: name 'settings' is not defined</pre>
|
||||
</div>
|
||||
@ -2219,20 +2216,14 @@ Cell <span class="ansi-green-fg">In[5], line 1</span>
|
||||
</div>
|
||||
<div class="highlight-ipynb hl-python"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">pandas</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">pd</span>
|
||||
|
||||
<span class="n">final_nodes</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="s2">"<project_directory>/output/create_final_nodes.parquet"</span><span class="p">)</span>
|
||||
<span class="n">final_entities</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span>
|
||||
<span class="s2">"<project_directory>/output/create_final_entities.parquet"</span>
|
||||
<span class="p">)</span>
|
||||
<span class="n">final_communities</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span>
|
||||
<span class="s2">"<project_directory>/output/create_final_communities.parquet"</span>
|
||||
<span class="p">)</span>
|
||||
<span class="n">final_entities</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">PROJECT_DIRECTORY</span><span class="si">}</span><span class="s2">/output/entities.parquet"</span><span class="p">)</span>
|
||||
<span class="n">final_communities</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">PROJECT_DIRECTORY</span><span class="si">}</span><span class="s2">/output/communities.parquet"</span><span class="p">)</span>
|
||||
<span class="n">final_community_reports</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span>
|
||||
<span class="s2">"<project_directory>/output/create_final_community_reports.parquet"</span>
|
||||
<span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">PROJECT_DIRECTORY</span><span class="si">}</span><span class="s2">/output/community_reports.parquet"</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="n">response</span><span class="p">,</span> <span class="n">context</span> <span class="o">=</span> <span class="k">await</span> <span class="n">api</span><span class="o">.</span><span class="n">global_search</span><span class="p">(</span>
|
||||
<span class="n">config</span><span class="o">=</span><span class="n">graphrag_config</span><span class="p">,</span>
|
||||
<span class="n">nodes</span><span class="o">=</span><span class="n">final_nodes</span><span class="p">,</span>
|
||||
<span class="n">entities</span><span class="o">=</span><span class="n">final_entities</span><span class="p">,</span>
|
||||
<span class="n">communities</span><span class="o">=</span><span class="n">final_communities</span><span class="p">,</span>
|
||||
<span class="n">community_reports</span><span class="o">=</span><span class="n">final_community_reports</span><span class="p">,</span>
|
||||
@ -2244,20 +2235,14 @@ Cell <span class="ansi-green-fg">In[5], line 1</span>
|
||||
</pre></div>
|
||||
<div class="clipboard-copy-txt" id="cell-6">import pandas as pd
|
||||
|
||||
final_nodes = pd.read_parquet("<project_directory>/output/create_final_nodes.parquet")
|
||||
final_entities = pd.read_parquet(
|
||||
"<project_directory>/output/create_final_entities.parquet"
|
||||
)
|
||||
final_communities = pd.read_parquet(
|
||||
"<project_directory>/output/create_final_communities.parquet"
|
||||
)
|
||||
final_entities = pd.read_parquet(f"{PROJECT_DIRECTORY}/output/entities.parquet")
|
||||
final_communities = pd.read_parquet(f"{PROJECT_DIRECTORY}/output/communities.parquet")
|
||||
final_community_reports = pd.read_parquet(
|
||||
"<project_directory>/output/create_final_community_reports.parquet"
|
||||
f"{PROJECT_DIRECTORY}/output/community_reports.parquet"
|
||||
)
|
||||
|
||||
response, context = await api.global_search(
|
||||
config=graphrag_config,
|
||||
nodes=final_nodes,
|
||||
entities=final_entities,
|
||||
communities=final_communities,
|
||||
community_reports=final_community_reports,
|
||||
@ -2265,7 +2250,7 @@ response, context = await api.global_search(
|
||||
dynamic_community_selection=False,
|
||||
response_type="Multiple Paragraphs",
|
||||
query="Who is Scrooge and what are his main relationships?",
|
||||
)</project_directory></project_directory></project_directory></project_directory></div>
|
||||
)</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -2282,13 +2267,11 @@ response, context = await api.global_search(
|
||||
<span class="ansi-red-fg">FileNotFoundError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[6], line 3</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 1</span> <span class="ansi-bold" style="color: rgb(0,135,0)">import</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,0,255)">pandas</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,135,0)">as</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,0,255)">pd</span>
|
||||
<span class="ansi-green-fg">----> 3</span> final_nodes <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">pd</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">.</span><span class="ansi-yellow-bg">read_parquet</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)"><project_directory>/output/create_final_nodes.parquet</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 4</span> final_entities <span style="color: rgb(98,98,98)">=</span> pd<span style="color: rgb(98,98,98)">.</span>read_parquet(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 5</span> <span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)"><project_directory>/output/create_final_entities.parquet</span><span style="color: rgb(175,0,0)">"</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 6</span> )
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 7</span> final_communities <span style="color: rgb(98,98,98)">=</span> pd<span style="color: rgb(98,98,98)">.</span>read_parquet(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 8</span> <span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)"><project_directory>/output/create_final_communities.parquet</span><span style="color: rgb(175,0,0)">"</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 9</span> )
|
||||
<span class="ansi-green-fg">----> 3</span> final_entities <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">pd</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">.</span><span class="ansi-yellow-bg">read_parquet</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">f</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(175,95,135)">{</span><span class="ansi-yellow-bg">PROJECT_DIRECTORY</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(175,95,135)">}</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">/output/entities.parquet</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 4</span> final_communities <span style="color: rgb(98,98,98)">=</span> pd<span style="color: rgb(98,98,98)">.</span>read_parquet(<span style="color: rgb(175,0,0)">f</span><span style="color: rgb(175,0,0)">"</span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span>PROJECT_DIRECTORY<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)">/output/communities.parquet</span><span style="color: rgb(175,0,0)">"</span>)
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 5</span> final_community_reports <span style="color: rgb(98,98,98)">=</span> pd<span style="color: rgb(98,98,98)">.</span>read_parquet(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 6</span> <span style="color: rgb(175,0,0)">f</span><span style="color: rgb(175,0,0)">"</span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span>PROJECT_DIRECTORY<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)">/output/community_reports.parquet</span><span style="color: rgb(175,0,0)">"</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 7</span> )
|
||||
|
||||
File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/pandas/io/parquet.py:667</span>, in <span class="ansi-cyan-fg">read_parquet</span><span class="ansi-blue-fg">(path, engine, columns, storage_options, use_nullable_dtypes, dtype_backend, filesystem, filters, **kwargs)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 664</span> use_nullable_dtypes <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">False</span>
|
||||
@ -2348,7 +2331,7 @@ File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 883</span> handles<span style="color: rgb(98,98,98)">.</span>append(handle)
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 885</span> <span style="color: rgb(95,135,135)"># Convert BytesIO or file objects passed with an encoding</span>
|
||||
|
||||
<span class="ansi-red-fg">FileNotFoundError</span>: [Errno 2] No such file or directory: '<project_directory>/output/create_final_nodes.parquet'</pre>
|
||||
<span class="ansi-red-fg">FileNotFoundError</span>: [Errno 2] No such file or directory: '<project_directory>/output/entities.parquet'</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -1889,22 +1889,22 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
|
||||
<span class="n">INPUT_DIR</span> <span class="o">=</span> <span class="s2">"./inputs/operation dulce"</span>
|
||||
<span class="n">LANCEDB_URI</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">INPUT_DIR</span><span class="si">}</span><span class="s2">/lancedb"</span>
|
||||
|
||||
<span class="n">COMMUNITY_REPORT_TABLE</span> <span class="o">=</span> <span class="s2">"create_final_community_reports"</span>
|
||||
<span class="n">ENTITY_TABLE</span> <span class="o">=</span> <span class="s2">"create_final_nodes"</span>
|
||||
<span class="n">ENTITY_EMBEDDING_TABLE</span> <span class="o">=</span> <span class="s2">"create_final_entities"</span>
|
||||
<span class="n">RELATIONSHIP_TABLE</span> <span class="o">=</span> <span class="s2">"create_final_relationships"</span>
|
||||
<span class="n">COVARIATE_TABLE</span> <span class="o">=</span> <span class="s2">"create_final_covariates"</span>
|
||||
<span class="n">TEXT_UNIT_TABLE</span> <span class="o">=</span> <span class="s2">"create_final_text_units"</span>
|
||||
<span class="n">COMMUNITY_REPORT_TABLE</span> <span class="o">=</span> <span class="s2">"community_reports"</span>
|
||||
<span class="n">COMMUNITY_TABLE</span> <span class="o">=</span> <span class="s2">"communities"</span>
|
||||
<span class="n">ENTITY_TABLE</span> <span class="o">=</span> <span class="s2">"entities"</span>
|
||||
<span class="n">RELATIONSHIP_TABLE</span> <span class="o">=</span> <span class="s2">"relationships"</span>
|
||||
<span class="n">COVARIATE_TABLE</span> <span class="o">=</span> <span class="s2">"covariates"</span>
|
||||
<span class="n">TEXT_UNIT_TABLE</span> <span class="o">=</span> <span class="s2">"text_units"</span>
|
||||
<span class="n">COMMUNITY_LEVEL</span> <span class="o">=</span> <span class="mi">2</span>
|
||||
|
||||
|
||||
<span class="c1"># read nodes table to get community and degree data</span>
|
||||
<span class="n">entity_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">INPUT_DIR</span><span class="si">}</span><span class="s2">/</span><span class="si">{</span><span class="n">ENTITY_TABLE</span><span class="si">}</span><span class="s2">.parquet"</span><span class="p">)</span>
|
||||
<span class="n">entity_embedding_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">INPUT_DIR</span><span class="si">}</span><span class="s2">/</span><span class="si">{</span><span class="n">ENTITY_EMBEDDING_TABLE</span><span class="si">}</span><span class="s2">.parquet"</span><span class="p">)</span>
|
||||
<span class="n">community_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">INPUT_DIR</span><span class="si">}</span><span class="s2">/</span><span class="si">{</span><span class="n">COMMUNITY_TABLE</span><span class="si">}</span><span class="s2">.parquet"</span><span class="p">)</span>
|
||||
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Entity df columns: </span><span class="si">{</span><span class="n">entity_df</span><span class="o">.</span><span class="n">columns</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
||||
|
||||
<span class="n">entities</span> <span class="o">=</span> <span class="n">read_indexer_entities</span><span class="p">(</span><span class="n">entity_df</span><span class="p">,</span> <span class="n">entity_embedding_df</span><span class="p">,</span> <span class="n">COMMUNITY_LEVEL</span><span class="p">)</span>
|
||||
<span class="n">entities</span> <span class="o">=</span> <span class="n">read_indexer_entities</span><span class="p">(</span><span class="n">entity_df</span><span class="p">,</span> <span class="n">community_df</span><span class="p">,</span> <span class="n">COMMUNITY_LEVEL</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># load description embeddings to an in-memory lancedb vectorstore</span>
|
||||
<span class="c1"># to connect to a remote db, specify url and port values.</span>
|
||||
@ -1959,22 +1959,22 @@ from graphrag.vector_stores.lancedb import LanceDBVectorStore
|
||||
INPUT_DIR = "./inputs/operation dulce"
|
||||
LANCEDB_URI = f"{INPUT_DIR}/lancedb"
|
||||
|
||||
COMMUNITY_REPORT_TABLE = "create_final_community_reports"
|
||||
ENTITY_TABLE = "create_final_nodes"
|
||||
ENTITY_EMBEDDING_TABLE = "create_final_entities"
|
||||
RELATIONSHIP_TABLE = "create_final_relationships"
|
||||
COVARIATE_TABLE = "create_final_covariates"
|
||||
TEXT_UNIT_TABLE = "create_final_text_units"
|
||||
COMMUNITY_REPORT_TABLE = "community_reports"
|
||||
COMMUNITY_TABLE = "communities"
|
||||
ENTITY_TABLE = "entities"
|
||||
RELATIONSHIP_TABLE = "relationships"
|
||||
COVARIATE_TABLE = "covariates"
|
||||
TEXT_UNIT_TABLE = "text_units"
|
||||
COMMUNITY_LEVEL = 2
|
||||
|
||||
|
||||
# read nodes table to get community and degree data
|
||||
entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
|
||||
entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet")
|
||||
community_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_TABLE}.parquet")
|
||||
|
||||
print(f"Entity df columns: {entity_df.columns}")
|
||||
|
||||
entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)
|
||||
entities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL)
|
||||
|
||||
# load description embeddings to an in-memory lancedb vectorstore
|
||||
# to connect to a remote db, specify url and port values.
|
||||
@ -2012,106 +2012,76 @@ text_unit_df.head()</div>
|
||||
<div class="jp-OutputArea jp-Cell-outputArea">
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
|
||||
<pre>Entity df columns: Index(['id', 'human_readable_id', 'title', 'community', 'level', 'degree', 'x',
|
||||
'y'],
|
||||
dtype='object')
|
||||
Entity count: 888
|
||||
Relationship count: 812
|
||||
Text unit records: 38
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child jp-OutputArea-executeResult">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt">Out[2]:</div>
|
||||
<div class="jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/html" tabindex="0">
|
||||
<div>
|
||||
<style scoped="">
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">FileNotFoundError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[2], line 37</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 33</span> COMMUNITY_LEVEL <span style="color: rgb(98,98,98)">=</span> <span style="color: rgb(98,98,98)">2</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 36</span> <span style="color: rgb(95,135,135)"># read nodes table to get community and degree data</span>
|
||||
<span class="ansi-green-fg">---> 37</span> entity_df <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">pd</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">.</span><span class="ansi-yellow-bg">read_parquet</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">f</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(175,95,135)">{</span><span class="ansi-yellow-bg">INPUT_DIR</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(175,95,135)">}</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">/</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(175,95,135)">{</span><span class="ansi-yellow-bg">ENTITY_TABLE</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(175,95,135)">}</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">.parquet</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 38</span> community_df <span style="color: rgb(98,98,98)">=</span> pd<span style="color: rgb(98,98,98)">.</span>read_parquet(<span style="color: rgb(175,0,0)">f</span><span style="color: rgb(175,0,0)">"</span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span>INPUT_DIR<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)">/</span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span>COMMUNITY_TABLE<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)">.parquet</span><span style="color: rgb(175,0,0)">"</span>)
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 40</span> <span style="color: rgb(0,135,0)">print</span>(<span style="color: rgb(175,0,0)">f</span><span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">Entity df columns: </span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span>entity_df<span style="color: rgb(98,98,98)">.</span>columns<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)">"</span>)
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/pandas/io/parquet.py:667</span>, in <span class="ansi-cyan-fg">read_parquet</span><span class="ansi-blue-fg">(path, engine, columns, storage_options, use_nullable_dtypes, dtype_backend, filesystem, filters, **kwargs)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 664</span> use_nullable_dtypes <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">False</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 665</span> check_dtype_backend(dtype_backend)
|
||||
<span class="ansi-green-fg">--> 667</span> <span class="ansi-bold" style="color: rgb(0,135,0)">return</span> <span class="ansi-yellow-bg">impl</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">.</span><span class="ansi-yellow-bg">read</span><span class="ansi-yellow-bg">(</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 668</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">path</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 669</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">columns</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">columns</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 670</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">filters</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">filters</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 671</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 672</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">use_nullable_dtypes</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">use_nullable_dtypes</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 673</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">dtype_backend</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">dtype_backend</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 674</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">filesystem</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">filesystem</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 675</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">*</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">*</span><span class="ansi-yellow-bg">kwargs</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 676</span> <span class="ansi-yellow-bg">)</span>
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
<th></th>
|
||||
<th>id</th>
|
||||
<th>human_readable_id</th>
|
||||
<th>text</th>
|
||||
<th>n_tokens</th>
|
||||
<th>document_ids</th>
|
||||
<th>entity_ids</th>
|
||||
<th>relationship_ids</th>
|
||||
<th>covariate_ids</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>0</th>
|
||||
<td>aa55265004ced76e9050ed4b7a45c0496e10faa0eddb8a...</td>
|
||||
<td>1</td>
|
||||
<td>../\nJACOB COLLIER: Honestly, I think mastery...</td>
|
||||
<td>1200</td>
|
||||
<td>[1e0886ae010728d10b2972f66b88608dc82b8645d3085...</td>
|
||||
<td>[9a062709-56dd-4bf2-8b41-926124b7a6f7, f8c54a6...</td>
|
||||
<td>[9af066c8-031b-4c52-b93b-b37763f6f0f7, 5b15580...</td>
|
||||
<td>[f91209d1-0939-452e-b51b-be1763e2a27d, f2274c3...</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>1</th>
|
||||
<td>7f0fb1d3bf517dc76dffa984eec7a25e851e44ead0df82...</td>
|
||||
<td>2</td>
|
||||
<td>OMBERG: I grew up and started getting into al...</td>
|
||||
<td>1200</td>
|
||||
<td>[1e0886ae010728d10b2972f66b88608dc82b8645d3085...</td>
|
||||
<td>[9a062709-56dd-4bf2-8b41-926124b7a6f7, f8c54a6...</td>
|
||||
<td>[c2ac3612-3aaf-440c-babd-e21f474e0366, 9aab0b4...</td>
|
||||
<td>[13c74c18-439b-4419-8427-4ba826503055, 7120179...</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>2</th>
|
||||
<td>27b739ceeddfa100f7be3cf002fd3a27aea2228f1a02c4...</td>
|
||||
<td>3</td>
|
||||
<td>, you know, and it’s a very linear pathway too...</td>
|
||||
<td>1200</td>
|
||||
<td>[1e0886ae010728d10b2972f66b88608dc82b8645d3085...</td>
|
||||
<td>[9a062709-56dd-4bf2-8b41-926124b7a6f7, f8c54a6...</td>
|
||||
<td>[9af066c8-031b-4c52-b93b-b37763f6f0f7, 5456bcb...</td>
|
||||
<td>[a67f1c21-32ab-4eaa-b063-c815e7f3ea9d]</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>3</th>
|
||||
<td>d97017305e234cc51554d653447d73b58441e1ff0f99e4...</td>
|
||||
<td>4</td>
|
||||
<td>you know, we started taking a lot of parts an...</td>
|
||||
<td>1200</td>
|
||||
<td>[1e0886ae010728d10b2972f66b88608dc82b8645d3085...</td>
|
||||
<td>[9a062709-56dd-4bf2-8b41-926124b7a6f7, f8c54a6...</td>
|
||||
<td>[bac3dd9b-f5c8-4966-9dc3-87f1f5976e36, 4f41be8...</td>
|
||||
<td>[695ac017-7c10-44ad-a681-3b4c1ae86a87]</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>4</th>
|
||||
<td>2b6d29f8a74b16ea9a70423bce803a08a5b9ed4e6a946b...</td>
|
||||
<td>5</td>
|
||||
<td>only so much processing that my mind can do i...</td>
|
||||
<td>1200</td>
|
||||
<td>[1e0886ae010728d10b2972f66b88608dc82b8645d3085...</td>
|
||||
<td>[9a062709-56dd-4bf2-8b41-926124b7a6f7, f8c54a6...</td>
|
||||
<td>[d225ff7b-ca47-4fab-8d9a-4f86111526f8, 3fddb40...</td>
|
||||
<td>[d8a7ad5c-a170-430e-9f9d-902047371ee2]</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/pandas/io/parquet.py:267</span>, in <span class="ansi-cyan-fg">PyArrowImpl.read</span><span class="ansi-blue-fg">(self, path, columns, filters, use_nullable_dtypes, dtype_backend, storage_options, filesystem, **kwargs)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 264</span> <span class="ansi-bold" style="color: rgb(0,135,0)">if</span> manager <span style="color: rgb(98,98,98)">==</span> <span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">array</span><span style="color: rgb(175,0,0)">"</span>:
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 265</span> to_pandas_kwargs[<span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">split_blocks</span><span style="color: rgb(175,0,0)">"</span>] <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">True</span> <span style="color: rgb(95,135,135)"># type: ignore[assignment]</span>
|
||||
<span class="ansi-green-fg">--> 267</span> path_or_handle, handles, filesystem <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">_get_path_or_handle</span><span class="ansi-yellow-bg">(</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 268</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">path</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 269</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">filesystem</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 270</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 271</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">mode</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">rb</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 272</span> <span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 273</span> <span class="ansi-bold" style="color: rgb(0,135,0)">try</span>:
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 274</span> pa_table <span style="color: rgb(98,98,98)">=</span> <span style="color: rgb(0,135,0)">self</span><span style="color: rgb(98,98,98)">.</span>api<span style="color: rgb(98,98,98)">.</span>parquet<span style="color: rgb(98,98,98)">.</span>read_table(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 275</span> path_or_handle,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 276</span> columns<span style="color: rgb(98,98,98)">=</span>columns,
|
||||
<span class="ansi-green-fg"> (...)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 279</span> <span style="color: rgb(98,98,98)">*</span><span style="color: rgb(98,98,98)">*</span>kwargs,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 280</span> )
|
||||
|
||||
File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/pandas/io/parquet.py:140</span>, in <span class="ansi-cyan-fg">_get_path_or_handle</span><span class="ansi-blue-fg">(path, fs, storage_options, mode, is_dir)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 130</span> handles <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">None</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 131</span> <span class="ansi-bold" style="color: rgb(0,135,0)">if</span> (
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 132</span> <span class="ansi-bold" style="color: rgb(175,0,255)">not</span> fs
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 133</span> <span class="ansi-bold" style="color: rgb(175,0,255)">and</span> <span class="ansi-bold" style="color: rgb(175,0,255)">not</span> is_dir
|
||||
<span class="ansi-green-fg"> (...)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 138</span> <span style="color: rgb(95,135,135)"># fsspec resources can also point to directories</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 139</span> <span style="color: rgb(95,135,135)"># this branch is used for example when reading from non-fsspec URLs</span>
|
||||
<span class="ansi-green-fg">--> 140</span> handles <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">get_handle</span><span class="ansi-yellow-bg">(</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 141</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">path_or_handle</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">mode</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">is_text</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(0,135,0)">False</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">storage_options</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 142</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 143</span> fs <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">None</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 144</span> path_or_handle <span style="color: rgb(98,98,98)">=</span> handles<span style="color: rgb(98,98,98)">.</span>handle
|
||||
|
||||
File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/pandas/io/common.py:882</span>, in <span class="ansi-cyan-fg">get_handle</span><span class="ansi-blue-fg">(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 873</span> handle <span style="color: rgb(98,98,98)">=</span> <span style="color: rgb(0,135,0)">open</span>(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 874</span> handle,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 875</span> ioargs<span style="color: rgb(98,98,98)">.</span>mode,
|
||||
<span class="ansi-green-fg"> (...)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 878</span> newline<span style="color: rgb(98,98,98)">=</span><span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">"</span>,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 879</span> )
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 880</span> <span class="ansi-bold" style="color: rgb(0,135,0)">else</span>:
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 881</span> <span style="color: rgb(95,135,135)"># Binary mode</span>
|
||||
<span class="ansi-green-fg">--> 882</span> handle <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg" style="color: rgb(0,135,0)">open</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg">handle</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">ioargs</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">.</span><span class="ansi-yellow-bg">mode</span><span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 883</span> handles<span style="color: rgb(98,98,98)">.</span>append(handle)
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 885</span> <span style="color: rgb(95,135,135)"># Convert BytesIO or file objects passed with an encoding</span>
|
||||
|
||||
<span class="ansi-red-fg">FileNotFoundError</span>: [Errno 2] No such file or directory: './inputs/operation dulce/entities.parquet'</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -2184,8 +2154,8 @@ text_embedder = OpenAIEmbedding(
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
|
||||
<div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
|
||||
</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
|
||||
<div class="jp-Cell jp-CodeCell jp-Notebook-cell">
|
||||
<div class="jp-Cell-inputWrapper" tabindex="0">
|
||||
<div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
|
||||
</div>
|
||||
@ -2215,7 +2185,7 @@ text_embedder = OpenAIEmbedding(
|
||||
<span class="n">report_df</span> <span class="o">=</span> <span class="n">read_community_reports</span><span class="p">(</span><span class="n">INPUT_DIR</span><span class="p">)</span>
|
||||
<span class="n">reports</span> <span class="o">=</span> <span class="n">read_indexer_reports</span><span class="p">(</span>
|
||||
<span class="n">report_df</span><span class="p">,</span>
|
||||
<span class="n">entity_df</span><span class="p">,</span>
|
||||
<span class="n">community_df</span><span class="p">,</span>
|
||||
<span class="n">COMMUNITY_LEVEL</span><span class="p">,</span>
|
||||
<span class="n">content_embedding_col</span><span class="o">=</span><span class="s2">"full_content_embeddings"</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
@ -2233,7 +2203,7 @@ text_embedder = OpenAIEmbedding(
|
||||
report_df = read_community_reports(INPUT_DIR)
|
||||
reports = read_indexer_reports(
|
||||
report_df,
|
||||
entity_df,
|
||||
community_df,
|
||||
COMMUNITY_LEVEL,
|
||||
content_embedding_col="full_content_embeddings",
|
||||
)
|
||||
@ -2242,9 +2212,99 @@ read_indexer_report_embeddings(reports, full_content_embedding_store)</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-Cell-outputWrapper">
|
||||
<div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
|
||||
</div>
|
||||
</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
|
||||
<div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
|
||||
<div class="jp-OutputArea jp-Cell-outputArea">
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">FileNotFoundError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[4], line 10</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 6</span> input_path <span style="color: rgb(98,98,98)">=</span> Path(input_dir) <span style="color: rgb(98,98,98)">/</span> <span style="color: rgb(175,0,0)">f</span><span style="color: rgb(175,0,0)">"</span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span>community_report_table<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)">.parquet</span><span style="color: rgb(175,0,0)">"</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 7</span> <span class="ansi-bold" style="color: rgb(0,135,0)">return</span> pd<span style="color: rgb(98,98,98)">.</span>read_parquet(input_path)
|
||||
<span class="ansi-green-fg">---> 10</span> report_df <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">read_community_reports</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg">INPUT_DIR</span><span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 11</span> reports <span style="color: rgb(98,98,98)">=</span> read_indexer_reports(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 12</span> report_df,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 13</span> community_df,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 14</span> COMMUNITY_LEVEL,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 15</span> content_embedding_col<span style="color: rgb(98,98,98)">=</span><span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">full_content_embeddings</span><span style="color: rgb(175,0,0)">"</span>,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 16</span> )
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 17</span> read_indexer_report_embeddings(reports, full_content_embedding_store)
|
||||
|
||||
Cell <span class="ansi-green-fg">In[4], line 7</span>, in <span class="ansi-cyan-fg">read_community_reports</span><span class="ansi-blue-fg">(input_dir, community_report_table)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 5</span> <span style="color: rgb(175,0,0)">"""Embeds the full content of the community reports and saves the DataFrame with embeddings to the output path."""</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 6</span> input_path <span style="color: rgb(98,98,98)">=</span> Path(input_dir) <span style="color: rgb(98,98,98)">/</span> <span style="color: rgb(175,0,0)">f</span><span style="color: rgb(175,0,0)">"</span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span>community_report_table<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)">.parquet</span><span style="color: rgb(175,0,0)">"</span>
|
||||
<span class="ansi-green-fg">----> 7</span> <span class="ansi-bold" style="color: rgb(0,135,0)">return</span> <span class="ansi-yellow-bg">pd</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">.</span><span class="ansi-yellow-bg">read_parquet</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg">input_path</span><span class="ansi-yellow-bg">)</span>
|
||||
|
||||
File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/pandas/io/parquet.py:667</span>, in <span class="ansi-cyan-fg">read_parquet</span><span class="ansi-blue-fg">(path, engine, columns, storage_options, use_nullable_dtypes, dtype_backend, filesystem, filters, **kwargs)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 664</span> use_nullable_dtypes <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">False</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 665</span> check_dtype_backend(dtype_backend)
|
||||
<span class="ansi-green-fg">--> 667</span> <span class="ansi-bold" style="color: rgb(0,135,0)">return</span> <span class="ansi-yellow-bg">impl</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">.</span><span class="ansi-yellow-bg">read</span><span class="ansi-yellow-bg">(</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 668</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">path</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 669</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">columns</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">columns</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 670</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">filters</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">filters</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 671</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 672</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">use_nullable_dtypes</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">use_nullable_dtypes</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 673</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">dtype_backend</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">dtype_backend</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 674</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">filesystem</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">filesystem</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 675</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">*</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">*</span><span class="ansi-yellow-bg">kwargs</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 676</span> <span class="ansi-yellow-bg">)</span>
|
||||
|
||||
File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/pandas/io/parquet.py:267</span>, in <span class="ansi-cyan-fg">PyArrowImpl.read</span><span class="ansi-blue-fg">(self, path, columns, filters, use_nullable_dtypes, dtype_backend, storage_options, filesystem, **kwargs)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 264</span> <span class="ansi-bold" style="color: rgb(0,135,0)">if</span> manager <span style="color: rgb(98,98,98)">==</span> <span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">array</span><span style="color: rgb(175,0,0)">"</span>:
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 265</span> to_pandas_kwargs[<span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">split_blocks</span><span style="color: rgb(175,0,0)">"</span>] <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">True</span> <span style="color: rgb(95,135,135)"># type: ignore[assignment]</span>
|
||||
<span class="ansi-green-fg">--> 267</span> path_or_handle, handles, filesystem <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">_get_path_or_handle</span><span class="ansi-yellow-bg">(</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 268</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">path</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 269</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">filesystem</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 270</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 271</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">mode</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">rb</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 272</span> <span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 273</span> <span class="ansi-bold" style="color: rgb(0,135,0)">try</span>:
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 274</span> pa_table <span style="color: rgb(98,98,98)">=</span> <span style="color: rgb(0,135,0)">self</span><span style="color: rgb(98,98,98)">.</span>api<span style="color: rgb(98,98,98)">.</span>parquet<span style="color: rgb(98,98,98)">.</span>read_table(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 275</span> path_or_handle,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 276</span> columns<span style="color: rgb(98,98,98)">=</span>columns,
|
||||
<span class="ansi-green-fg"> (...)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 279</span> <span style="color: rgb(98,98,98)">*</span><span style="color: rgb(98,98,98)">*</span>kwargs,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 280</span> )
|
||||
|
||||
File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/pandas/io/parquet.py:140</span>, in <span class="ansi-cyan-fg">_get_path_or_handle</span><span class="ansi-blue-fg">(path, fs, storage_options, mode, is_dir)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 130</span> handles <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">None</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 131</span> <span class="ansi-bold" style="color: rgb(0,135,0)">if</span> (
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 132</span> <span class="ansi-bold" style="color: rgb(175,0,255)">not</span> fs
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 133</span> <span class="ansi-bold" style="color: rgb(175,0,255)">and</span> <span class="ansi-bold" style="color: rgb(175,0,255)">not</span> is_dir
|
||||
<span class="ansi-green-fg"> (...)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 138</span> <span style="color: rgb(95,135,135)"># fsspec resources can also point to directories</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 139</span> <span style="color: rgb(95,135,135)"># this branch is used for example when reading from non-fsspec URLs</span>
|
||||
<span class="ansi-green-fg">--> 140</span> handles <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">get_handle</span><span class="ansi-yellow-bg">(</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 141</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">path_or_handle</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">mode</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">is_text</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(0,135,0)">False</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">storage_options</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 142</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 143</span> fs <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">None</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 144</span> path_or_handle <span style="color: rgb(98,98,98)">=</span> handles<span style="color: rgb(98,98,98)">.</span>handle
|
||||
|
||||
File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/pandas/io/common.py:882</span>, in <span class="ansi-cyan-fg">get_handle</span><span class="ansi-blue-fg">(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 873</span> handle <span style="color: rgb(98,98,98)">=</span> <span style="color: rgb(0,135,0)">open</span>(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 874</span> handle,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 875</span> ioargs<span style="color: rgb(98,98,98)">.</span>mode,
|
||||
<span class="ansi-green-fg"> (...)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 878</span> newline<span style="color: rgb(98,98,98)">=</span><span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">"</span>,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 879</span> )
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 880</span> <span class="ansi-bold" style="color: rgb(0,135,0)">else</span>:
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 881</span> <span style="color: rgb(95,135,135)"># Binary mode</span>
|
||||
<span class="ansi-green-fg">--> 882</span> handle <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg" style="color: rgb(0,135,0)">open</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg">handle</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">ioargs</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">.</span><span class="ansi-yellow-bg">mode</span><span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 883</span> handles<span style="color: rgb(98,98,98)">.</span>append(handle)
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 885</span> <span style="color: rgb(95,135,135)"># Convert BytesIO or file objects passed with an encoding</span>
|
||||
|
||||
<span class="ansi-red-fg">FileNotFoundError</span>: [Errno 2] No such file or directory: 'inputs/operation dulce/community_reports.parquet'</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
|
||||
<div class="jp-Cell jp-CodeCell jp-Notebook-cell">
|
||||
<div class="jp-Cell-inputWrapper" tabindex="0">
|
||||
<div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
|
||||
</div>
|
||||
@ -2315,6 +2375,43 @@ search = DRIFTSearch(
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-Cell-outputWrapper">
|
||||
<div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
|
||||
</div>
|
||||
<div class="jp-OutputArea jp-Cell-outputArea">
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">NameError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[5], line 13</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 1</span> drift_params <span style="color: rgb(98,98,98)">=</span> DRIFTSearchConfig(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 2</span> temperature<span style="color: rgb(98,98,98)">=</span><span style="color: rgb(98,98,98)">0</span>,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 3</span> max_tokens<span style="color: rgb(98,98,98)">=</span><span style="color: rgb(98,98,98)">12_000</span>,
|
||||
<span class="ansi-green-fg"> (...)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 7</span> n<span style="color: rgb(98,98,98)">=</span><span style="color: rgb(98,98,98)">1</span>,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 8</span> )
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 10</span> context_builder <span style="color: rgb(98,98,98)">=</span> DRIFTSearchContextBuilder(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 11</span> chat_llm<span style="color: rgb(98,98,98)">=</span>chat_llm,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 12</span> text_embedder<span style="color: rgb(98,98,98)">=</span>text_embedder,
|
||||
<span class="ansi-green-fg">---> 13</span> entities<span style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">entities</span>,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 14</span> relationships<span style="color: rgb(98,98,98)">=</span>relationships,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 15</span> reports<span style="color: rgb(98,98,98)">=</span>reports,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 16</span> entity_text_embeddings<span style="color: rgb(98,98,98)">=</span>description_embedding_store,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 17</span> text_units<span style="color: rgb(98,98,98)">=</span>text_units,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 18</span> token_encoder<span style="color: rgb(98,98,98)">=</span>token_encoder,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 19</span> config<span style="color: rgb(98,98,98)">=</span>drift_params,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 20</span> )
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 22</span> search <span style="color: rgb(98,98,98)">=</span> DRIFTSearch(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 23</span> llm<span style="color: rgb(98,98,98)">=</span>chat_llm, context_builder<span style="color: rgb(98,98,98)">=</span>context_builder, token_encoder<span style="color: rgb(98,98,98)">=</span>token_encoder
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 24</span> )
|
||||
|
||||
<span class="ansi-red-fg">NameError</span>: name 'entities' is not defined</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
|
||||
<div class="jp-Cell jp-CodeCell jp-Notebook-cell">
|
||||
@ -2349,133 +2446,13 @@ search = DRIFTSearch(
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
0%| | 0/1 [00:00<?, ?it/s]</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
100%|██████████| 1/1 [00:12<00:00, 12.82s/it]</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
0%| | 0/3 [00:00<?, ?it/s]</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
33%|███▎ | 1/3 [00:11<00:22, 11.44s/it]</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
67%|██████▋ | 2/3 [00:11<00:04, 4.83s/it]</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
100%|██████████| 3/3 [00:11<00:00, 2.78s/it]</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
0%| | 0/3 [00:00<?, ?it/s]</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
33%|███▎ | 1/3 [00:10<00:20, 10.19s/it]</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
67%|██████▋ | 2/3 [00:11<00:05, 5.24s/it]</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
100%|██████████| 3/3 [00:12<00:00, 3.10s/it]</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
0%| | 0/3 [00:00<?, ?it/s]</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
33%|███▎ | 1/3 [00:09<00:18, 9.33s/it]</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
67%|██████▋ | 2/3 [00:10<00:04, 4.51s/it]</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
100%|██████████| 3/3 [00:10<00:00, 2.62s/it]</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
</pre>
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">NameError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[6], line 1</span>
|
||||
<span class="ansi-green-fg">----> 1</span> resp <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">await</span> <span class="ansi-yellow-bg">search</span><span style="color: rgb(98,98,98)">.</span>asearch(<span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">Who is agent Mercer?</span><span style="color: rgb(175,0,0)">"</span>)
|
||||
|
||||
<span class="ansi-red-fg">NameError</span>: name 'search' is not defined</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -2511,10 +2488,16 @@ search = DRIFTSearch(
|
||||
<div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
|
||||
</div>
|
||||
<div class="jp-OutputArea jp-Cell-outputArea">
|
||||
<div class="jp-OutputArea-child jp-OutputArea-executeResult">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt">Out[7]:</div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/plain" tabindex="0">
|
||||
<pre>"Agent Alex Mercer is a central figure at the Dulce Military Base, particularly within the Paranormal Military Squad. He is noted for his leadership in navigating complex interstellar communications and decoding alien transmissions. Mercer's background as a former military officer equips him with strategic thinking and a calm, authoritative demeanor, which significantly influences the squad's operations [Data: Reports (50); Sources (20, 16, 18, 23, 15, 24, 26)].\n\n### Role and Responsibilities\nMercer leads missions that require strategic thinking and adaptability, indicating a background likely involving military training and experience in high-stakes, covert operations. His leadership style is characterized by diplomacy and caution, ensuring mission success while maintaining focus amid high-stakes activities [Data: Reports (50); Sources (20, 16, 18, 23, 15, 24, 26)].\n\n### Collaboration\nHe collaborates with key figures like Taylor Cruz and Dr. Jordan Hayes. While Mercer focuses on strategic decisions, Cruz brings pragmatism, and Dr. Hayes provides technical analysis, highlighting the collaborative nature of their work [Data: Reports (50)].\n\n### Symbolic Presence\nMercer's presence is symbolic of authority and respect, underscoring his influence and the pivotal nature of his leadership in ensuring the success of the squad's mission [Data: Reports (50)].\n\nOverall, Alex Mercer is a prominent leader with a focus on strategic thinking and interstellar diplomacy, although the full scope of his impact and the specific nature of the alien messages he handles remain somewhat mysterious [Data: Reports (50)]."</pre>
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">NameError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[7], line 1</span>
|
||||
<span class="ansi-green-fg">----> 1</span> <span class="ansi-yellow-bg">resp</span><span style="color: rgb(98,98,98)">.</span>response
|
||||
|
||||
<span class="ansi-red-fg">NameError</span>: name 'resp' is not defined</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -2555,11 +2538,11 @@ search = DRIFTSearch(
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">TypeError</span> Traceback (most recent call last)
|
||||
<span class="ansi-red-fg">NameError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[8], line 1</span>
|
||||
<span class="ansi-green-fg">----> 1</span> <span class="ansi-yellow-bg">resp</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">.</span><span class="ansi-yellow-bg">response</span><span class="ansi-yellow-bg">[</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">nodes</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg">]</span>[<span style="color: rgb(98,98,98)">0</span>][<span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">answer</span><span style="color: rgb(175,0,0)">"</span>]
|
||||
<span class="ansi-green-fg">----> 1</span> <span class="ansi-yellow-bg">resp</span><span style="color: rgb(98,98,98)">.</span>response[<span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">nodes</span><span style="color: rgb(175,0,0)">"</span>][<span style="color: rgb(98,98,98)">0</span>][<span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">answer</span><span style="color: rgb(175,0,0)">"</span>]
|
||||
|
||||
<span class="ansi-red-fg">TypeError</span>: string indices must be integers, not 'str'</pre>
|
||||
<span class="ansi-red-fg">NameError</span>: name 'resp' is not defined</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -2133,9 +2133,9 @@ token_encoder = tiktoken.encoding_for_model(llm_model)</div>
|
||||
<div class="jp-InputArea jp-Cell-inputArea"><div class="jp-InputPrompt jp-InputArea-prompt">
|
||||
</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput" data-mime-type="text/markdown">
|
||||
<h3 id="load-community-reports-as-context-for-global-search">Load community reports as context for global search<a class="anchor-link" href="#load-community-reports-as-context-for-global-search">¶</a></h3><ul>
|
||||
<li>Load all community reports in the <code>create_final_community_reports</code> table from the GraphRAG, to be used as context data for global search.</li>
|
||||
<li>Load entities from the <code>create_final_nodes</code> and <code>create_final_entities</code> tables from the GraphRAG, to be used for calculating community weights for context ranking. Note that this is optional (if no entities are provided, we will not calculate community weights and only use the rank attribute in the community reports table for context ranking)</li>
|
||||
<li>Load all communities in the <code>create_final_communites</code> table from the GraphRAG, to be used to reconstruct the community graph hierarchy for dynamic community selection.</li>
|
||||
<li>Load all community reports in the <code>community_reports</code> table from GraphRAG, to be used as context data for global search.</li>
|
||||
<li>Load entities from the <code>entities</code> table from GraphRAG, to be used for calculating community weights for context ranking. Note that this is optional (if no entities are provided, we will not calculate community weights and only use the rank attribute in the community reports table for context ranking)</li>
|
||||
<li>Load all communities in the <code>communities</code> table from GraphRAG, to be used to reconstruct the community graph hierarchy for dynamic community selection.</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
@ -2161,10 +2161,9 @@ token_encoder = tiktoken.encoding_for_model(llm_model)</div>
|
||||
</div>
|
||||
<div class="highlight-ipynb hl-python"><pre><span></span><span class="c1"># parquet files generated from indexing pipeline</span>
|
||||
<span class="n">INPUT_DIR</span> <span class="o">=</span> <span class="s2">"./inputs/operation dulce"</span>
|
||||
<span class="n">COMMUNITY_TABLE</span> <span class="o">=</span> <span class="s2">"create_final_communities"</span>
|
||||
<span class="n">COMMUNITY_REPORT_TABLE</span> <span class="o">=</span> <span class="s2">"create_final_community_reports"</span>
|
||||
<span class="n">ENTITY_TABLE</span> <span class="o">=</span> <span class="s2">"create_final_nodes"</span>
|
||||
<span class="n">ENTITY_EMBEDDING_TABLE</span> <span class="o">=</span> <span class="s2">"create_final_entities"</span>
|
||||
<span class="n">COMMUNITY_TABLE</span> <span class="o">=</span> <span class="s2">"communities"</span>
|
||||
<span class="n">COMMUNITY_REPORT_TABLE</span> <span class="o">=</span> <span class="s2">"community_reports"</span>
|
||||
<span class="n">ENTITY_TABLE</span> <span class="o">=</span> <span class="s2">"entities"</span>
|
||||
|
||||
<span class="c1"># community level in the Leiden community hierarchy from which we will load the community reports</span>
|
||||
<span class="c1"># higher value means we use reports from more fine-grained communities (at the cost of higher computation cost)</span>
|
||||
@ -2172,10 +2171,9 @@ token_encoder = tiktoken.encoding_for_model(llm_model)</div>
|
||||
</pre></div>
|
||||
<div class="clipboard-copy-txt" id="cell-4"># parquet files generated from indexing pipeline
|
||||
INPUT_DIR = "./inputs/operation dulce"
|
||||
COMMUNITY_TABLE = "create_final_communities"
|
||||
COMMUNITY_REPORT_TABLE = "create_final_community_reports"
|
||||
ENTITY_TABLE = "create_final_nodes"
|
||||
ENTITY_EMBEDDING_TABLE = "create_final_entities"
|
||||
COMMUNITY_TABLE = "communities"
|
||||
COMMUNITY_REPORT_TABLE = "community_reports"
|
||||
ENTITY_TABLE = "entities"
|
||||
|
||||
# community level in the Leiden community hierarchy from which we will load the community reports
|
||||
# higher value means we use reports from more fine-grained communities (at the cost of higher computation cost)
|
||||
@ -2207,11 +2205,10 @@ COMMUNITY_LEVEL = 2</div>
|
||||
<div class="highlight-ipynb hl-python"><pre><span></span><span class="n">community_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">INPUT_DIR</span><span class="si">}</span><span class="s2">/</span><span class="si">{</span><span class="n">COMMUNITY_TABLE</span><span class="si">}</span><span class="s2">.parquet"</span><span class="p">)</span>
|
||||
<span class="n">entity_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">INPUT_DIR</span><span class="si">}</span><span class="s2">/</span><span class="si">{</span><span class="n">ENTITY_TABLE</span><span class="si">}</span><span class="s2">.parquet"</span><span class="p">)</span>
|
||||
<span class="n">report_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">INPUT_DIR</span><span class="si">}</span><span class="s2">/</span><span class="si">{</span><span class="n">COMMUNITY_REPORT_TABLE</span><span class="si">}</span><span class="s2">.parquet"</span><span class="p">)</span>
|
||||
<span class="n">entity_embedding_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">INPUT_DIR</span><span class="si">}</span><span class="s2">/</span><span class="si">{</span><span class="n">ENTITY_EMBEDDING_TABLE</span><span class="si">}</span><span class="s2">.parquet"</span><span class="p">)</span>
|
||||
|
||||
<span class="n">communities</span> <span class="o">=</span> <span class="n">read_indexer_communities</span><span class="p">(</span><span class="n">community_df</span><span class="p">,</span> <span class="n">entity_df</span><span class="p">,</span> <span class="n">report_df</span><span class="p">)</span>
|
||||
<span class="n">reports</span> <span class="o">=</span> <span class="n">read_indexer_reports</span><span class="p">(</span><span class="n">report_df</span><span class="p">,</span> <span class="n">entity_df</span><span class="p">,</span> <span class="n">COMMUNITY_LEVEL</span><span class="p">)</span>
|
||||
<span class="n">entities</span> <span class="o">=</span> <span class="n">read_indexer_entities</span><span class="p">(</span><span class="n">entity_df</span><span class="p">,</span> <span class="n">entity_embedding_df</span><span class="p">,</span> <span class="n">COMMUNITY_LEVEL</span><span class="p">)</span>
|
||||
<span class="n">communities</span> <span class="o">=</span> <span class="n">read_indexer_communities</span><span class="p">(</span><span class="n">community_df</span><span class="p">,</span> <span class="n">report_df</span><span class="p">)</span>
|
||||
<span class="n">reports</span> <span class="o">=</span> <span class="n">read_indexer_reports</span><span class="p">(</span><span class="n">report_df</span><span class="p">,</span> <span class="n">community_df</span><span class="p">,</span> <span class="n">COMMUNITY_LEVEL</span><span class="p">)</span>
|
||||
<span class="n">entities</span> <span class="o">=</span> <span class="n">read_indexer_entities</span><span class="p">(</span><span class="n">entity_df</span><span class="p">,</span> <span class="n">community_df</span><span class="p">,</span> <span class="n">COMMUNITY_LEVEL</span><span class="p">)</span>
|
||||
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Total report count: </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">report_df</span><span class="p">)</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span>
|
||||
@ -2223,11 +2220,10 @@ COMMUNITY_LEVEL = 2</div>
|
||||
<div class="clipboard-copy-txt" id="cell-5">community_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_TABLE}.parquet")
|
||||
entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
|
||||
report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
|
||||
entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet")
|
||||
|
||||
communities = read_indexer_communities(community_df, entity_df, report_df)
|
||||
reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)
|
||||
entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)
|
||||
communities = read_indexer_communities(community_df, report_df)
|
||||
reports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL)
|
||||
entities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL)
|
||||
|
||||
print(f"Total report count: {len(report_df)}")
|
||||
print(
|
||||
@ -2245,138 +2241,74 @@ report_df.head()</div>
|
||||
<div class="jp-OutputArea jp-Cell-outputArea">
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
|
||||
<pre>Total report count: 72
|
||||
Report count after filtering by community level 2: 56
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child jp-OutputArea-executeResult">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt">Out[5]:</div>
|
||||
<div class="jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/html" tabindex="0">
|
||||
<div>
|
||||
<style scoped="">
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">FileNotFoundError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[5], line 1</span>
|
||||
<span class="ansi-green-fg">----> 1</span> community_df <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">pd</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">.</span><span class="ansi-yellow-bg">read_parquet</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">f</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(175,95,135)">{</span><span class="ansi-yellow-bg">INPUT_DIR</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(175,95,135)">}</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">/</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(175,95,135)">{</span><span class="ansi-yellow-bg">COMMUNITY_TABLE</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(175,95,135)">}</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">.parquet</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 2</span> entity_df <span style="color: rgb(98,98,98)">=</span> pd<span style="color: rgb(98,98,98)">.</span>read_parquet(<span style="color: rgb(175,0,0)">f</span><span style="color: rgb(175,0,0)">"</span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span>INPUT_DIR<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)">/</span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span>ENTITY_TABLE<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)">.parquet</span><span style="color: rgb(175,0,0)">"</span>)
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 3</span> report_df <span style="color: rgb(98,98,98)">=</span> pd<span style="color: rgb(98,98,98)">.</span>read_parquet(<span style="color: rgb(175,0,0)">f</span><span style="color: rgb(175,0,0)">"</span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span>INPUT_DIR<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)">/</span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span>COMMUNITY_REPORT_TABLE<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)">.parquet</span><span style="color: rgb(175,0,0)">"</span>)
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/pandas/io/parquet.py:667</span>, in <span class="ansi-cyan-fg">read_parquet</span><span class="ansi-blue-fg">(path, engine, columns, storage_options, use_nullable_dtypes, dtype_backend, filesystem, filters, **kwargs)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 664</span> use_nullable_dtypes <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">False</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 665</span> check_dtype_backend(dtype_backend)
|
||||
<span class="ansi-green-fg">--> 667</span> <span class="ansi-bold" style="color: rgb(0,135,0)">return</span> <span class="ansi-yellow-bg">impl</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">.</span><span class="ansi-yellow-bg">read</span><span class="ansi-yellow-bg">(</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 668</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">path</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 669</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">columns</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">columns</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 670</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">filters</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">filters</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 671</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 672</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">use_nullable_dtypes</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">use_nullable_dtypes</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 673</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">dtype_backend</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">dtype_backend</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 674</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">filesystem</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">filesystem</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 675</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">*</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">*</span><span class="ansi-yellow-bg">kwargs</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 676</span> <span class="ansi-yellow-bg">)</span>
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
<th></th>
|
||||
<th>id</th>
|
||||
<th>human_readable_id</th>
|
||||
<th>community</th>
|
||||
<th>parent</th>
|
||||
<th>level</th>
|
||||
<th>title</th>
|
||||
<th>summary</th>
|
||||
<th>full_content</th>
|
||||
<th>rank</th>
|
||||
<th>rank_explanation</th>
|
||||
<th>findings</th>
|
||||
<th>full_content_json</th>
|
||||
<th>period</th>
|
||||
<th>size</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>0</th>
|
||||
<td>16949a5d17b740b2b4a6f787b0a637f1</td>
|
||||
<td>43</td>
|
||||
<td>43</td>
|
||||
<td>10</td>
|
||||
<td>2</td>
|
||||
<td>Ben Bloomberg and the Harmoniser Project</td>
|
||||
<td>The community centers around Ben Bloomberg, a ...</td>
|
||||
<td># Ben Bloomberg and the Harmoniser Project\n\n...</td>
|
||||
<td>7.5</td>
|
||||
<td>The impact severity rating is high due to the ...</td>
|
||||
<td>[{'explanation': 'Ben Bloomberg is a pivotal f...</td>
|
||||
<td>{\n "title": "Ben Bloomberg and the Harmoni...</td>
|
||||
<td>2025-01-10</td>
|
||||
<td>35</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>1</th>
|
||||
<td>4ff756b7041f4dcab6612e016af2b14d</td>
|
||||
<td>44</td>
|
||||
<td>44</td>
|
||||
<td>10</td>
|
||||
<td>2</td>
|
||||
<td>North Hampton and Influential Musicians</td>
|
||||
<td>The community centers around North Hampton, a ...</td>
|
||||
<td># North Hampton and Influential Musicians\n\nT...</td>
|
||||
<td>6.5</td>
|
||||
<td>The impact severity rating is moderately high ...</td>
|
||||
<td>[{'explanation': 'North Hampton serves as the ...</td>
|
||||
<td>{\n "title": "North Hampton and Influential...</td>
|
||||
<td>2025-01-10</td>
|
||||
<td>4</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>2</th>
|
||||
<td>2d3df394272743a781606ad80ccb5312</td>
|
||||
<td>45</td>
|
||||
<td>45</td>
|
||||
<td>10</td>
|
||||
<td>2</td>
|
||||
<td>Prince of Monaco and Monaco</td>
|
||||
<td>The community revolves around the Prince of Mo...</td>
|
||||
<td># Prince of Monaco and Monaco\n\nThe community...</td>
|
||||
<td>4.0</td>
|
||||
<td>The impact severity rating is moderate due to ...</td>
|
||||
<td>[{'explanation': 'The Prince of Monaco is a ke...</td>
|
||||
<td>{\n "title": "Prince of Monaco and Monaco",...</td>
|
||||
<td>2025-01-10</td>
|
||||
<td>2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>3</th>
|
||||
<td>becbd958973f42b0bd53cca9250feaf1</td>
|
||||
<td>46</td>
|
||||
<td>46</td>
|
||||
<td>10</td>
|
||||
<td>2</td>
|
||||
<td>Robot Opera and Broadway</td>
|
||||
<td>The community revolves around the Robot Opera,...</td>
|
||||
<td># Robot Opera and Broadway\n\nThe community re...</td>
|
||||
<td>7.5</td>
|
||||
<td>The impact severity rating is high due to the ...</td>
|
||||
<td>[{'explanation': 'The Robot Opera is a notable...</td>
|
||||
<td>{\n "title": "Robot Opera and Broadway",\n ...</td>
|
||||
<td>2025-01-10</td>
|
||||
<td>2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>4</th>
|
||||
<td>f7d29921ae3e41a79ae7f88dae584892</td>
|
||||
<td>47</td>
|
||||
<td>47</td>
|
||||
<td>13</td>
|
||||
<td>2</td>
|
||||
<td>Ben and Jacob's Fusion of Art and Technology</td>
|
||||
<td>The community centers around Ben and Jacob, wh...</td>
|
||||
<td># Ben and Jacob's Fusion of Art and Technology...</td>
|
||||
<td>7.5</td>
|
||||
<td>The impact severity rating is high due to the ...</td>
|
||||
<td>[{'explanation': 'Ben and Jacob are key collab...</td>
|
||||
<td>{\n "title": "Ben and Jacob's Fusion of Art...</td>
|
||||
<td>2025-01-10</td>
|
||||
<td>5</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/pandas/io/parquet.py:267</span>, in <span class="ansi-cyan-fg">PyArrowImpl.read</span><span class="ansi-blue-fg">(self, path, columns, filters, use_nullable_dtypes, dtype_backend, storage_options, filesystem, **kwargs)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 264</span> <span class="ansi-bold" style="color: rgb(0,135,0)">if</span> manager <span style="color: rgb(98,98,98)">==</span> <span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">array</span><span style="color: rgb(175,0,0)">"</span>:
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 265</span> to_pandas_kwargs[<span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">split_blocks</span><span style="color: rgb(175,0,0)">"</span>] <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">True</span> <span style="color: rgb(95,135,135)"># type: ignore[assignment]</span>
|
||||
<span class="ansi-green-fg">--> 267</span> path_or_handle, handles, filesystem <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">_get_path_or_handle</span><span class="ansi-yellow-bg">(</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 268</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">path</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 269</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">filesystem</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 270</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 271</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">mode</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">rb</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 272</span> <span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 273</span> <span class="ansi-bold" style="color: rgb(0,135,0)">try</span>:
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 274</span> pa_table <span style="color: rgb(98,98,98)">=</span> <span style="color: rgb(0,135,0)">self</span><span style="color: rgb(98,98,98)">.</span>api<span style="color: rgb(98,98,98)">.</span>parquet<span style="color: rgb(98,98,98)">.</span>read_table(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 275</span> path_or_handle,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 276</span> columns<span style="color: rgb(98,98,98)">=</span>columns,
|
||||
<span class="ansi-green-fg"> (...)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 279</span> <span style="color: rgb(98,98,98)">*</span><span style="color: rgb(98,98,98)">*</span>kwargs,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 280</span> )
|
||||
|
||||
File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/pandas/io/parquet.py:140</span>, in <span class="ansi-cyan-fg">_get_path_or_handle</span><span class="ansi-blue-fg">(path, fs, storage_options, mode, is_dir)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 130</span> handles <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">None</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 131</span> <span class="ansi-bold" style="color: rgb(0,135,0)">if</span> (
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 132</span> <span class="ansi-bold" style="color: rgb(175,0,255)">not</span> fs
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 133</span> <span class="ansi-bold" style="color: rgb(175,0,255)">and</span> <span class="ansi-bold" style="color: rgb(175,0,255)">not</span> is_dir
|
||||
<span class="ansi-green-fg"> (...)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 138</span> <span style="color: rgb(95,135,135)"># fsspec resources can also point to directories</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 139</span> <span style="color: rgb(95,135,135)"># this branch is used for example when reading from non-fsspec URLs</span>
|
||||
<span class="ansi-green-fg">--> 140</span> handles <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">get_handle</span><span class="ansi-yellow-bg">(</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 141</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">path_or_handle</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">mode</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">is_text</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(0,135,0)">False</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">storage_options</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 142</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 143</span> fs <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">None</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 144</span> path_or_handle <span style="color: rgb(98,98,98)">=</span> handles<span style="color: rgb(98,98,98)">.</span>handle
|
||||
|
||||
File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/pandas/io/common.py:882</span>, in <span class="ansi-cyan-fg">get_handle</span><span class="ansi-blue-fg">(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 873</span> handle <span style="color: rgb(98,98,98)">=</span> <span style="color: rgb(0,135,0)">open</span>(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 874</span> handle,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 875</span> ioargs<span style="color: rgb(98,98,98)">.</span>mode,
|
||||
<span class="ansi-green-fg"> (...)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 878</span> newline<span style="color: rgb(98,98,98)">=</span><span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">"</span>,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 879</span> )
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 880</span> <span class="ansi-bold" style="color: rgb(0,135,0)">else</span>:
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 881</span> <span style="color: rgb(95,135,135)"># Binary mode</span>
|
||||
<span class="ansi-green-fg">--> 882</span> handle <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg" style="color: rgb(0,135,0)">open</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg">handle</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">ioargs</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">.</span><span class="ansi-yellow-bg">mode</span><span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 883</span> handles<span style="color: rgb(98,98,98)">.</span>append(handle)
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 885</span> <span style="color: rgb(95,135,135)"># Convert BytesIO or file objects passed with an encoding</span>
|
||||
|
||||
<span class="ansi-red-fg">FileNotFoundError</span>: [Errno 2] No such file or directory: './inputs/operation dulce/communities.parquet'</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -2393,8 +2325,8 @@ Report count after filtering by community level 2: 56
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
|
||||
<div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
|
||||
</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
|
||||
<div class="jp-Cell jp-CodeCell jp-Notebook-cell">
|
||||
<div class="jp-Cell-inputWrapper" tabindex="0">
|
||||
<div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
|
||||
</div>
|
||||
@ -2429,6 +2361,29 @@ Report count after filtering by community level 2: 56
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-Cell-outputWrapper">
|
||||
<div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
|
||||
</div>
|
||||
<div class="jp-OutputArea jp-Cell-outputArea">
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">NameError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[6], line 2</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 1</span> context_builder <span style="color: rgb(98,98,98)">=</span> GlobalCommunityContext(
|
||||
<span class="ansi-green-fg">----> 2</span> community_reports<span style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">reports</span>,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 3</span> communities<span style="color: rgb(98,98,98)">=</span>communities,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 4</span> entities<span style="color: rgb(98,98,98)">=</span>entities, <span style="color: rgb(95,135,135)"># default to None if you don't want to use community weights for ranking</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 5</span> token_encoder<span style="color: rgb(98,98,98)">=</span>token_encoder,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 6</span> )
|
||||
|
||||
<span class="ansi-red-fg">NameError</span>: name 'reports' is not defined</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-Cell jp-MarkdownCell jp-Notebook-cell">
|
||||
@ -2512,8 +2467,8 @@ reduce_llm_params = {
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
|
||||
<div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
|
||||
</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
|
||||
<div class="jp-Cell jp-CodeCell jp-Notebook-cell">
|
||||
<div class="jp-Cell-inputWrapper" tabindex="0">
|
||||
<div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
|
||||
</div>
|
||||
@ -2562,6 +2517,36 @@ reduce_llm_params = {
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-Cell-outputWrapper">
|
||||
<div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
|
||||
</div>
|
||||
<div class="jp-OutputArea jp-Cell-outputArea">
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">NameError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[8], line 3</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 1</span> search_engine <span style="color: rgb(98,98,98)">=</span> GlobalSearch(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 2</span> llm<span style="color: rgb(98,98,98)">=</span>llm,
|
||||
<span class="ansi-green-fg">----> 3</span> context_builder<span style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">context_builder</span>,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 4</span> token_encoder<span style="color: rgb(98,98,98)">=</span>token_encoder,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 5</span> max_data_tokens<span style="color: rgb(98,98,98)">=</span><span style="color: rgb(98,98,98)">12_000</span>, <span style="color: rgb(95,135,135)"># change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 6</span> map_llm_params<span style="color: rgb(98,98,98)">=</span>map_llm_params,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 7</span> reduce_llm_params<span style="color: rgb(98,98,98)">=</span>reduce_llm_params,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 8</span> allow_general_knowledge<span style="color: rgb(98,98,98)">=</span><span class="ansi-bold" style="color: rgb(0,135,0)">False</span>, <span style="color: rgb(95,135,135)"># set this to True will add instruction to encourage the LLM to incorporate general knowledge in the response, which may increase hallucinations, but could be useful in some use cases.</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 9</span> json_mode<span style="color: rgb(98,98,98)">=</span><span class="ansi-bold" style="color: rgb(0,135,0)">True</span>, <span style="color: rgb(95,135,135)"># set this to False if your LLM model does not support JSON mode.</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 10</span> context_builder_params<span style="color: rgb(98,98,98)">=</span>context_builder_params,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 11</span> concurrent_coroutines<span style="color: rgb(98,98,98)">=</span><span style="color: rgb(98,98,98)">32</span>,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 12</span> response_type<span style="color: rgb(98,98,98)">=</span><span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">multiple paragraphs</span><span style="color: rgb(175,0,0)">"</span>, <span style="color: rgb(95,135,135)"># free form text describing the response type and format, can be anything, e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 13</span> )
|
||||
|
||||
<span class="ansi-red-fg">NameError</span>: name 'context_builder' is not defined</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
|
||||
<div class="jp-Cell jp-CodeCell jp-Notebook-cell">
|
||||
@ -2603,17 +2588,17 @@ print(result.response)</div>
|
||||
<div class="jp-OutputArea jp-Cell-outputArea">
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
|
||||
<pre>### Cosmic Vocalization: An Overview
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">NameError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[9], line 1</span>
|
||||
<span class="ansi-green-fg">----> 1</span> result <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">await</span> <span class="ansi-yellow-bg">search_engine</span><span style="color: rgb(98,98,98)">.</span>asearch(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 2</span> <span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">What is Cosmic Vocalization and who are involved in it?</span><span style="color: rgb(175,0,0)">"</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 3</span> )
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 5</span> <span style="color: rgb(0,135,0)">print</span>(result<span style="color: rgb(98,98,98)">.</span>response)
|
||||
|
||||
Cosmic Vocalization is a term coined by Jordan Hayes to describe a repeating sequence found in cryptic communications. This concept is pivotal as it serves as a reference point for both humanity and extraterrestrial entities, facilitating a mutual understanding and interpretation of signals exchanged during the Interstellar Duet [Data: Reports (65)]. The idea of Cosmic Vocalization underscores the importance of establishing a common ground in interstellar communications, which is crucial for the success of such exchanges.
|
||||
|
||||
### Key Participants
|
||||
|
||||
The Paranormal Military Squad plays a significant role in activities related to Cosmic Vocalization. They are integral participants in the Galactic Orchestra, which encompasses the Interstellar Duet and the exchange of Harmonious Signals [Data: Reports (65)]. This involvement highlights the strategic importance of Cosmic Vocalization in broader interstellar and paranormal military operations, suggesting that these communications are not only scientific but also have potential military applications.
|
||||
|
||||
In summary, Cosmic Vocalization is a critical concept in the realm of interstellar communication, with the Paranormal Military Squad being key participants in its related activities. This involvement indicates a blend of scientific exploration and strategic military interests in the ongoing efforts to understand and utilize these cryptic communications.
|
||||
</pre>
|
||||
<span class="ansi-red-fg">NameError</span>: name 'search_engine' is not defined</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -2651,486 +2636,17 @@ result.context_data["reports"]</div>
|
||||
<div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
|
||||
</div>
|
||||
<div class="jp-OutputArea jp-Cell-outputArea">
|
||||
<div class="jp-OutputArea-child jp-OutputArea-executeResult">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt">Out[10]:</div>
|
||||
<div class="jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/html" tabindex="0">
|
||||
<div>
|
||||
<style scoped="">
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">NameError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[10], line 2</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 1</span> <span style="color: rgb(95,135,135)"># inspect the data used to build the context for the LLM responses</span>
|
||||
<span class="ansi-green-fg">----> 2</span> <span class="ansi-yellow-bg">result</span><span style="color: rgb(98,98,98)">.</span>context_data[<span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">reports</span><span style="color: rgb(175,0,0)">"</span>]
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
<th></th>
|
||||
<th>id</th>
|
||||
<th>title</th>
|
||||
<th>occurrence weight</th>
|
||||
<th>content</th>
|
||||
<th>rank</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>0</th>
|
||||
<td>50</td>
|
||||
<td>Alex Mercer and the Dulce Base Team</td>
|
||||
<td>0.956522</td>
|
||||
<td># Alex Mercer and the Dulce Base Team\n\nThe c...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>1</th>
|
||||
<td>35</td>
|
||||
<td>Kevin Scott and Technology Development</td>
|
||||
<td>0.608696</td>
|
||||
<td># Kevin Scott and Technology Development\n\nTh...</td>
|
||||
<td>7.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>2</th>
|
||||
<td>53</td>
|
||||
<td>Dulce Base and Paranormal Military Squad</td>
|
||||
<td>0.565217</td>
|
||||
<td># Dulce Base and Paranormal Military Squad\n\n...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>3</th>
|
||||
<td>22</td>
|
||||
<td>Paranormal Military Squad and Technological Ex...</td>
|
||||
<td>0.434783</td>
|
||||
<td># Paranormal Military Squad and Technological ...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>4</th>
|
||||
<td>60</td>
|
||||
<td>First Contact with Extraterrestrial Civilization</td>
|
||||
<td>0.304348</td>
|
||||
<td># First Contact with Extraterrestrial Civiliza...</td>
|
||||
<td>9.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>5</th>
|
||||
<td>61</td>
|
||||
<td>Dulce Base Operations and Distress</td>
|
||||
<td>0.173913</td>
|
||||
<td># Dulce Base Operations and Distress\n\nThe co...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>6</th>
|
||||
<td>57</td>
|
||||
<td>Operation: Dulce in New Mexico</td>
|
||||
<td>0.130435</td>
|
||||
<td># Operation: Dulce in New Mexico\n\nThe commun...</td>
|
||||
<td>7.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>7</th>
|
||||
<td>48</td>
|
||||
<td>Jacob Collier and Ben Bloomberg's First Tour</td>
|
||||
<td>0.130435</td>
|
||||
<td># Jacob Collier and Ben Bloomberg's First Tour...</td>
|
||||
<td>7.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>8</th>
|
||||
<td>51</td>
|
||||
<td>Cosmic Translators and Alien Script</td>
|
||||
<td>0.086957</td>
|
||||
<td># Cosmic Translators and Alien Script\n\nThe c...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>9</th>
|
||||
<td>64</td>
|
||||
<td>Terminal and Deep Hum</td>
|
||||
<td>0.086957</td>
|
||||
<td># Terminal and Deep Hum\n\nThe community revol...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>10</th>
|
||||
<td>29</td>
|
||||
<td>Paranormal Military Squad and Cosmic Dialogue</td>
|
||||
<td>0.086957</td>
|
||||
<td># Paranormal Military Squad and Cosmic Dialogu...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>11</th>
|
||||
<td>39</td>
|
||||
<td>Extraterrestrial Signal Decryption Community</td>
|
||||
<td>0.086957</td>
|
||||
<td># Extraterrestrial Signal Decryption Community...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>12</th>
|
||||
<td>34</td>
|
||||
<td>Growth Mindset and Stanford</td>
|
||||
<td>0.086957</td>
|
||||
<td># Growth Mindset and Stanford\n\nThe community...</td>
|
||||
<td>7.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>13</th>
|
||||
<td>20</td>
|
||||
<td>Jacob Collier and Taylor Swift's Albums</td>
|
||||
<td>0.086957</td>
|
||||
<td># Jacob Collier and Taylor Swift's Albums\n\nT...</td>
|
||||
<td>7.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>14</th>
|
||||
<td>0</td>
|
||||
<td>Omberg and Jacob Collier Collaboration</td>
|
||||
<td>0.086957</td>
|
||||
<td># Omberg and Jacob Collier Collaboration\n\nTh...</td>
|
||||
<td>7.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>15</th>
|
||||
<td>65</td>
|
||||
<td>Galactic Orchestra and Interstellar Duet</td>
|
||||
<td>0.043478</td>
|
||||
<td># Galactic Orchestra and Interstellar Duet\n\n...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>16</th>
|
||||
<td>59</td>
|
||||
<td>Alien Intelligence and Interstellar Siren's Call</td>
|
||||
<td>0.043478</td>
|
||||
<td># Alien Intelligence and Interstellar Siren's ...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>17</th>
|
||||
<td>16</td>
|
||||
<td>Jimmy Fallon Project on Primetime Television</td>
|
||||
<td>0.043478</td>
|
||||
<td># Jimmy Fallon Project on Primetime Television...</td>
|
||||
<td>6.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>18</th>
|
||||
<td>42</td>
|
||||
<td>Decryption Process and Digital Soundscape</td>
|
||||
<td>0.043478</td>
|
||||
<td># Decryption Process and Digital Soundscape\n\...</td>
|
||||
<td>6.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>19</th>
|
||||
<td>5</td>
|
||||
<td>Jacob Collier's Video Production</td>
|
||||
<td>0.043478</td>
|
||||
<td># Jacob Collier's Video Production\n\nThe comm...</td>
|
||||
<td>4.0</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>20</th>
|
||||
<td>14</td>
|
||||
<td>Ben Bloomberg's Phone System and House</td>
|
||||
<td>0.043478</td>
|
||||
<td># Ben Bloomberg's Phone System and House\n\nTh...</td>
|
||||
<td>3.0</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>21</th>
|
||||
<td>9</td>
|
||||
<td>Jacob Collier's Video Production</td>
|
||||
<td>0.043478</td>
|
||||
<td># Jacob Collier's Video Production\n\nThe comm...</td>
|
||||
<td>3.0</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>22</th>
|
||||
<td>17</td>
|
||||
<td>Ben Bloomberg's Phone System and Parental Conc...</td>
|
||||
<td>0.043478</td>
|
||||
<td># Ben Bloomberg's Phone System and Parental Co...</td>
|
||||
<td>3.0</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>23</th>
|
||||
<td>58</td>
|
||||
<td>Paranormal Military Squad and Alien Communicat...</td>
|
||||
<td>0.956522</td>
|
||||
<td># Paranormal Military Squad and Alien Communic...</td>
|
||||
<td>7.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>24</th>
|
||||
<td>52</td>
|
||||
<td>Paranormal Military Squad at Dulce Base</td>
|
||||
<td>0.695652</td>
|
||||
<td># Paranormal Military Squad at Dulce Base\n\nT...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>25</th>
|
||||
<td>11</td>
|
||||
<td>Jacob Collier and His Musical Collaborations</td>
|
||||
<td>0.565217</td>
|
||||
<td># Jacob Collier and His Musical Collaborations...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>26</th>
|
||||
<td>54</td>
|
||||
<td>Dr. Jordan Hayes and the Paranormal Military S...</td>
|
||||
<td>0.347826</td>
|
||||
<td># Dr. Jordan Hayes and the Paranormal Military...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>27</th>
|
||||
<td>55</td>
|
||||
<td>Operation: Dulce and Paranormal Military Squad</td>
|
||||
<td>0.260870</td>
|
||||
<td># Operation: Dulce and Paranormal Military Squ...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>28</th>
|
||||
<td>68</td>
|
||||
<td>Earth's Interstellar Communication and Galacti...</td>
|
||||
<td>0.260870</td>
|
||||
<td># Earth's Interstellar Communication and Galac...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>29</th>
|
||||
<td>70</td>
|
||||
<td>Paranormal Military Squad and Interstellar Com...</td>
|
||||
<td>0.130435</td>
|
||||
<td># Paranormal Military Squad and Interstellar C...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>30</th>
|
||||
<td>71</td>
|
||||
<td>Threshold and Humankind's Communication with E...</td>
|
||||
<td>0.130435</td>
|
||||
<td># Threshold and Humankind's Communication with...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>31</th>
|
||||
<td>56</td>
|
||||
<td>Dulce Military Base and Paranormal Operations</td>
|
||||
<td>0.130435</td>
|
||||
<td># Dulce Military Base and Paranormal Operation...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>32</th>
|
||||
<td>69</td>
|
||||
<td>Paranormal Military Squad and Interstellar Com...</td>
|
||||
<td>0.130435</td>
|
||||
<td># Paranormal Military Squad and Interstellar C...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>33</th>
|
||||
<td>33</td>
|
||||
<td>Behind the Tech and Microsoft Community</td>
|
||||
<td>0.130435</td>
|
||||
<td># Behind the Tech and Microsoft Community\n\nT...</td>
|
||||
<td>7.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>34</th>
|
||||
<td>49</td>
|
||||
<td>Djesse Vol. 3 and Djesse Albums Series</td>
|
||||
<td>0.130435</td>
|
||||
<td># Djesse Vol. 3 and Djesse Albums Series\n\nTh...</td>
|
||||
<td>7.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>35</th>
|
||||
<td>66</td>
|
||||
<td>Humanity and Cosmic Relationships</td>
|
||||
<td>0.086957</td>
|
||||
<td># Humanity and Cosmic Relationships\n\nThe com...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>36</th>
|
||||
<td>19</td>
|
||||
<td>Pandemic and Its Impact on Work and Art</td>
|
||||
<td>0.086957</td>
|
||||
<td># Pandemic and Its Impact on Work and Art\n\nT...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>37</th>
|
||||
<td>67</td>
|
||||
<td>Decryption and Understanding of Alien Signal</td>
|
||||
<td>0.043478</td>
|
||||
<td># Decryption and Understanding of Alien Signal...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>38</th>
|
||||
<td>12</td>
|
||||
<td>Montreux Jazz Festival and Key Performers</td>
|
||||
<td>0.043478</td>
|
||||
<td># Montreux Jazz Festival and Key Performers\n\...</td>
|
||||
<td>7.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>39</th>
|
||||
<td>46</td>
|
||||
<td>Robot Opera and Broadway</td>
|
||||
<td>0.043478</td>
|
||||
<td># Robot Opera and Broadway\n\nThe community re...</td>
|
||||
<td>7.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>40</th>
|
||||
<td>21</td>
|
||||
<td>Taylor Swift's Albums and Documentary</td>
|
||||
<td>0.043478</td>
|
||||
<td># Taylor Swift's Albums and Documentary\n\nThe...</td>
|
||||
<td>7.0</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>41</th>
|
||||
<td>7</td>
|
||||
<td>Stage Equipment and Transportation Network</td>
|
||||
<td>0.043478</td>
|
||||
<td># Stage Equipment and Transportation Network\n...</td>
|
||||
<td>6.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>42</th>
|
||||
<td>37</td>
|
||||
<td>Jaron Lanier and His Collection of Musical Ins...</td>
|
||||
<td>0.043478</td>
|
||||
<td># Jaron Lanier and His Collection of Musical I...</td>
|
||||
<td>4.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>43</th>
|
||||
<td>45</td>
|
||||
<td>Prince of Monaco and Monaco</td>
|
||||
<td>0.043478</td>
|
||||
<td># Prince of Monaco and Monaco\n\nThe community...</td>
|
||||
<td>4.0</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>44</th>
|
||||
<td>62</td>
|
||||
<td>Paranormal Military Squad at Dulce Base</td>
|
||||
<td>1.000000</td>
|
||||
<td># Paranormal Military Squad at Dulce Base\n\nT...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>45</th>
|
||||
<td>63</td>
|
||||
<td>Paranormal Military Squad and Operation: Dulce</td>
|
||||
<td>0.782609</td>
|
||||
<td># Paranormal Military Squad and Operation: Dul...</td>
|
||||
<td>9.0</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>46</th>
|
||||
<td>43</td>
|
||||
<td>Ben Bloomberg and the Harmoniser Project</td>
|
||||
<td>0.478261</td>
|
||||
<td># Ben Bloomberg and the Harmoniser Project\n\n...</td>
|
||||
<td>7.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>47</th>
|
||||
<td>47</td>
|
||||
<td>Ben and Jacob's Fusion of Art and Technology</td>
|
||||
<td>0.173913</td>
|
||||
<td># Ben and Jacob's Fusion of Art and Technology...</td>
|
||||
<td>7.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>48</th>
|
||||
<td>28</td>
|
||||
<td>Mission to Uncover Dulce's Mysteries</td>
|
||||
<td>0.130435</td>
|
||||
<td># Mission to Uncover Dulce's Mysteries\n\nThe ...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>49</th>
|
||||
<td>18</td>
|
||||
<td>Taylor Swift and Album of the Year</td>
|
||||
<td>0.130435</td>
|
||||
<td># Taylor Swift and Album of the Year\n\nThe co...</td>
|
||||
<td>7.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>50</th>
|
||||
<td>40</td>
|
||||
<td>Conversation between Kevin Scott and Jacob Col...</td>
|
||||
<td>0.086957</td>
|
||||
<td># Conversation between Kevin Scott and Jacob C...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>51</th>
|
||||
<td>41</td>
|
||||
<td>Humanity and the Unseen Partner</td>
|
||||
<td>0.043478</td>
|
||||
<td># Humanity and the Unseen Partner\n\nThe commu...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>52</th>
|
||||
<td>15</td>
|
||||
<td>Jimmy Fallon Project on Primetime TV</td>
|
||||
<td>0.043478</td>
|
||||
<td># Jimmy Fallon Project on Primetime TV\n\nThe ...</td>
|
||||
<td>7.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>53</th>
|
||||
<td>38</td>
|
||||
<td>Kevin Scott and the Engineering Mindset</td>
|
||||
<td>0.043478</td>
|
||||
<td># Kevin Scott and the Engineering Mindset\n\nT...</td>
|
||||
<td>6.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>54</th>
|
||||
<td>44</td>
|
||||
<td>North Hampton and Influential Musicians</td>
|
||||
<td>0.043478</td>
|
||||
<td># North Hampton and Influential Musicians\n\nT...</td>
|
||||
<td>6.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>55</th>
|
||||
<td>36</td>
|
||||
<td>Kevin Scott's Daughter and Her Fantasy Novel</td>
|
||||
<td>0.043478</td>
|
||||
<td># Kevin Scott's Daughter and Her Fantasy Novel...</td>
|
||||
<td>2.0</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<span class="ansi-red-fg">NameError</span>: name 'result' is not defined</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -3174,9 +2690,17 @@ print(
|
||||
<div class="jp-OutputArea jp-Cell-outputArea">
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
|
||||
<pre>LLM calls: 4. Prompt tokens: 32951. Output tokens: 505.
|
||||
</pre>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">NameError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[11], line 3</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 1</span> <span style="color: rgb(95,135,135)"># inspect number of LLM calls and tokens</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 2</span> <span style="color: rgb(0,135,0)">print</span>(
|
||||
<span class="ansi-green-fg">----> 3</span> <span style="color: rgb(175,0,0)">f</span><span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">LLM calls: </span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span><span class="ansi-yellow-bg">result</span><span style="color: rgb(98,98,98)">.</span>llm_calls<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)">. Prompt tokens: </span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span>result<span style="color: rgb(98,98,98)">.</span>prompt_tokens<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)">. Output tokens: </span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span>result<span style="color: rgb(98,98,98)">.</span>output_tokens<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)">.</span><span style="color: rgb(175,0,0)">"</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 4</span> )
|
||||
|
||||
<span class="ansi-red-fg">NameError</span>: name 'result' is not defined</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -2027,9 +2027,9 @@ token_encoder = tiktoken.encoding_for_model(llm_model)</div>
|
||||
<div class="jp-InputArea jp-Cell-inputArea"><div class="jp-InputPrompt jp-InputArea-prompt">
|
||||
</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput" data-mime-type="text/markdown">
|
||||
<h3 id="load-community-reports-as-context-for-global-search">Load community reports as context for global search<a class="anchor-link" href="#load-community-reports-as-context-for-global-search">¶</a></h3><ul>
|
||||
<li>Load all community reports in the <code>create_final_community_reports</code> table from the ire-indexing engine, to be used as context data for global search.</li>
|
||||
<li>Load entities from the <code>create_final_nodes</code> and <code>create_final_entities</code> tables from the ire-indexing engine, to be used for calculating community weights for context ranking. Note that this is optional (if no entities are provided, we will not calculate community weights and only use the rank attribute in the community reports table for context ranking)</li>
|
||||
<li>Load all communities in the <code>create_final_communites</code> table from the ire-indexing engine, to be used to reconstruct the community graph hierarchy for dynamic community selection.</li>
|
||||
<li>Load all community reports in the <code>community_reports</code> table from the indexing engine, to be used as context data for global search.</li>
|
||||
<li>Load entities from the <code>entities</code> tables from the indexing engine, to be used for calculating community weights for context ranking. Note that this is optional (if no entities are provided, we will not calculate community weights and only use the rank attribute in the community reports table for context ranking)</li>
|
||||
<li>Load all communities in the <code>communities</code> table from the indexing engine, to be used to reconstruct the community graph hierarchy for dynamic community selection.</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
@ -2055,10 +2055,9 @@ token_encoder = tiktoken.encoding_for_model(llm_model)</div>
|
||||
</div>
|
||||
<div class="highlight-ipynb hl-python"><pre><span></span><span class="c1"># parquet files generated from indexing pipeline</span>
|
||||
<span class="n">INPUT_DIR</span> <span class="o">=</span> <span class="s2">"./inputs/operation dulce"</span>
|
||||
<span class="n">COMMUNITY_TABLE</span> <span class="o">=</span> <span class="s2">"create_final_communities"</span>
|
||||
<span class="n">COMMUNITY_REPORT_TABLE</span> <span class="o">=</span> <span class="s2">"create_final_community_reports"</span>
|
||||
<span class="n">ENTITY_TABLE</span> <span class="o">=</span> <span class="s2">"create_final_nodes"</span>
|
||||
<span class="n">ENTITY_EMBEDDING_TABLE</span> <span class="o">=</span> <span class="s2">"create_final_entities"</span>
|
||||
<span class="n">COMMUNITY_TABLE</span> <span class="o">=</span> <span class="s2">"communities"</span>
|
||||
<span class="n">COMMUNITY_REPORT_TABLE</span> <span class="o">=</span> <span class="s2">"community_reports"</span>
|
||||
<span class="n">ENTITY_TABLE</span> <span class="o">=</span> <span class="s2">"entities"</span>
|
||||
|
||||
<span class="c1"># we don't fix a specific community level but instead use an agent to dynamicially</span>
|
||||
<span class="c1"># search through all the community reports to check if they are relevant.</span>
|
||||
@ -2066,10 +2065,9 @@ token_encoder = tiktoken.encoding_for_model(llm_model)</div>
|
||||
</pre></div>
|
||||
<div class="clipboard-copy-txt" id="cell-4"># parquet files generated from indexing pipeline
|
||||
INPUT_DIR = "./inputs/operation dulce"
|
||||
COMMUNITY_TABLE = "create_final_communities"
|
||||
COMMUNITY_REPORT_TABLE = "create_final_community_reports"
|
||||
ENTITY_TABLE = "create_final_nodes"
|
||||
ENTITY_EMBEDDING_TABLE = "create_final_entities"
|
||||
COMMUNITY_TABLE = "communities"
|
||||
COMMUNITY_REPORT_TABLE = "community_reports"
|
||||
ENTITY_TABLE = "entities"
|
||||
|
||||
# we don't fix a specific community level but instead use an agent to dynamicially
|
||||
# search through all the community reports to check if they are relevant.
|
||||
@ -2101,17 +2099,16 @@ COMMUNITY_LEVEL = None</div>
|
||||
<div class="highlight-ipynb hl-python"><pre><span></span><span class="n">community_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">INPUT_DIR</span><span class="si">}</span><span class="s2">/</span><span class="si">{</span><span class="n">COMMUNITY_TABLE</span><span class="si">}</span><span class="s2">.parquet"</span><span class="p">)</span>
|
||||
<span class="n">entity_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">INPUT_DIR</span><span class="si">}</span><span class="s2">/</span><span class="si">{</span><span class="n">ENTITY_TABLE</span><span class="si">}</span><span class="s2">.parquet"</span><span class="p">)</span>
|
||||
<span class="n">report_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">INPUT_DIR</span><span class="si">}</span><span class="s2">/</span><span class="si">{</span><span class="n">COMMUNITY_REPORT_TABLE</span><span class="si">}</span><span class="s2">.parquet"</span><span class="p">)</span>
|
||||
<span class="n">entity_embedding_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">INPUT_DIR</span><span class="si">}</span><span class="s2">/</span><span class="si">{</span><span class="n">ENTITY_EMBEDDING_TABLE</span><span class="si">}</span><span class="s2">.parquet"</span><span class="p">)</span>
|
||||
|
||||
<span class="n">communities</span> <span class="o">=</span> <span class="n">read_indexer_communities</span><span class="p">(</span><span class="n">community_df</span><span class="p">,</span> <span class="n">entity_df</span><span class="p">,</span> <span class="n">report_df</span><span class="p">)</span>
|
||||
<span class="n">communities</span> <span class="o">=</span> <span class="n">read_indexer_communities</span><span class="p">(</span><span class="n">community_df</span><span class="p">,</span> <span class="n">report_df</span><span class="p">)</span>
|
||||
<span class="n">reports</span> <span class="o">=</span> <span class="n">read_indexer_reports</span><span class="p">(</span>
|
||||
<span class="n">report_df</span><span class="p">,</span>
|
||||
<span class="n">entity_df</span><span class="p">,</span>
|
||||
<span class="n">community_df</span><span class="p">,</span>
|
||||
<span class="n">community_level</span><span class="o">=</span><span class="n">COMMUNITY_LEVEL</span><span class="p">,</span>
|
||||
<span class="n">dynamic_community_selection</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
<span class="n">entities</span> <span class="o">=</span> <span class="n">read_indexer_entities</span><span class="p">(</span>
|
||||
<span class="n">entity_df</span><span class="p">,</span> <span class="n">entity_embedding_df</span><span class="p">,</span> <span class="n">community_level</span><span class="o">=</span><span class="n">COMMUNITY_LEVEL</span>
|
||||
<span class="n">entity_df</span><span class="p">,</span> <span class="n">community_df</span><span class="p">,</span> <span class="n">community_level</span><span class="o">=</span><span class="n">COMMUNITY_LEVEL</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Total report count: </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">report_df</span><span class="p">)</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
||||
@ -2124,17 +2121,16 @@ COMMUNITY_LEVEL = None</div>
|
||||
<div class="clipboard-copy-txt" id="cell-5">community_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_TABLE}.parquet")
|
||||
entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
|
||||
report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
|
||||
entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet")
|
||||
|
||||
communities = read_indexer_communities(community_df, entity_df, report_df)
|
||||
communities = read_indexer_communities(community_df, report_df)
|
||||
reports = read_indexer_reports(
|
||||
report_df,
|
||||
entity_df,
|
||||
community_df,
|
||||
community_level=COMMUNITY_LEVEL,
|
||||
dynamic_community_selection=True,
|
||||
)
|
||||
entities = read_indexer_entities(
|
||||
entity_df, entity_embedding_df, community_level=COMMUNITY_LEVEL
|
||||
entity_df, community_df, community_level=COMMUNITY_LEVEL
|
||||
)
|
||||
|
||||
print(f"Total report count: {len(report_df)}")
|
||||
@ -2153,138 +2149,74 @@ report_df.head()</div>
|
||||
<div class="jp-OutputArea jp-Cell-outputArea">
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
|
||||
<pre>Total report count: 72
|
||||
Report count after filtering by community level None: 72
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-OutputArea-child jp-OutputArea-executeResult">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt">Out[5]:</div>
|
||||
<div class="jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/html" tabindex="0">
|
||||
<div>
|
||||
<style scoped="">
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">FileNotFoundError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[5], line 1</span>
|
||||
<span class="ansi-green-fg">----> 1</span> community_df <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">pd</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">.</span><span class="ansi-yellow-bg">read_parquet</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">f</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(175,95,135)">{</span><span class="ansi-yellow-bg">INPUT_DIR</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(175,95,135)">}</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">/</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(175,95,135)">{</span><span class="ansi-yellow-bg">COMMUNITY_TABLE</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(175,95,135)">}</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">.parquet</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 2</span> entity_df <span style="color: rgb(98,98,98)">=</span> pd<span style="color: rgb(98,98,98)">.</span>read_parquet(<span style="color: rgb(175,0,0)">f</span><span style="color: rgb(175,0,0)">"</span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span>INPUT_DIR<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)">/</span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span>ENTITY_TABLE<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)">.parquet</span><span style="color: rgb(175,0,0)">"</span>)
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 3</span> report_df <span style="color: rgb(98,98,98)">=</span> pd<span style="color: rgb(98,98,98)">.</span>read_parquet(<span style="color: rgb(175,0,0)">f</span><span style="color: rgb(175,0,0)">"</span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span>INPUT_DIR<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)">/</span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span>COMMUNITY_REPORT_TABLE<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)">.parquet</span><span style="color: rgb(175,0,0)">"</span>)
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/pandas/io/parquet.py:667</span>, in <span class="ansi-cyan-fg">read_parquet</span><span class="ansi-blue-fg">(path, engine, columns, storage_options, use_nullable_dtypes, dtype_backend, filesystem, filters, **kwargs)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 664</span> use_nullable_dtypes <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">False</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 665</span> check_dtype_backend(dtype_backend)
|
||||
<span class="ansi-green-fg">--> 667</span> <span class="ansi-bold" style="color: rgb(0,135,0)">return</span> <span class="ansi-yellow-bg">impl</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">.</span><span class="ansi-yellow-bg">read</span><span class="ansi-yellow-bg">(</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 668</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">path</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 669</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">columns</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">columns</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 670</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">filters</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">filters</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 671</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 672</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">use_nullable_dtypes</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">use_nullable_dtypes</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 673</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">dtype_backend</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">dtype_backend</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 674</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">filesystem</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">filesystem</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 675</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">*</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">*</span><span class="ansi-yellow-bg">kwargs</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 676</span> <span class="ansi-yellow-bg">)</span>
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
<th></th>
|
||||
<th>id</th>
|
||||
<th>human_readable_id</th>
|
||||
<th>community</th>
|
||||
<th>parent</th>
|
||||
<th>level</th>
|
||||
<th>title</th>
|
||||
<th>summary</th>
|
||||
<th>full_content</th>
|
||||
<th>rank</th>
|
||||
<th>rank_explanation</th>
|
||||
<th>findings</th>
|
||||
<th>full_content_json</th>
|
||||
<th>period</th>
|
||||
<th>size</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>0</th>
|
||||
<td>16949a5d17b740b2b4a6f787b0a637f1</td>
|
||||
<td>43</td>
|
||||
<td>43</td>
|
||||
<td>10</td>
|
||||
<td>2</td>
|
||||
<td>Ben Bloomberg and the Harmoniser Project</td>
|
||||
<td>The community centers around Ben Bloomberg, a ...</td>
|
||||
<td># Ben Bloomberg and the Harmoniser Project\n\n...</td>
|
||||
<td>7.5</td>
|
||||
<td>The impact severity rating is high due to the ...</td>
|
||||
<td>[{'explanation': 'Ben Bloomberg is a pivotal f...</td>
|
||||
<td>{\n "title": "Ben Bloomberg and the Harmoni...</td>
|
||||
<td>2025-01-10</td>
|
||||
<td>35</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>1</th>
|
||||
<td>4ff756b7041f4dcab6612e016af2b14d</td>
|
||||
<td>44</td>
|
||||
<td>44</td>
|
||||
<td>10</td>
|
||||
<td>2</td>
|
||||
<td>North Hampton and Influential Musicians</td>
|
||||
<td>The community centers around North Hampton, a ...</td>
|
||||
<td># North Hampton and Influential Musicians\n\nT...</td>
|
||||
<td>6.5</td>
|
||||
<td>The impact severity rating is moderately high ...</td>
|
||||
<td>[{'explanation': 'North Hampton serves as the ...</td>
|
||||
<td>{\n "title": "North Hampton and Influential...</td>
|
||||
<td>2025-01-10</td>
|
||||
<td>4</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>2</th>
|
||||
<td>2d3df394272743a781606ad80ccb5312</td>
|
||||
<td>45</td>
|
||||
<td>45</td>
|
||||
<td>10</td>
|
||||
<td>2</td>
|
||||
<td>Prince of Monaco and Monaco</td>
|
||||
<td>The community revolves around the Prince of Mo...</td>
|
||||
<td># Prince of Monaco and Monaco\n\nThe community...</td>
|
||||
<td>4.0</td>
|
||||
<td>The impact severity rating is moderate due to ...</td>
|
||||
<td>[{'explanation': 'The Prince of Monaco is a ke...</td>
|
||||
<td>{\n "title": "Prince of Monaco and Monaco",...</td>
|
||||
<td>2025-01-10</td>
|
||||
<td>2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>3</th>
|
||||
<td>becbd958973f42b0bd53cca9250feaf1</td>
|
||||
<td>46</td>
|
||||
<td>46</td>
|
||||
<td>10</td>
|
||||
<td>2</td>
|
||||
<td>Robot Opera and Broadway</td>
|
||||
<td>The community revolves around the Robot Opera,...</td>
|
||||
<td># Robot Opera and Broadway\n\nThe community re...</td>
|
||||
<td>7.5</td>
|
||||
<td>The impact severity rating is high due to the ...</td>
|
||||
<td>[{'explanation': 'The Robot Opera is a notable...</td>
|
||||
<td>{\n "title": "Robot Opera and Broadway",\n ...</td>
|
||||
<td>2025-01-10</td>
|
||||
<td>2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>4</th>
|
||||
<td>f7d29921ae3e41a79ae7f88dae584892</td>
|
||||
<td>47</td>
|
||||
<td>47</td>
|
||||
<td>13</td>
|
||||
<td>2</td>
|
||||
<td>Ben and Jacob's Fusion of Art and Technology</td>
|
||||
<td>The community centers around Ben and Jacob, wh...</td>
|
||||
<td># Ben and Jacob's Fusion of Art and Technology...</td>
|
||||
<td>7.5</td>
|
||||
<td>The impact severity rating is high due to the ...</td>
|
||||
<td>[{'explanation': 'Ben and Jacob are key collab...</td>
|
||||
<td>{\n "title": "Ben and Jacob's Fusion of Art...</td>
|
||||
<td>2025-01-10</td>
|
||||
<td>5</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/pandas/io/parquet.py:267</span>, in <span class="ansi-cyan-fg">PyArrowImpl.read</span><span class="ansi-blue-fg">(self, path, columns, filters, use_nullable_dtypes, dtype_backend, storage_options, filesystem, **kwargs)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 264</span> <span class="ansi-bold" style="color: rgb(0,135,0)">if</span> manager <span style="color: rgb(98,98,98)">==</span> <span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">array</span><span style="color: rgb(175,0,0)">"</span>:
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 265</span> to_pandas_kwargs[<span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">split_blocks</span><span style="color: rgb(175,0,0)">"</span>] <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">True</span> <span style="color: rgb(95,135,135)"># type: ignore[assignment]</span>
|
||||
<span class="ansi-green-fg">--> 267</span> path_or_handle, handles, filesystem <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">_get_path_or_handle</span><span class="ansi-yellow-bg">(</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 268</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">path</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 269</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">filesystem</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 270</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 271</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">mode</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">rb</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg">,</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 272</span> <span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 273</span> <span class="ansi-bold" style="color: rgb(0,135,0)">try</span>:
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 274</span> pa_table <span style="color: rgb(98,98,98)">=</span> <span style="color: rgb(0,135,0)">self</span><span style="color: rgb(98,98,98)">.</span>api<span style="color: rgb(98,98,98)">.</span>parquet<span style="color: rgb(98,98,98)">.</span>read_table(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 275</span> path_or_handle,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 276</span> columns<span style="color: rgb(98,98,98)">=</span>columns,
|
||||
<span class="ansi-green-fg"> (...)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 279</span> <span style="color: rgb(98,98,98)">*</span><span style="color: rgb(98,98,98)">*</span>kwargs,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 280</span> )
|
||||
|
||||
File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/pandas/io/parquet.py:140</span>, in <span class="ansi-cyan-fg">_get_path_or_handle</span><span class="ansi-blue-fg">(path, fs, storage_options, mode, is_dir)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 130</span> handles <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">None</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 131</span> <span class="ansi-bold" style="color: rgb(0,135,0)">if</span> (
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 132</span> <span class="ansi-bold" style="color: rgb(175,0,255)">not</span> fs
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 133</span> <span class="ansi-bold" style="color: rgb(175,0,255)">and</span> <span class="ansi-bold" style="color: rgb(175,0,255)">not</span> is_dir
|
||||
<span class="ansi-green-fg"> (...)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 138</span> <span style="color: rgb(95,135,135)"># fsspec resources can also point to directories</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 139</span> <span style="color: rgb(95,135,135)"># this branch is used for example when reading from non-fsspec URLs</span>
|
||||
<span class="ansi-green-fg">--> 140</span> handles <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">get_handle</span><span class="ansi-yellow-bg">(</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 141</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">path_or_handle</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">mode</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">is_text</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg ansi-bold" style="color: rgb(0,135,0)">False</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">storage_options</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">storage_options</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 142</span> <span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 143</span> fs <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">None</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 144</span> path_or_handle <span style="color: rgb(98,98,98)">=</span> handles<span style="color: rgb(98,98,98)">.</span>handle
|
||||
|
||||
File <span class="ansi-green-fg">~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/pandas/io/common.py:882</span>, in <span class="ansi-cyan-fg">get_handle</span><span class="ansi-blue-fg">(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 873</span> handle <span style="color: rgb(98,98,98)">=</span> <span style="color: rgb(0,135,0)">open</span>(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 874</span> handle,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 875</span> ioargs<span style="color: rgb(98,98,98)">.</span>mode,
|
||||
<span class="ansi-green-fg"> (...)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 878</span> newline<span style="color: rgb(98,98,98)">=</span><span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">"</span>,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 879</span> )
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 880</span> <span class="ansi-bold" style="color: rgb(0,135,0)">else</span>:
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 881</span> <span style="color: rgb(95,135,135)"># Binary mode</span>
|
||||
<span class="ansi-green-fg">--> 882</span> handle <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg" style="color: rgb(0,135,0)">open</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg">handle</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">ioargs</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">.</span><span class="ansi-yellow-bg">mode</span><span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 883</span> handles<span style="color: rgb(98,98,98)">.</span>append(handle)
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 885</span> <span style="color: rgb(95,135,135)"># Convert BytesIO or file objects passed with an encoding</span>
|
||||
|
||||
<span class="ansi-red-fg">FileNotFoundError</span>: [Errno 2] No such file or directory: './inputs/operation dulce/communities.parquet'</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -2303,8 +2235,8 @@ Report count after filtering by community level None: 72
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
|
||||
<div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
|
||||
</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
|
||||
<div class="jp-Cell jp-CodeCell jp-Notebook-cell">
|
||||
<div class="jp-Cell-inputWrapper" tabindex="0">
|
||||
<div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
|
||||
</div>
|
||||
@ -2365,6 +2297,41 @@ context_builder = GlobalCommunityContext(
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-Cell-outputWrapper">
|
||||
<div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
|
||||
</div>
|
||||
<div class="jp-OutputArea jp-Cell-outputArea">
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">NameError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[6], line 10</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 1</span> mini_llm <span style="color: rgb(98,98,98)">=</span> ChatOpenAI(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 2</span> api_key<span style="color: rgb(98,98,98)">=</span>api_key,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 3</span> model<span style="color: rgb(98,98,98)">=</span><span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">gpt-4o-mini</span><span style="color: rgb(175,0,0)">"</span>,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 4</span> api_type<span style="color: rgb(98,98,98)">=</span>OpenaiApiType<span style="color: rgb(98,98,98)">.</span>OpenAI, <span style="color: rgb(95,135,135)"># OpenaiApiType.OpenAI or OpenaiApiType.AzureOpenAI</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 5</span> max_retries<span style="color: rgb(98,98,98)">=</span><span style="color: rgb(98,98,98)">20</span>,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 6</span> )
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 7</span> mini_token_encoder <span style="color: rgb(98,98,98)">=</span> tiktoken<span style="color: rgb(98,98,98)">.</span>encoding_for_model(mini_llm<span style="color: rgb(98,98,98)">.</span>model)
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 9</span> context_builder <span style="color: rgb(98,98,98)">=</span> GlobalCommunityContext(
|
||||
<span class="ansi-green-fg">---> 10</span> community_reports<span style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">reports</span>,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 11</span> communities<span style="color: rgb(98,98,98)">=</span>communities,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 12</span> entities<span style="color: rgb(98,98,98)">=</span>entities, <span style="color: rgb(95,135,135)"># default to None if you don't want to use community weights for ranking</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 13</span> token_encoder<span style="color: rgb(98,98,98)">=</span>token_encoder,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 14</span> dynamic_community_selection<span style="color: rgb(98,98,98)">=</span><span class="ansi-bold" style="color: rgb(0,135,0)">True</span>,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 15</span> dynamic_community_selection_kwargs<span style="color: rgb(98,98,98)">=</span>{
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 16</span> <span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">llm</span><span style="color: rgb(175,0,0)">"</span>: mini_llm,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 17</span> <span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">token_encoder</span><span style="color: rgb(175,0,0)">"</span>: mini_token_encoder,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 18</span> },
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 19</span> )
|
||||
|
||||
<span class="ansi-red-fg">NameError</span>: name 'reports' is not defined</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-Cell jp-MarkdownCell jp-Notebook-cell">
|
||||
@ -2448,8 +2415,8 @@ reduce_llm_params = {
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
|
||||
<div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
|
||||
</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
|
||||
<div class="jp-Cell jp-CodeCell jp-Notebook-cell">
|
||||
<div class="jp-Cell-inputWrapper" tabindex="0">
|
||||
<div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
|
||||
</div>
|
||||
@ -2498,6 +2465,36 @@ reduce_llm_params = {
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="jp-Cell-outputWrapper">
|
||||
<div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
|
||||
</div>
|
||||
<div class="jp-OutputArea jp-Cell-outputArea">
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">NameError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[8], line 3</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 1</span> search_engine <span style="color: rgb(98,98,98)">=</span> GlobalSearch(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 2</span> llm<span style="color: rgb(98,98,98)">=</span>llm,
|
||||
<span class="ansi-green-fg">----> 3</span> context_builder<span style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">context_builder</span>,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 4</span> token_encoder<span style="color: rgb(98,98,98)">=</span>token_encoder,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 5</span> max_data_tokens<span style="color: rgb(98,98,98)">=</span><span style="color: rgb(98,98,98)">12_000</span>, <span style="color: rgb(95,135,135)"># change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 6</span> map_llm_params<span style="color: rgb(98,98,98)">=</span>map_llm_params,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 7</span> reduce_llm_params<span style="color: rgb(98,98,98)">=</span>reduce_llm_params,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 8</span> allow_general_knowledge<span style="color: rgb(98,98,98)">=</span><span class="ansi-bold" style="color: rgb(0,135,0)">False</span>, <span style="color: rgb(95,135,135)"># set this to True will add instruction to encourage the LLM to incorporate general knowledge in the response, which may increase hallucinations, but could be useful in some use cases.</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 9</span> json_mode<span style="color: rgb(98,98,98)">=</span><span class="ansi-bold" style="color: rgb(0,135,0)">True</span>, <span style="color: rgb(95,135,135)"># set this to False if your LLM model does not support JSON mode.</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 10</span> context_builder_params<span style="color: rgb(98,98,98)">=</span>context_builder_params,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 11</span> concurrent_coroutines<span style="color: rgb(98,98,98)">=</span><span style="color: rgb(98,98,98)">32</span>,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 12</span> response_type<span style="color: rgb(98,98,98)">=</span><span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">multiple paragraphs</span><span style="color: rgb(175,0,0)">"</span>, <span style="color: rgb(95,135,135)"># free form text describing the response type and format, can be anything, e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 13</span> )
|
||||
|
||||
<span class="ansi-red-fg">NameError</span>: name 'context_builder' is not defined</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
|
||||
<div class="jp-Cell jp-CodeCell jp-Notebook-cell">
|
||||
@ -2539,17 +2536,17 @@ print(result.response)</div>
|
||||
<div class="jp-OutputArea jp-Cell-outputArea">
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
|
||||
<pre>### Cosmic Vocalization: An Overview
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">NameError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[9], line 1</span>
|
||||
<span class="ansi-green-fg">----> 1</span> result <span style="color: rgb(98,98,98)">=</span> <span class="ansi-bold" style="color: rgb(0,135,0)">await</span> <span class="ansi-yellow-bg">search_engine</span><span style="color: rgb(98,98,98)">.</span>asearch(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 2</span> <span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">What is Cosmic Vocalization and who are involved in it?</span><span style="color: rgb(175,0,0)">"</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 3</span> )
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 5</span> <span style="color: rgb(0,135,0)">print</span>(result<span style="color: rgb(98,98,98)">.</span>response)
|
||||
|
||||
Cosmic Vocalization is a term coined by Jordan Hayes to describe a repeating sequence found in cryptic communications. This concept is pivotal as it serves as a reference point for both humanity and extraterrestrial entities, facilitating a mutual understanding and interpretation of signals exchanged during what is known as the Interstellar Duet [Data: Reports (65)].
|
||||
|
||||
### Key Involvement
|
||||
|
||||
Jordan Hayes is notably involved in the development and use of the concept of Cosmic Vocalization. They utilize this term to articulate the repeating sequence in these communications, highlighting its significance in bridging the communicative gap between different entities [Data: Reports (65)].
|
||||
|
||||
The involvement of Jordan Hayes underscores the importance of Cosmic Vocalization in the broader context of interstellar communication, suggesting that it may play a crucial role in future interactions and understandings between humans and extraterrestrial beings.
|
||||
</pre>
|
||||
<span class="ansi-red-fg">NameError</span>: name 'search_engine' is not defined</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -2587,70 +2584,17 @@ result.context_data["reports"]</div>
|
||||
<div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
|
||||
</div>
|
||||
<div class="jp-OutputArea jp-Cell-outputArea">
|
||||
<div class="jp-OutputArea-child jp-OutputArea-executeResult">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt">Out[10]:</div>
|
||||
<div class="jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/html" tabindex="0">
|
||||
<div>
|
||||
<style scoped="">
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">NameError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[10], line 2</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 1</span> <span style="color: rgb(95,135,135)"># inspect the data used to build the context for the LLM responses</span>
|
||||
<span class="ansi-green-fg">----> 2</span> <span class="ansi-yellow-bg">result</span><span style="color: rgb(98,98,98)">.</span>context_data[<span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">reports</span><span style="color: rgb(175,0,0)">"</span>]
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
<th></th>
|
||||
<th>id</th>
|
||||
<th>title</th>
|
||||
<th>occurrence weight</th>
|
||||
<th>content</th>
|
||||
<th>rank</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>0</th>
|
||||
<td>52</td>
|
||||
<td>Paranormal Military Squad at Dulce Base</td>
|
||||
<td>1.0000</td>
|
||||
<td># Paranormal Military Squad at Dulce Base\n\nT...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>1</th>
|
||||
<td>53</td>
|
||||
<td>Dulce Base and Paranormal Military Squad</td>
|
||||
<td>0.8125</td>
|
||||
<td># Dulce Base and Paranormal Military Squad\n\n...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>2</th>
|
||||
<td>65</td>
|
||||
<td>Galactic Orchestra and Interstellar Duet</td>
|
||||
<td>0.0625</td>
|
||||
<td># Galactic Orchestra and Interstellar Duet\n\n...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>3</th>
|
||||
<td>67</td>
|
||||
<td>Decryption and Understanding of Alien Signal</td>
|
||||
<td>0.0625</td>
|
||||
<td># Decryption and Understanding of Alien Signal...</td>
|
||||
<td>8.5</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<span class="ansi-red-fg">NameError</span>: name 'result' is not defined</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -2722,12 +2666,17 @@ print(
|
||||
<div class="jp-OutputArea jp-Cell-outputArea">
|
||||
<div class="jp-OutputArea-child">
|
||||
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
|
||||
<pre>Build context (gpt-4o-mini)
|
||||
LLM calls: 32. Prompt tokens: 28389. Output tokens: 2520.
|
||||
Map-reduce (gpt-4o)
|
||||
LLM calls: 2. Prompt tokens: 4041. Output tokens: 314.
|
||||
</pre>
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">NameError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[11], line 2</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 1</span> <span style="color: rgb(95,135,135)"># inspect number of LLM calls and tokens in dynamic community selection</span>
|
||||
<span class="ansi-green-fg">----> 2</span> llm_calls <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">result</span><span style="color: rgb(98,98,98)">.</span>llm_calls_categories[<span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">build_context</span><span style="color: rgb(175,0,0)">"</span>]
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 3</span> prompt_tokens <span style="color: rgb(98,98,98)">=</span> result<span style="color: rgb(98,98,98)">.</span>prompt_tokens_categories[<span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">build_context</span><span style="color: rgb(175,0,0)">"</span>]
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 4</span> output_tokens <span style="color: rgb(98,98,98)">=</span> result<span style="color: rgb(98,98,98)">.</span>output_tokens_categories[<span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">build_context</span><span style="color: rgb(175,0,0)">"</span>]
|
||||
|
||||
<span class="ansi-red-fg">NameError</span>: name 'result' is not defined</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
|
||||
|
||||
|
||||
<title>Index migration - GraphRAG</title>
|
||||
<title>Index migration to v1 - GraphRAG</title>
|
||||
|
||||
|
||||
|
||||
@ -72,7 +72,7 @@
|
||||
<div data-md-component="skip">
|
||||
|
||||
|
||||
<a href="#index-migration" class="md-skip">
|
||||
<a href="#index-migration-pre-v1-to-v1" class="md-skip">
|
||||
Skip to content
|
||||
</a>
|
||||
|
||||
@ -106,7 +106,7 @@
|
||||
<div class="md-header__topic" data-md-component="header-topic">
|
||||
<span class="md-ellipsis">
|
||||
|
||||
Index migration
|
||||
Index migration to v1
|
||||
|
||||
</span>
|
||||
</div>
|
||||
@ -1352,9 +1352,9 @@
|
||||
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#index-migration" class="md-nav__link">
|
||||
<a href="#index-migration-pre-v1-to-v1" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Index Migration
|
||||
Index Migration (pre-v1 to v1)
|
||||
</span>
|
||||
</a>
|
||||
|
||||
@ -1378,7 +1378,7 @@
|
||||
|
||||
|
||||
|
||||
<h1>Index migration</h1>
|
||||
<h1>Index migration to v1</h1>
|
||||
|
||||
<script>
|
||||
(function (global, factory) {
|
||||
@ -1852,7 +1852,7 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
|
||||
</div>
|
||||
<div class="jp-InputArea jp-Cell-inputArea"><div class="jp-InputPrompt jp-InputArea-prompt">
|
||||
</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput" data-mime-type="text/markdown">
|
||||
<h2 id="index-migration">Index Migration<a class="anchor-link" href="#index-migration">¶</a></h2><p>This notebook is used to maintain data model parity with older indexes for the latest versions of GraphRAG. If you have a pre-1.0 index and need to migrate without re-running the entire pipeline, you can use this notebook to only update the pieces necessary for alignment.</p>
|
||||
<h2 id="index-migration-pre-v1-to-v1">Index Migration (pre-v1 to v1)<a class="anchor-link" href="#index-migration-pre-v1-to-v1">¶</a></h2><p>This notebook is used to maintain data model parity with older indexes for version 1.0 of GraphRAG. If you have a pre-1.0 index and need to migrate without re-running the entire pipeline, you can use this notebook to only update the pieces necessary for alignment.</p>
|
||||
<p>NOTE: we recommend regenerating your settings.yml with the latest version of GraphRAG using <code>graphrag init</code>. Copy your LLM settings into it before running this notebook. This ensures your config is aligned with the latest version for the migration. This also ensures that you have default vector store config, which is now required or indexing will fail.</p>
|
||||
<p>WARNING: This will overwrite your parquet files, you may want to make a backup!</p>
|
||||
</div>
|
||||
@ -1877,16 +1877,16 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
|
||||
</div>
|
||||
</clipboard-copy>
|
||||
</div>
|
||||
<div class="highlight-ipynb hl-python"><pre><span></span><span class="c1"># This is the directory that has your settings.yml</span>
|
||||
<div class="highlight-ipynb hl-python"><pre><span></span><span class="c1"># This is the directory that has your settings.yaml</span>
|
||||
<span class="c1"># NOTE: much older indexes may have been output with a timestamped directory</span>
|
||||
<span class="c1"># if this is the case, you will need to make sure the storage.base_dir in settings.yml points to it correctly</span>
|
||||
<span class="n">PROJECT_DIRECTORY</span> <span class="o">=</span> <span class="s2">"<your project directory>"</span>
|
||||
<span class="c1"># if this is the case, you will need to make sure the storage.base_dir in settings.yaml points to it correctly</span>
|
||||
<span class="n">PROJECT_DIRECTORY</span> <span class="o">=</span> <span class="s2">"<your project directory"</span>
|
||||
</pre></div>
|
||||
<div class="clipboard-copy-txt" id="cell-2"># This is the directory that has your settings.yml
|
||||
<div class="clipboard-copy-txt" id="cell-2"># This is the directory that has your settings.yaml
|
||||
# NOTE: much older indexes may have been output with a timestamped directory
|
||||
# if this is the case, you will need to make sure the storage.base_dir in settings.yml points to it correctly
|
||||
PROJECT_DIRECTORY = "<your directory="" project="">"</your></div>
|
||||
</div>
|
||||
# if this is the case, you will need to make sure the storage.base_dir in settings.yaml points to it correctly
|
||||
PROJECT_DIRECTORY = "<your directory"<="" div="" project="">
|
||||
</your></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -1913,29 +1913,25 @@ PROJECT_DIRECTORY = "<your directory="" project="">"</your></div>
|
||||
<div class="highlight-ipynb hl-python"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">pathlib</span><span class="w"> </span><span class="kn">import</span> <span class="n">Path</span>
|
||||
|
||||
<span class="kn">from</span><span class="w"> </span><span class="nn">graphrag.config.load_config</span><span class="w"> </span><span class="kn">import</span> <span class="n">load_config</span>
|
||||
<span class="kn">from</span><span class="w"> </span><span class="nn">graphrag.config.resolve_path</span><span class="w"> </span><span class="kn">import</span> <span class="n">resolve_paths</span>
|
||||
<span class="kn">from</span><span class="w"> </span><span class="nn">graphrag.storage.factory</span><span class="w"> </span><span class="kn">import</span> <span class="n">StorageFactory</span>
|
||||
|
||||
<span class="c1"># This first block does some config loading, path resolution, and translation that is normally done by the CLI/API when running a full workflow</span>
|
||||
<span class="n">config</span> <span class="o">=</span> <span class="n">load_config</span><span class="p">(</span><span class="n">Path</span><span class="p">(</span><span class="n">PROJECT_DIRECTORY</span><span class="p">))</span>
|
||||
<span class="n">resolve_paths</span><span class="p">(</span><span class="n">config</span><span class="p">)</span>
|
||||
<span class="n">storage_config</span> <span class="o">=</span> <span class="n">config</span><span class="o">.</span><span class="n">storage</span><span class="o">.</span><span class="n">model_dump</span><span class="p">()</span> <span class="c1"># type: ignore</span>
|
||||
<span class="n">storage_config</span> <span class="o">=</span> <span class="n">config</span><span class="o">.</span><span class="n">output</span><span class="o">.</span><span class="n">model_dump</span><span class="p">()</span>
|
||||
<span class="n">storage</span> <span class="o">=</span> <span class="n">StorageFactory</span><span class="p">()</span><span class="o">.</span><span class="n">create_storage</span><span class="p">(</span>
|
||||
<span class="n">storage_type</span><span class="o">=</span><span class="n">storage_config</span><span class="p">[</span><span class="s2">"type"</span><span class="p">],</span> <span class="n">kwargs</span><span class="o">=</span><span class="n">storage_config</span>
|
||||
<span class="n">storage_type</span><span class="o">=</span><span class="n">storage_config</span><span class="p">[</span><span class="s2">"type"</span><span class="p">],</span>
|
||||
<span class="n">kwargs</span><span class="o">=</span><span class="n">storage_config</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
</pre></div>
|
||||
<div class="clipboard-copy-txt" id="cell-3">from pathlib import Path
|
||||
|
||||
from graphrag.config.load_config import load_config
|
||||
from graphrag.config.resolve_path import resolve_paths
|
||||
from graphrag.storage.factory import StorageFactory
|
||||
|
||||
# This first block does some config loading, path resolution, and translation that is normally done by the CLI/API when running a full workflow
|
||||
config = load_config(Path(PROJECT_DIRECTORY))
|
||||
resolve_paths(config)
|
||||
storage_config = config.storage.model_dump() # type: ignore
|
||||
storage_config = config.output.model_dump()
|
||||
storage = StorageFactory().create_storage(
|
||||
storage_type=storage_config["type"], kwargs=storage_config
|
||||
storage_type=storage_config["type"],
|
||||
kwargs=storage_config,
|
||||
)</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -1950,15 +1946,44 @@ storage = StorageFactory().create_storage(
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">ModuleNotFoundError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[3], line 4</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 1</span> <span class="ansi-bold" style="color: rgb(0,135,0)">from</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,0,255)">pathlib</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,135,0)">import</span> Path
|
||||
<span class="ansi-red-fg">FileNotFoundError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[3], line 6</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 3</span> <span class="ansi-bold" style="color: rgb(0,135,0)">from</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,0,255)">graphrag</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">config</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">load_config</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,135,0)">import</span> load_config
|
||||
<span class="ansi-green-fg">----> 4</span> <span class="ansi-bold" style="color: rgb(0,135,0)">from</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,0,255)">graphrag</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">config</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">resolve_path</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,135,0)">import</span> resolve_paths
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 5</span> <span class="ansi-bold" style="color: rgb(0,135,0)">from</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,0,255)">graphrag</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">storage</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">factory</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,135,0)">import</span> StorageFactory
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 7</span> <span style="color: rgb(95,135,135)"># This first block does some config loading, path resolution, and translation that is normally done by the CLI/API when running a full workflow</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 4</span> <span class="ansi-bold" style="color: rgb(0,135,0)">from</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,0,255)">graphrag</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">storage</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">factory</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,135,0)">import</span> StorageFactory
|
||||
<span class="ansi-green-fg">----> 6</span> config <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">load_config</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg">Path</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg">PROJECT_DIRECTORY</span><span class="ansi-yellow-bg">)</span><span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 7</span> storage_config <span style="color: rgb(98,98,98)">=</span> config<span style="color: rgb(98,98,98)">.</span>output<span style="color: rgb(98,98,98)">.</span>model_dump()
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 8</span> storage <span style="color: rgb(98,98,98)">=</span> StorageFactory()<span style="color: rgb(98,98,98)">.</span>create_storage(
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 9</span> storage_type<span style="color: rgb(98,98,98)">=</span>storage_config[<span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">type</span><span style="color: rgb(175,0,0)">"</span>],
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 10</span> kwargs<span style="color: rgb(98,98,98)">=</span>storage_config,
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 11</span> )
|
||||
|
||||
<span class="ansi-red-fg">ModuleNotFoundError</span>: No module named 'graphrag.config.resolve_path'</pre>
|
||||
File <span class="ansi-green-fg">~/work/graphrag/graphrag/graphrag/config/load_config.py:183</span>, in <span class="ansi-cyan-fg">load_config</span><span class="ansi-blue-fg">(root_dir, config_filepath, cli_overrides)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 151</span> <span style="color: rgb(175,0,0)">"""Load configuration from a file.</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 152</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 153</span> <span style="color: rgb(175,0,0)">Parameters</span>
|
||||
<span class="ansi-green-fg"> (...)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 180</span> <span style="color: rgb(175,0,0)"> If there are pydantic validation errors when instantiating the config.</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 181</span> <span style="color: rgb(175,0,0)">"""</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 182</span> root <span style="color: rgb(98,98,98)">=</span> root_dir<span style="color: rgb(98,98,98)">.</span>resolve()
|
||||
<span class="ansi-green-fg">--> 183</span> config_path <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">_get_config_path</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg">root</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">config_filepath</span><span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 184</span> _load_dotenv(config_path)
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 185</span> config_extension <span style="color: rgb(98,98,98)">=</span> config_path<span style="color: rgb(98,98,98)">.</span>suffix
|
||||
|
||||
File <span class="ansi-green-fg">~/work/graphrag/graphrag/graphrag/config/load_config.py:106</span>, in <span class="ansi-cyan-fg">_get_config_path</span><span class="ansi-blue-fg">(root_dir, config_filepath)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 104</span> <span class="ansi-bold" style="color: rgb(0,135,0)">raise</span> <span class="ansi-bold" style="color: rgb(215,95,95)">FileNotFoundError</span>(msg)
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 105</span> <span class="ansi-bold" style="color: rgb(0,135,0)">else</span>:
|
||||
<span class="ansi-green-fg">--> 106</span> config_path <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">_search_for_config_in_root_dir</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg">root_dir</span><span class="ansi-yellow-bg">)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 108</span> <span class="ansi-bold" style="color: rgb(0,135,0)">if</span> <span class="ansi-bold" style="color: rgb(175,0,255)">not</span> config_path:
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 109</span> msg <span style="color: rgb(98,98,98)">=</span> <span style="color: rgb(175,0,0)">f</span><span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">Config file not found in root directory: </span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span>root_dir<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)">"</span>
|
||||
|
||||
File <span class="ansi-green-fg">~/work/graphrag/graphrag/graphrag/config/load_config.py:40</span>, in <span class="ansi-cyan-fg">_search_for_config_in_root_dir</span><span class="ansi-blue-fg">(root)</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 38</span> <span class="ansi-bold" style="color: rgb(0,135,0)">if</span> <span class="ansi-bold" style="color: rgb(175,0,255)">not</span> root<span style="color: rgb(98,98,98)">.</span>is_dir():
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 39</span> msg <span style="color: rgb(98,98,98)">=</span> <span style="color: rgb(175,0,0)">f</span><span style="color: rgb(175,0,0)">"</span><span style="color: rgb(175,0,0)">Invalid config path: </span><span class="ansi-bold" style="color: rgb(175,95,135)">{</span>root<span class="ansi-bold" style="color: rgb(175,95,135)">}</span><span style="color: rgb(175,0,0)"> is not a directory</span><span style="color: rgb(175,0,0)">"</span>
|
||||
<span class="ansi-green-fg">---> 40</span> <span class="ansi-bold" style="color: rgb(0,135,0)">raise</span> <span class="ansi-bold" style="color: rgb(215,95,95)">FileNotFoundError</span>(msg)
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 42</span> <span class="ansi-bold" style="color: rgb(0,135,0)">for</span> file <span class="ansi-bold" style="color: rgb(175,0,255)">in</span> _default_config_files:
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 43</span> <span class="ansi-bold" style="color: rgb(0,135,0)">if</span> (root <span style="color: rgb(98,98,98)">/</span> file)<span style="color: rgb(98,98,98)">.</span>is_file():
|
||||
|
||||
<span class="ansi-red-fg">FileNotFoundError</span>: Invalid config path: /home/runner/work/graphrag/graphrag/docs/examples_notebooks/<your project directory is not a directory</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -2113,7 +2138,7 @@ Cell <span class="ansi-green-fg">In[3], line 4</span>
|
||||
<span class="k">if</span> <span class="s2">"name"</span> <span class="ow">in</span> <span class="n">final_entities</span><span class="o">.</span><span class="n">columns</span><span class="p">:</span>
|
||||
<span class="n">final_entities</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="p">{</span><span class="s2">"name"</span><span class="p">:</span> <span class="s2">"title"</span><span class="p">},</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="n">remove_columns</span><span class="p">(</span>
|
||||
<span class="n">final_entities</span><span class="p">,</span> <span class="p">[</span><span class="s2">"mname_embedding"</span><span class="p">,</span> <span class="s2">"graph_embedding"</span><span class="p">,</span> <span class="s2">"description_embedding"</span><span class="p">]</span>
|
||||
<span class="n">final_entities</span><span class="p">,</span> <span class="p">[</span><span class="s2">"name_embedding"</span><span class="p">,</span> <span class="s2">"graph_embedding"</span><span class="p">,</span> <span class="s2">"description_embedding"</span><span class="p">]</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="c1"># Final nodes uses community for joins, which is now an int everywhere</span>
|
||||
@ -2147,6 +2172,15 @@ Cell <span class="ansi-green-fg">In[3], line 4</span>
|
||||
<span class="n">final_communities</span><span class="p">[</span><span class="s2">"id"</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="nb">str</span><span class="p">(</span><span class="n">uuid4</span><span class="p">())</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">final_communities</span><span class="p">))]</span>
|
||||
<span class="k">if</span> <span class="s2">"parent"</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">final_communities</span><span class="o">.</span><span class="n">columns</span><span class="p">:</span>
|
||||
<span class="n">final_communities</span> <span class="o">=</span> <span class="n">final_communities</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="n">parent_df</span><span class="p">,</span> <span class="n">on</span><span class="o">=</span><span class="s2">"community"</span><span class="p">,</span> <span class="n">how</span><span class="o">=</span><span class="s2">"left"</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="s2">"entity_ids"</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">final_communities</span><span class="o">.</span><span class="n">columns</span><span class="p">:</span>
|
||||
<span class="n">node_mapping</span> <span class="o">=</span> <span class="p">(</span>
|
||||
<span class="n">final_nodes</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="p">[</span><span class="s2">"community"</span><span class="p">,</span> <span class="s2">"id"</span><span class="p">]]</span>
|
||||
<span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="s2">"community"</span><span class="p">)</span>
|
||||
<span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="n">entity_ids</span><span class="o">=</span><span class="p">(</span><span class="s2">"id"</span><span class="p">,</span> <span class="nb">list</span><span class="p">))</span>
|
||||
<span class="p">)</span>
|
||||
<span class="n">final_communities</span> <span class="o">=</span> <span class="n">final_communities</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span>
|
||||
<span class="n">node_mapping</span><span class="p">,</span> <span class="n">on</span><span class="o">=</span><span class="s2">"community"</span><span class="p">,</span> <span class="n">how</span><span class="o">=</span><span class="s2">"left"</span>
|
||||
<span class="p">)</span>
|
||||
<span class="n">remove_columns</span><span class="p">(</span><span class="n">final_communities</span><span class="p">,</span> <span class="p">[</span><span class="s2">"raw_community"</span><span class="p">])</span>
|
||||
|
||||
<span class="c1"># We need int for community and the human_readable_id copy for consistency</span>
|
||||
@ -2199,7 +2233,7 @@ final_text_units["human_readable_id"] = final_text_units.index + 1
|
||||
if "name" in final_entities.columns:
|
||||
final_entities.rename(columns={"name": "title"}, inplace=True)
|
||||
remove_columns(
|
||||
final_entities, ["mname_embedding", "graph_embedding", "description_embedding"]
|
||||
final_entities, ["name_embedding", "graph_embedding", "description_embedding"]
|
||||
)
|
||||
|
||||
# Final nodes uses community for joins, which is now an int everywhere
|
||||
@ -2233,6 +2267,15 @@ if "community" not in final_communities.columns:
|
||||
final_communities["id"] = [str(uuid4()) for _ in range(len(final_communities))]
|
||||
if "parent" not in final_communities.columns:
|
||||
final_communities = final_communities.merge(parent_df, on="community", how="left")
|
||||
if "entity_ids" not in final_communities.columns:
|
||||
node_mapping = (
|
||||
final_nodes.loc[:, ["community", "id"]]
|
||||
.groupby("community")
|
||||
.agg(entity_ids=("id", list))
|
||||
)
|
||||
final_communities = final_communities.merge(
|
||||
node_mapping, on="community", how="left"
|
||||
)
|
||||
remove_columns(final_communities, ["raw_community"])
|
||||
|
||||
# We need int for community and the human_readable_id copy for consistency
|
||||
@ -2301,7 +2344,7 @@ Cell <span class="ansi-green-fg">In[6], line 8</span>
|
||||
</div>
|
||||
<div class="highlight-ipynb hl-python"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">graphrag.cache.factory</span><span class="w"> </span><span class="kn">import</span> <span class="n">CacheFactory</span>
|
||||
<span class="kn">from</span><span class="w"> </span><span class="nn">graphrag.callbacks.noop_workflow_callbacks</span><span class="w"> </span><span class="kn">import</span> <span class="n">NoopWorkflowCallbacks</span>
|
||||
<span class="kn">from</span><span class="w"> </span><span class="nn">graphrag.index.config.embeddings</span><span class="w"> </span><span class="kn">import</span> <span class="n">get_embedded_fields</span><span class="p">,</span> <span class="n">get_embedding_settings</span>
|
||||
<span class="kn">from</span><span class="w"> </span><span class="nn">graphrag.config.embeddings</span><span class="w"> </span><span class="kn">import</span> <span class="n">get_embedded_fields</span><span class="p">,</span> <span class="n">get_embedding_settings</span>
|
||||
<span class="kn">from</span><span class="w"> </span><span class="nn">graphrag.index.flows.generate_text_embeddings</span><span class="w"> </span><span class="kn">import</span> <span class="n">generate_text_embeddings</span>
|
||||
|
||||
<span class="c1"># We only need to re-run the embeddings workflow, to ensure that embeddings for all required search fields are in place</span>
|
||||
@ -2334,7 +2377,7 @@ Cell <span class="ansi-green-fg">In[6], line 8</span>
|
||||
</pre></div>
|
||||
<div class="clipboard-copy-txt" id="cell-7">from graphrag.cache.factory import CacheFactory
|
||||
from graphrag.callbacks.noop_workflow_callbacks import NoopWorkflowCallbacks
|
||||
from graphrag.index.config.embeddings import get_embedded_fields, get_embedding_settings
|
||||
from graphrag.config.embeddings import get_embedded_fields, get_embedding_settings
|
||||
from graphrag.index.flows.generate_text_embeddings import generate_text_embeddings
|
||||
|
||||
# We only need to re-run the embeddings workflow, to ensure that embeddings for all required search fields are in place
|
||||
@ -2377,16 +2420,16 @@ await generate_text_embeddings(
|
||||
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
|
||||
<pre>
|
||||
<span class="ansi-red-fg">---------------------------------------------------------------------------</span>
|
||||
<span class="ansi-red-fg">ModuleNotFoundError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[7], line 3</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 1</span> <span class="ansi-bold" style="color: rgb(0,135,0)">from</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,0,255)">graphrag</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">cache</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">factory</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,135,0)">import</span> CacheFactory
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 2</span> <span class="ansi-bold" style="color: rgb(0,135,0)">from</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,0,255)">graphrag</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">callbacks</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">noop_workflow_callbacks</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,135,0)">import</span> NoopWorkflowCallbacks
|
||||
<span class="ansi-green-fg">----> 3</span> <span class="ansi-bold" style="color: rgb(0,135,0)">from</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,0,255)">graphrag</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">index</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">config</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">embeddings</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,135,0)">import</span> get_embedded_fields, get_embedding_settings
|
||||
<span class="ansi-red-fg">NameError</span> Traceback (most recent call last)
|
||||
Cell <span class="ansi-green-fg">In[7], line 10</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 4</span> <span class="ansi-bold" style="color: rgb(0,135,0)">from</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,0,255)">graphrag</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">index</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">flows</span><span class="ansi-bold" style="color: rgb(0,0,255)">.</span><span class="ansi-bold" style="color: rgb(0,0,255)">generate_text_embeddings</span><span style="color: rgb(188,188,188)"> </span><span class="ansi-bold" style="color: rgb(0,135,0)">import</span> generate_text_embeddings
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 6</span> <span style="color: rgb(95,135,135)"># We only need to re-run the embeddings workflow, to ensure that embeddings for all required search fields are in place</span>
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 7</span> <span style="color: rgb(95,135,135)"># We'll construct the context and run this function flow directly to avoid everything else</span>
|
||||
<span class="ansi-green-fg">---> 10</span> embedded_fields <span style="color: rgb(98,98,98)">=</span> get_embedded_fields(<span class="ansi-yellow-bg">config</span>)
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 11</span> text_embed <span style="color: rgb(98,98,98)">=</span> get_embedding_settings(config)
|
||||
<span class="ansi-green-intense-fg ansi-bold"> 12</span> callbacks <span style="color: rgb(98,98,98)">=</span> NoopWorkflowCallbacks()
|
||||
|
||||
<span class="ansi-red-fg">ModuleNotFoundError</span>: No module named 'graphrag.index.config'</pre>
|
||||
<span class="ansi-red-fg">NameError</span>: name 'config' is not defined</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -2395,7 +2438,7 @@ Cell <span class="ansi-green-fg">In[7], line 3</span>
|
||||
</div>
|
||||
</div> <!-- jp-Notebook -->
|
||||
</div> <!-- jupyter-wrapper -->
|
||||
|
||||
</div>
|
||||
<style>
|
||||
['pre { line-height: 125%; }\ntd.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }\nspan.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }\ntd.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }\nspan.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }\n.highlight-ipynb .hll { background-color: var(--jp-cell-editor-active-background) }\n.highlight-ipynb { background: var(--jp-cell-editor-background); color: var(--jp-mirror-editor-variable-color) }\n.highlight-ipynb .c { color: var(--jp-mirror-editor-comment-color); font-style: italic } /* Comment */\n.highlight-ipynb .err { color: var(--jp-mirror-editor-error-color) } /* Error */\n.highlight-ipynb .k { color: var(--jp-mirror-editor-keyword-color); font-weight: bold } /* Keyword */\n.highlight-ipynb .o { color: var(--jp-mirror-editor-operator-color); font-weight: bold } /* Operator */\n.highlight-ipynb .p { color: var(--jp-mirror-editor-punctuation-color) } /* Punctuation */\n.highlight-ipynb .ch { color: var(--jp-mirror-editor-comment-color); font-style: italic } /* Comment.Hashbang */\n.highlight-ipynb .cm { color: var(--jp-mirror-editor-comment-color); font-style: italic } /* Comment.Multiline */\n.highlight-ipynb .cp { color: var(--jp-mirror-editor-comment-color); font-style: italic } /* Comment.Preproc */\n.highlight-ipynb .cpf { color: var(--jp-mirror-editor-comment-color); font-style: italic } /* Comment.PreprocFile */\n.highlight-ipynb .c1 { color: var(--jp-mirror-editor-comment-color); font-style: italic } /* Comment.Single */\n.highlight-ipynb .cs { color: var(--jp-mirror-editor-comment-color); font-style: italic } /* Comment.Special */\n.highlight-ipynb .kc { color: var(--jp-mirror-editor-keyword-color); font-weight: bold } /* Keyword.Constant */\n.highlight-ipynb .kd { color: 
var(--jp-mirror-editor-keyword-color); font-weight: bold } /* Keyword.Declaration */\n.highlight-ipynb .kn { color: var(--jp-mirror-editor-keyword-color); font-weight: bold } /* Keyword.Namespace */\n.highlight-ipynb .kp { color: var(--jp-mirror-editor-keyword-color); font-weight: bold } /* Keyword.Pseudo */\n.highlight-ipynb .kr { color: var(--jp-mirror-editor-keyword-color); font-weight: bold } /* Keyword.Reserved */\n.highlight-ipynb .kt { color: var(--jp-mirror-editor-keyword-color); font-weight: bold } /* Keyword.Type */\n.highlight-ipynb .m { color: var(--jp-mirror-editor-number-color) } /* Literal.Number */\n.highlight-ipynb .s { color: var(--jp-mirror-editor-string-color) } /* Literal.String */\n.highlight-ipynb .ow { color: var(--jp-mirror-editor-operator-color); font-weight: bold } /* Operator.Word */\n.highlight-ipynb .pm { color: var(--jp-mirror-editor-punctuation-color) } /* Punctuation.Marker */\n.highlight-ipynb .w { color: var(--jp-mirror-editor-variable-color) } /* Text.Whitespace */\n.highlight-ipynb .mb { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Bin */\n.highlight-ipynb .mf { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Float */\n.highlight-ipynb .mh { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Hex */\n.highlight-ipynb .mi { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Integer */\n.highlight-ipynb .mo { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Oct */\n.highlight-ipynb .sa { color: var(--jp-mirror-editor-string-color) } /* Literal.String.Affix */\n.highlight-ipynb .sb { color: var(--jp-mirror-editor-string-color) } /* Literal.String.Backtick */\n.highlight-ipynb .sc { color: var(--jp-mirror-editor-string-color) } /* Literal.String.Char */\n.highlight-ipynb .dl { color: var(--jp-mirror-editor-string-color) } /* Literal.String.Delimiter */\n.highlight-ipynb .sd { color: var(--jp-mirror-editor-string-color) } /* Literal.String.Doc 
*/\n.highlight-ipynb .s2 { color: var(--jp-mirror-editor-string-color) } /* Literal.String.Double */\n.highlight-ipynb .se { color: var(--jp-mirror-editor-string-color) } /* Literal.String.Escape */\n.highlight-ipynb .sh { color: var(--jp-mirror-editor-string-color) } /* Literal.String.Heredoc */\n.highlight-ipynb .si { color: var(--jp-mirror-editor-string-color) } /* Literal.String.Interpol */\n.highlight-ipynb .sx { color: var(--jp-mirror-editor-string-color) } /* Literal.String.Other */\n.highlight-ipynb .sr { color: var(--jp-mirror-editor-string-color) } /* Literal.String.Regex */\n.highlight-ipynb .s1 { color: var(--jp-mirror-editor-string-color) } /* Literal.String.Single */\n.highlight-ipynb .ss { color: var(--jp-mirror-editor-string-color) } /* Literal.String.Symbol */\n.highlight-ipynb .il { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Integer.Long */']
|
||||
</style>
|
||||
2227
examples_notebooks/index_migration_to_v2/index.html
Normal file
2227
examples_notebooks/index_migration_to_v2/index.html
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
BIN
sitemap.xml.gz
BIN
sitemap.xml.gz
Binary file not shown.
Loading…
Reference in New Issue
Block a user