Fix drift notebook

This commit is contained in:
Nathan Evans 2025-10-06 15:49:56 -07:00
parent f222444b7a
commit 86382a5ef0

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@ -12,19 +12,171 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Entity df columns: Index(['id', 'human_readable_id', 'title', 'type', 'description',\n",
" 'text_unit_ids', 'frequency', 'degree', 'x', 'y'],\n",
" dtype='object')\n",
"Entity count: 18\n",
"Relationship count: 54\n",
"Text unit records: 5\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>human_readable_id</th>\n",
" <th>text</th>\n",
" <th>n_tokens</th>\n",
" <th>document_ids</th>\n",
" <th>entity_ids</th>\n",
" <th>relationship_ids</th>\n",
" <th>covariate_ids</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>8e938693af886bfd081acbbe8384c3671446bff84a134a...</td>\n",
" <td>1</td>\n",
" <td># Operation: Dulce\\n\\n## Chapter 1\\n\\nThe thru...</td>\n",
" <td>1200</td>\n",
" <td>[6e81f882f89dd5596e1925dd3ae8a4f0a0edcb55b35a8...</td>\n",
" <td>[425a7862-0aef-4f69-a4c8-8bd42151c9d4, bcdbf1f...</td>\n",
" <td>[2bfad9f4-5abd-48d0-8db3-a9cad9120413, 6cbb838...</td>\n",
" <td>[745d28dd-be20-411b-85ff-1c69ca70e7b3, 9cba185...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>fd1f46d32e1df6cd429542aeda3d64ddf3745ccb80f443...</td>\n",
" <td>2</td>\n",
" <td>, the hollow echo of the bay a stark reminder ...</td>\n",
" <td>1200</td>\n",
" <td>[6e81f882f89dd5596e1925dd3ae8a4f0a0edcb55b35a8...</td>\n",
" <td>[425a7862-0aef-4f69-a4c8-8bd42151c9d4, bcdbf1f...</td>\n",
" <td>[2bfad9f4-5abd-48d0-8db3-a9cad9120413, 6cbb838...</td>\n",
" <td>[4f9b461f-5e8f-465d-9586-e2fc81787062, 0f74618...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7296d9a1f046854d59079dc183de8a054c27c4843d2979...</td>\n",
" <td>3</td>\n",
" <td>differently than praise from others. This was...</td>\n",
" <td>1200</td>\n",
" <td>[6e81f882f89dd5596e1925dd3ae8a4f0a0edcb55b35a8...</td>\n",
" <td>[425a7862-0aef-4f69-a4c8-8bd42151c9d4, bcdbf1f...</td>\n",
" <td>[2bfad9f4-5abd-48d0-8db3-a9cad9120413, 6cbb838...</td>\n",
" <td>[3ef1be9c-4080-4fac-99bd-c4a636248904, 8730b20...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>ac72722a02ac71242a2a91fca323198d04197daf60515d...</td>\n",
" <td>4</td>\n",
" <td>contrast to the rigid silence enveloping the ...</td>\n",
" <td>1200</td>\n",
" <td>[6e81f882f89dd5596e1925dd3ae8a4f0a0edcb55b35a8...</td>\n",
" <td>[425a7862-0aef-4f69-a4c8-8bd42151c9d4, bcdbf1f...</td>\n",
" <td>[2bfad9f4-5abd-48d0-8db3-a9cad9120413, 6cbb838...</td>\n",
" <td>[2c292047-b79a-4958-ab57-7bf7d7a22c92, 3cbd18a...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4c277337d461a16aaf8f9760ddb8b44ef220e948a2341d...</td>\n",
" <td>5</td>\n",
" <td>a mask of duty.\\n\\nIn the midst of the descen...</td>\n",
" <td>35</td>\n",
" <td>[6e81f882f89dd5596e1925dd3ae8a4f0a0edcb55b35a8...</td>\n",
" <td>[d084d615-3584-4ec8-9931-90aa6075c764, 4b84859...</td>\n",
" <td>[6efdc42e-69a2-47c0-97ec-4b296cd16d5e]</td>\n",
" <td>[db8da02f-f889-4bb5-8e81-ab2a72e380bb]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id human_readable_id \\\n",
"0 8e938693af886bfd081acbbe8384c3671446bff84a134a... 1 \n",
"1 fd1f46d32e1df6cd429542aeda3d64ddf3745ccb80f443... 2 \n",
"2 7296d9a1f046854d59079dc183de8a054c27c4843d2979... 3 \n",
"3 ac72722a02ac71242a2a91fca323198d04197daf60515d... 4 \n",
"4 4c277337d461a16aaf8f9760ddb8b44ef220e948a2341d... 5 \n",
"\n",
" text n_tokens \\\n",
"0 # Operation: Dulce\\n\\n## Chapter 1\\n\\nThe thru... 1200 \n",
"1 , the hollow echo of the bay a stark reminder ... 1200 \n",
"2 differently than praise from others. This was... 1200 \n",
"3 contrast to the rigid silence enveloping the ... 1200 \n",
"4 a mask of duty.\\n\\nIn the midst of the descen... 35 \n",
"\n",
" document_ids \\\n",
"0 [6e81f882f89dd5596e1925dd3ae8a4f0a0edcb55b35a8... \n",
"1 [6e81f882f89dd5596e1925dd3ae8a4f0a0edcb55b35a8... \n",
"2 [6e81f882f89dd5596e1925dd3ae8a4f0a0edcb55b35a8... \n",
"3 [6e81f882f89dd5596e1925dd3ae8a4f0a0edcb55b35a8... \n",
"4 [6e81f882f89dd5596e1925dd3ae8a4f0a0edcb55b35a8... \n",
"\n",
" entity_ids \\\n",
"0 [425a7862-0aef-4f69-a4c8-8bd42151c9d4, bcdbf1f... \n",
"1 [425a7862-0aef-4f69-a4c8-8bd42151c9d4, bcdbf1f... \n",
"2 [425a7862-0aef-4f69-a4c8-8bd42151c9d4, bcdbf1f... \n",
"3 [425a7862-0aef-4f69-a4c8-8bd42151c9d4, bcdbf1f... \n",
"4 [d084d615-3584-4ec8-9931-90aa6075c764, 4b84859... \n",
"\n",
" relationship_ids \\\n",
"0 [2bfad9f4-5abd-48d0-8db3-a9cad9120413, 6cbb838... \n",
"1 [2bfad9f4-5abd-48d0-8db3-a9cad9120413, 6cbb838... \n",
"2 [2bfad9f4-5abd-48d0-8db3-a9cad9120413, 6cbb838... \n",
"3 [2bfad9f4-5abd-48d0-8db3-a9cad9120413, 6cbb838... \n",
"4 [6efdc42e-69a2-47c0-97ec-4b296cd16d5e] \n",
"\n",
" covariate_ids \n",
"0 [745d28dd-be20-411b-85ff-1c69ca70e7b3, 9cba185... \n",
"1 [4f9b461f-5e8f-465d-9586-e2fc81787062, 0f74618... \n",
"2 [3ef1be9c-4080-4fac-99bd-c4a636248904, 8730b20... \n",
"3 [2c292047-b79a-4958-ab57-7bf7d7a22c92, 3cbd18a... \n",
"4 [db8da02f-f889-4bb5-8e81-ab2a72e380bb] "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"import tiktoken\n",
"\n",
"from graphrag.config.enums import ModelType\n",
"from graphrag.config.models.drift_search_config import DRIFTSearchConfig\n",
"from graphrag.config.models.language_model_config import LanguageModelConfig\n",
"from graphrag.config.models.vector_store_schema_config import VectorStoreSchemaConfig\n",
"from graphrag.language_model.manager import ModelManager\n",
"from graphrag.query.indexer_adapters import (\n",
" read_indexer_entities,\n",
@ -37,6 +189,7 @@
" DRIFTSearchContextBuilder,\n",
")\n",
"from graphrag.query.structured_search.drift_search.search import DRIFTSearch\n",
"from graphrag.tokenizer.get_tokenizer import get_tokenizer\n",
"from graphrag.vector_stores.lancedb import LanceDBVectorStore\n",
"\n",
"INPUT_DIR = \"./inputs/operation dulce\"\n",
@ -62,12 +215,16 @@
"# load description embeddings to an in-memory lancedb vectorstore\n",
"# to connect to a remote db, specify url and port values.\n",
"description_embedding_store = LanceDBVectorStore(\n",
" collection_name=\"default-entity-description\",\n",
" vector_store_schema_config=VectorStoreSchemaConfig(\n",
" index_name=\"default-entity-description\"\n",
" ),\n",
")\n",
"description_embedding_store.connect(db_uri=LANCEDB_URI)\n",
"\n",
"full_content_embedding_store = LanceDBVectorStore(\n",
" collection_name=\"default-community-full_content\",\n",
" vector_store_schema_config=VectorStoreSchemaConfig(\n",
" index_name=\"default-community-full_content\"\n",
" )\n",
")\n",
"full_content_embedding_store.connect(db_uri=LANCEDB_URI)\n",
"\n",
@ -91,11 +248,20 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model config based on fnllm is deprecated and will be removed in GraphRAG v3, please use ModelType.Chat or ModelType.Embedding instead to switch to LiteLLM config.\n",
"Model config based on fnllm is deprecated and will be removed in GraphRAG v3, please use ModelType.Chat or ModelType.Embedding instead to switch to LiteLLM config.\n"
]
}
],
"source": [
"api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n",
"llm_model = os.environ[\"GRAPHRAG_LLM_MODEL\"]\n",
"embedding_model = os.environ[\"GRAPHRAG_EMBEDDING_MODEL\"]\n",
"llm_model = \"gpt-4.1\"\n",
"embedding_model = \"text-embedding-3-small\"\n",
"\n",
"chat_config = LanguageModelConfig(\n",
" api_key=api_key,\n",
@ -109,7 +275,7 @@
" config=chat_config,\n",
")\n",
"\n",
"token_encoder = tiktoken.encoding_for_model(llm_model)\n",
"tokenizer = get_tokenizer(chat_config)\n",
"\n",
"embedding_config = LanguageModelConfig(\n",
" api_key=api_key,\n",
@ -127,7 +293,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@ -152,7 +318,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@ -173,38 +339,173 @@
" reports=reports,\n",
" entity_text_embeddings=description_embedding_store,\n",
" text_units=text_units,\n",
" token_encoder=token_encoder,\n",
" tokenizer=tokenizer,\n",
" config=drift_params,\n",
")\n",
"\n",
"search = DRIFTSearch(\n",
" model=chat_model, context_builder=context_builder, token_encoder=token_encoder\n",
" model=chat_model, context_builder=context_builder, tokenizer=tokenizer\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/3 [00:00<?, ?it/s] Reached token limit - reverting to previous context state\n",
"Reached token limit - reverting to previous context state\n",
"Reached token limit - reverting to previous context state\n",
" 0%| | 0/3 [00:00<?, ?it/s] Reached token limit - reverting to previous context state\n",
"Reached token limit - reverting to previous context state\n",
"Reached token limit - reverting to previous context state\n",
" 0%| | 0/3 [00:00<?, ?it/s] Reached token limit - reverting to previous context state\n",
"Reached token limit - reverting to previous context state\n",
"Reached token limit - reverting to previous context state\n",
" \r"
]
}
],
"source": [
"resp = await search.search(\"Who is agent Mercer?\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"\"## Agent Mercer: Overview\\n\\nAgent Alex Mercer is a central figure in Operation: Dulce, serving as a key member of the elite Paranormal Military Squad. Mercer is recognized for his leadership qualities, mentorship, and pivotal role in the team's mission to investigate alien technology and uncover the secrets of the mysterious Dulce base [Data: Reports (1)].\\n\\n## Role and Qualities\\n\\n- **Leadership and Mentorship:** Mercer is distinguished by his ability to guide and support other team members, particularly mentoring Sam Rivera, the squad's cybersecurity expert. He emphasizes the importance of intuition and trust, balancing the team's reliance on advanced technology with human insight [Data: Reports (1); Sources (1, 2)].\\n- **Team Dynamics:** Mercer works alongside Taylor Cruz (command and protocol), Dr. Jordan Hayes (scientific expertise), and Sam Rivera (technical skills), contributing to a cohesive and adaptable unit. His leadership is complemented by Cruz's authoritative command, Hayes's analytical skills, and Rivera's technical acumen [Data: Reports (1)].\\n- **Operational and Relational Impact:** Beyond operational duties, Mercer fosters collaboration and trust within the squad, ensuring the team's readiness and resilience in the face of the unknown challenges presented by the Dulce base [Data: Reports (1)].\\n\\n## Summary\\n\\nIn summary, Agent Alex Mercer is a vital leader and mentor within the Paranormal Military Squad, instrumental in the exploration of the Dulce base and the broader objectives of Operation: Dulce. His leadership, intuition, and ability to foster team cohesion are central to the mission's success [Data: Reports (1)].\""
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"resp.response"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'What specific actions did Agent Mercer take during Operation: Dulce?': {'reports': id title \\\n",
"0 1 Paranormal Military Squad and Operation: Dulce \n",
"\n",
" content \n",
"0 # Paranormal Military Squad and Operation: Dul... , 'entities': Empty DataFrame\n",
"Columns: [in_context]\n",
"Index: [], 'sources': id text\n",
"0 4 a mask of duty.\\n\\nIn the midst of the descen...\n",
"1 2 differently than praise from others. This was...\n",
"2 0 # Operation: Dulce\\n\\n## Chapter 1\\n\\nThe thru...\n",
"3 1 , the hollow echo of the bay a stark reminder ...\n",
"4 3 contrast to the rigid silence enveloping the ...}, 'What challenges did Agent Mercer face during the exploration of the Dulce base?': {'reports': id title \\\n",
"0 1 Paranormal Military Squad and Operation: Dulce \n",
"\n",
" content \n",
"0 # Paranormal Military Squad and Operation: Dul... , 'entities': Empty DataFrame\n",
"Columns: [in_context]\n",
"Index: [], 'sources': id text\n",
"0 4 a mask of duty.\\n\\nIn the midst of the descen...\n",
"1 2 differently than praise from others. This was...\n",
"2 0 # Operation: Dulce\\n\\n## Chapter 1\\n\\nThe thru...\n",
"3 1 , the hollow echo of the bay a stark reminder ...\n",
"4 3 contrast to the rigid silence enveloping the ...}, 'What is the significance of the Dulce base in Operation: Dulce?': {'reports': id title \\\n",
"0 1 Paranormal Military Squad and Operation: Dulce \n",
"\n",
" content \n",
"0 # Paranormal Military Squad and Operation: Dul... , 'entities': Empty DataFrame\n",
"Columns: [in_context]\n",
"Index: [], 'sources': id text\n",
"0 0 # Operation: Dulce\\n\\n## Chapter 1\\n\\nThe thru...\n",
"1 2 differently than praise from others. This was...\n",
"2 1 , the hollow echo of the bay a stark reminder ...\n",
"3 3 contrast to the rigid silence enveloping the ...\n",
"4 4 a mask of duty.\\n\\nIn the midst of the descen...}, \"How did Mercer's relationship with Sam Rivera influence the outcome of the operation?\": {'reports': id title \\\n",
"0 1 Paranormal Military Squad and Operation: Dulce \n",
"\n",
" content \n",
"0 # Paranormal Military Squad and Operation: Dul... , 'entities': Empty DataFrame\n",
"Columns: [in_context]\n",
"Index: [], 'sources': id text\n",
"0 0 # Operation: Dulce\\n\\n## Chapter 1\\n\\nThe thru...\n",
"1 2 differently than praise from others. This was...\n",
"2 1 , the hollow echo of the bay a stark reminder ...\n",
"3 3 contrast to the rigid silence enveloping the ...\n",
"4 4 a mask of duty.\\n\\nIn the midst of the descen...}, 'What specific interactions did Mercer have with other team members while exploring the base?': {'reports': id title \\\n",
"0 1 Paranormal Military Squad and Operation: Dulce \n",
"\n",
" content \n",
"0 # Paranormal Military Squad and Operation: Dul... , 'entities': Empty DataFrame\n",
"Columns: [in_context]\n",
"Index: [], 'sources': id text\n",
"0 0 # Operation: Dulce\\n\\n## Chapter 1\\n\\nThe thru...\n",
"1 2 differently than praise from others. This was...\n",
"2 1 , the hollow echo of the bay a stark reminder ...\n",
"3 3 contrast to the rigid silence enveloping the ...\n",
"4 4 a mask of duty.\\n\\nIn the midst of the descen...}, 'How did Mercer respond to the psychological stress and fear associated with the unknown dangers of the Dulce base?': {'reports': id title \\\n",
"0 1 Paranormal Military Squad and Operation: Dulce \n",
"\n",
" content \n",
"0 # Paranormal Military Squad and Operation: Dul... , 'entities': Empty DataFrame\n",
"Columns: [in_context]\n",
"Index: [], 'sources': id text\n",
"0 4 a mask of duty.\\n\\nIn the midst of the descen...\n",
"1 2 differently than praise from others. This was...\n",
"2 0 # Operation: Dulce\\n\\n## Chapter 1\\n\\nThe thru...\n",
"3 1 , the hollow echo of the bay a stark reminder ...\n",
"4 3 contrast to the rigid silence enveloping the ...}, \"How do the team members' roles and skills contribute to the exploration of the Dulce base?\": {'reports': id title \\\n",
"0 1 Paranormal Military Squad and Operation: Dulce \n",
"\n",
" content \n",
"0 # Paranormal Military Squad and Operation: Dul... , 'entities': Empty DataFrame\n",
"Columns: [in_context]\n",
"Index: [], 'sources': id text\n",
"0 2 differently than praise from others. This was...\n",
"1 0 # Operation: Dulce\\n\\n## Chapter 1\\n\\nThe thru...\n",
"2 3 contrast to the rigid silence enveloping the ...\n",
"3 1 , the hollow echo of the bay a stark reminder ...\n",
"4 4 a mask of duty.\\n\\nIn the midst of the descen...}, 'What dangers or anomalies do the agents encounter within the Dulce base?': {'reports': id title \\\n",
"0 1 Paranormal Military Squad and Operation: Dulce \n",
"\n",
" content \n",
"0 # Paranormal Military Squad and Operation: Dul... , 'entities': Empty DataFrame\n",
"Columns: [in_context]\n",
"Index: [], 'sources': id text\n",
"0 0 # Operation: Dulce\\n\\n## Chapter 1\\n\\nThe thru...\n",
"1 2 differently than praise from others. This was...\n",
"2 3 contrast to the rigid silence enveloping the ...\n",
"3 4 a mask of duty.\\n\\nIn the midst of the descen...\n",
"4 1 , the hollow echo of the bay a stark reminder ...}, 'What is the historical or rumored background of the Dulce base within the context of Operation: Dulce?': {'reports': id title \\\n",
"0 1 Paranormal Military Squad and Operation: Dulce \n",
"\n",
" content \n",
"0 # Paranormal Military Squad and Operation: Dul... , 'entities': Empty DataFrame\n",
"Columns: [in_context]\n",
"Index: [], 'sources': id text\n",
"0 0 # Operation: Dulce\\n\\n## Chapter 1\\n\\nThe thru...\n",
"1 2 differently than praise from others. This was...\n",
"2 1 , the hollow echo of the bay a stark reminder ...\n",
"3 3 contrast to the rigid silence enveloping the ...\n",
"4 4 a mask of duty.\\n\\nIn the midst of the descen...}}\n"
]
}
],
"source": [
"print(resp.context_data)"
]
@ -212,7 +513,7 @@
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"display_name": "graphrag",
"language": "python",
"name": "python3"
},
@ -226,7 +527,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.12.10"
}
},
"nbformat": 4,