Fix local_search notebook
This commit is contained in:
parent 4c4ab16070
commit a618e7f456
@@ -19,8 +19,8 @@
"import os\n",
"\n",
"import pandas as pd\n",
"import tiktoken\n",
"\n",
"from graphrag.config.models.vector_store_schema_config import VectorStoreSchemaConfig\n",
"from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey\n",
"from graphrag.query.indexer_adapters import (\n",
"    read_indexer_covariates,\n",
@@ -102,7 +102,9 @@
"# load description embeddings to an in-memory lancedb vectorstore\n",
"# to connect to a remote db, specify url and port values.\n",
"description_embedding_store = LanceDBVectorStore(\n",
"    collection_name=\"default-entity-description\",\n",
"    vector_store_schema_config=VectorStoreSchemaConfig(\n",
"        index_name=\"default-entity-description\"\n",
"    )\n",
")\n",
"description_embedding_store.connect(db_uri=LANCEDB_URI)\n",
"\n",
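For reference, the vector-store cell this hunk produces, written out as plain Python rather than notebook JSON. A sketch only: the import path for LanceDBVectorStore and the LANCEDB_URI value are assumptions (the notebook defines LANCEDB_URI earlier); the arguments mirror the hunk above.

# Sketch of the updated vector-store cell.
from graphrag.config.models.vector_store_schema_config import VectorStoreSchemaConfig
from graphrag.vector_stores.lancedb import LanceDBVectorStore  # assumed import path

LANCEDB_URI = "./lancedb"  # placeholder; defined earlier in the notebook

# load description embeddings to an in-memory lancedb vectorstore
# to connect to a remote db, specify url and port values.
description_embedding_store = LanceDBVectorStore(
    collection_name="default-entity-description",
    vector_store_schema_config=VectorStoreSchemaConfig(
        index_name="default-entity-description"
    ),
)
description_embedding_store.connect(db_uri=LANCEDB_URI)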
@@ -195,10 +197,12 @@
"from graphrag.config.enums import ModelType\n",
"from graphrag.config.models.language_model_config import LanguageModelConfig\n",
"from graphrag.language_model.manager import ModelManager\n",
"from graphrag.tokenizer.get_tokenizer import get_tokenizer\n",
"\n",
"api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n",
"llm_model = os.environ[\"GRAPHRAG_LLM_MODEL\"]\n",
"embedding_model = os.environ[\"GRAPHRAG_EMBEDDING_MODEL\"]\n",
"llm_model = \"gpt-4.1\"\n",
"embedding_model = \"text-embedding-3-small\"\n",
"tokenizer = get_tokenizer(encoding_model=\"o200k_base\")\n",
"\n",
"chat_config = LanguageModelConfig(\n",
"    api_key=api_key,\n",
@@ -212,8 +216,6 @@
"    config=chat_config,\n",
")\n",
"\n",
"token_encoder = tiktoken.encoding_for_model(llm_model)\n",
"\n",
"embedding_config = LanguageModelConfig(\n",
"    api_key=api_key,\n",
"    type=ModelType.OpenAIEmbedding,\n",
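The two hunks above drop the tiktoken-based token_encoder and obtain a tokenizer from graphrag's get_tokenizer instead. A minimal sketch of that setup; the chat model type, the model field, and the shape of the ModelManager call are assumptions based on the imports shown in this diff, not lines from it.

import os

from graphrag.config.enums import ModelType
from graphrag.config.models.language_model_config import LanguageModelConfig
from graphrag.language_model.manager import ModelManager
from graphrag.tokenizer.get_tokenizer import get_tokenizer

api_key = os.environ["GRAPHRAG_API_KEY"]
llm_model = "gpt-4.1"
embedding_model = "text-embedding-3-small"

# The tokenizer replaces the removed tiktoken encoder; o200k_base matches the diff.
tokenizer = get_tokenizer(encoding_model="o200k_base")

chat_config = LanguageModelConfig(
    api_key=api_key,
    type=ModelType.OpenAIChat,  # assumed; the chat type line is outside this hunk
    model=llm_model,            # assumed; not visible in this hunk
)
chat_model = ModelManager().get_or_create_chat_model(
    name="local_search",        # hypothetical registry name
    model_type=ModelType.OpenAIChat,  # assumed
    config=chat_config,
)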
@@ -251,7 +253,7 @@
"    entity_text_embeddings=description_embedding_store,\n",
"    embedding_vectorstore_key=EntityVectorStoreKey.ID,  # if the vectorstore uses entity title as ids, set this to EntityVectorStoreKey.TITLE\n",
"    text_embedder=text_embedder,\n",
"    token_encoder=token_encoder,\n",
"    tokenizer=tokenizer,\n",
")"
]
},
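The context-builder hunk suggests the token_encoder argument is swapped for the new tokenizer. A fragmentary sketch: the LocalSearchMixedContext class is assumed from the graphrag local-search example (it is not named in this diff), and arguments not visible in the hunk (entities, community reports, text units, relationships, covariates) are elided.

from graphrag.query.structured_search.local_search.mixed_context import (
    LocalSearchMixedContext,  # assumed class; not shown in this diff
)

context_builder = LocalSearchMixedContext(
    # ... entities, community reports, text units, relationships, covariates ...
    entity_text_embeddings=description_embedding_store,
    embedding_vectorstore_key=EntityVectorStoreKey.ID,  # use .TITLE if the store keys on entity titles
    text_embedder=text_embedder,
    tokenizer=tokenizer,  # was token_encoder=token_encoder before this commit
)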
@@ -314,7 +316,7 @@
"search_engine = LocalSearch(\n",
"    model=chat_model,\n",
"    context_builder=context_builder,\n",
"    token_encoder=token_encoder,\n",
"    tokenizer=tokenizer,\n",
"    model_params=model_params,\n",
"    context_builder_params=local_context_params,\n",
"    response_type=\"multiple paragraphs\",  # free form text describing the response type and format, can be anything, e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report\n",
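The same substitution appears in the search-engine constructor. A sketch using only the arguments visible in the hunk, with an assumed import path; model_params and local_context_params are defined elsewhere in the notebook.

from graphrag.query.structured_search.local_search.search import LocalSearch  # assumed path

search_engine = LocalSearch(
    model=chat_model,
    context_builder=context_builder,
    tokenizer=tokenizer,  # replaces token_encoder per this commit
    model_params=model_params,
    context_builder_params=local_context_params,
    # free-form description of the desired answer shape, e.g. "prioritized list",
    # "single paragraph", "multiple paragraphs", "multiple-page report"
    response_type="multiple paragraphs",
)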
@@ -426,7 +428,7 @@
"question_generator = LocalQuestionGen(\n",
"    model=chat_model,\n",
"    context_builder=context_builder,\n",
"    token_encoder=token_encoder,\n",
"    tokenizer=tokenizer,\n",
"    model_params=model_params,\n",
"    context_builder_params=local_context_params,\n",
")"
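The question-generator constructor gets the same treatment. The import path and the usage shown in comments are assumptions based on the standard graphrag local-search notebook, not lines from this diff.

from graphrag.query.question_gen.local_gen import LocalQuestionGen  # assumed path

question_generator = LocalQuestionGen(
    model=chat_model,
    context_builder=context_builder,
    tokenizer=tokenizer,  # replaces token_encoder per this commit
    model_params=model_params,
    context_builder_params=local_context_params,
)

# Hypothetical usage (method name and parameters assumed, run inside the notebook's event loop):
# candidate_questions = await question_generator.agenerate(
#     question_history=["<a prior user question>"],
#     context_data=None,
#     question_count=5,
# )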
@@ -451,7 +453,7 @@
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"display_name": "graphrag",
"language": "python",
"name": "python3"
},
@@ -465,7 +467,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.12.10"
}
},
"nbformat": 4,