diff --git a/docs/examples_notebooks/local_search.ipynb b/docs/examples_notebooks/local_search.ipynb index 1769c6ad..97aada0c 100644 --- a/docs/examples_notebooks/local_search.ipynb +++ b/docs/examples_notebooks/local_search.ipynb @@ -19,8 +19,8 @@ "import os\n", "\n", "import pandas as pd\n", - "import tiktoken\n", "\n", + "from graphrag.config.models.vector_store_schema_config import VectorStoreSchemaConfig\n", "from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey\n", "from graphrag.query.indexer_adapters import (\n", " read_indexer_covariates,\n", @@ -102,7 +102,9 @@ "# load description embeddings to an in-memory lancedb vectorstore\n", "# to connect to a remote db, specify url and port values.\n", "description_embedding_store = LanceDBVectorStore(\n", - " collection_name=\"default-entity-description\",\n", + " vector_store_schema_config=VectorStoreSchemaConfig(\n", + " index_name=\"default-entity-description\"\n", + " )\n", ")\n", "description_embedding_store.connect(db_uri=LANCEDB_URI)\n", "\n", @@ -195,10 +197,12 @@ "from graphrag.config.enums import ModelType\n", "from graphrag.config.models.language_model_config import LanguageModelConfig\n", "from graphrag.language_model.manager import ModelManager\n", + "from graphrag.tokenizer.get_tokenizer import get_tokenizer\n", "\n", "api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n", - "llm_model = os.environ[\"GRAPHRAG_LLM_MODEL\"]\n", - "embedding_model = os.environ[\"GRAPHRAG_EMBEDDING_MODEL\"]\n", + "llm_model = \"gpt-4.1\"\n", + "embedding_model = \"text-embedding-3-small\"\n", + "tokenizer = get_tokenizer(encoding_model=\"o200k_base\")\n", "\n", "chat_config = LanguageModelConfig(\n", " api_key=api_key,\n", @@ -212,8 +216,6 @@ " config=chat_config,\n", ")\n", "\n", - "token_encoder = tiktoken.encoding_for_model(llm_model)\n", - "\n", "embedding_config = LanguageModelConfig(\n", " api_key=api_key,\n", " type=ModelType.OpenAIEmbedding,\n", @@ -251,7 +253,7 @@ " entity_text_embeddings=description_embedding_store,\n", " embedding_vectorstore_key=EntityVectorStoreKey.ID, # if the vectorstore uses entity title as ids, set this to EntityVectorStoreKey.TITLE\n", " text_embedder=text_embedder,\n", - " token_encoder=token_encoder,\n", + " tokenizer=tokenizer,\n", ")" ] }, @@ -314,7 +316,7 @@ "search_engine = LocalSearch(\n", " model=chat_model,\n", " context_builder=context_builder,\n", - " token_encoder=token_encoder,\n", + " tokenizer=tokenizer,\n", " model_params=model_params,\n", " context_builder_params=local_context_params,\n", " response_type=\"multiple paragraphs\", # free form text describing the response type and format, can be anything, e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report\n", @@ -426,7 +428,7 @@ "question_generator = LocalQuestionGen(\n", " model=chat_model,\n", " context_builder=context_builder,\n", - " token_encoder=token_encoder,\n", + " tokenizer=tokenizer,\n", " model_params=model_params,\n", " context_builder_params=local_context_params,\n", ")" @@ -451,7 +453,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "graphrag", "language": "python", "name": "python3" }, @@ -465,7 +467,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.12.10" } }, "nbformat": 4,