diff --git a/docs/examples_notebooks/global_search.ipynb b/docs/examples_notebooks/global_search.ipynb index 86f47859..57afc7f4 100644 --- a/docs/examples_notebooks/global_search.ipynb +++ b/docs/examples_notebooks/global_search.ipynb @@ -19,7 +19,6 @@ "import os\n", "\n", "import pandas as pd\n", - "import tiktoken\n", "\n", "from graphrag.config.enums import ModelType\n", "from graphrag.config.models.language_model_config import LanguageModelConfig\n", @@ -32,7 +31,8 @@ "from graphrag.query.structured_search.global_search.community_context import (\n", " GlobalCommunityContext,\n", ")\n", - "from graphrag.query.structured_search.global_search.search import GlobalSearch" + "from graphrag.query.structured_search.global_search.search import GlobalSearch\n", + "from graphrag.tokenizer.get_tokenizer import get_tokenizer" ] }, { @@ -58,7 +58,7 @@ "outputs": [], "source": [ "api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n", - "llm_model = os.environ[\"GRAPHRAG_LLM_MODEL\"]\n", + "llm_model = \"gpt-4.1\"\n", "\n", "config = LanguageModelConfig(\n", " api_key=api_key,\n", @@ -72,7 +72,7 @@ " config=config,\n", ")\n", "\n", - "token_encoder = tiktoken.encoding_for_model(llm_model)" + "tokenizer = get_tokenizer(config)" ] }, { @@ -142,7 +142,7 @@ " community_reports=reports,\n", " communities=communities,\n", " entities=entities, # default to None if you don't want to use community weights for ranking\n", - " token_encoder=token_encoder,\n", + " tokenizer=tokenizer,\n", ")" ] }, @@ -193,7 +193,7 @@ "search_engine = GlobalSearch(\n", " model=model,\n", " context_builder=context_builder,\n", - " token_encoder=token_encoder,\n", + " tokenizer=tokenizer,\n", " max_data_tokens=12_000, # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)\n", " map_llm_params=map_llm_params,\n", " reduce_llm_params=reduce_llm_params,\n", @@ -241,7 +241,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "graphrag", "language": "python", "name": "python3" }, @@ -255,7 +255,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.12.10" } }, "nbformat": 4, diff --git a/docs/examples_notebooks/global_search_with_dynamic_community_selection.ipynb b/docs/examples_notebooks/global_search_with_dynamic_community_selection.ipynb index dbe3c67f..759855c8 100644 --- a/docs/examples_notebooks/global_search_with_dynamic_community_selection.ipynb +++ b/docs/examples_notebooks/global_search_with_dynamic_community_selection.ipynb @@ -60,7 +60,6 @@ "\n", "api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n", "llm_model = \"gpt-4.1\"\n", - "tokenizer = get_tokenizer(encoding_model=\"o200k_base\")\n", "\n", "config = LanguageModelConfig(\n", " api_key=api_key,\n", @@ -72,7 +71,9 @@ " name=\"global_search\",\n", " model_type=ModelType.OpenAIChat,\n", " config=config,\n", - ")" + ")\n", + "\n", + "tokenizer = get_tokenizer(config)" ] }, { diff --git a/docs/examples_notebooks/local_search.ipynb b/docs/examples_notebooks/local_search.ipynb index 97aada0c..63c8b9ab 100644 --- a/docs/examples_notebooks/local_search.ipynb +++ b/docs/examples_notebooks/local_search.ipynb @@ -202,7 +202,6 @@ "api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n", "llm_model = \"gpt-4.1\"\n", "embedding_model = \"text-embedding-3-small\"\n", - "tokenizer = get_tokenizer(encoding_model=\"o200k_base\")\n", "\n", "chat_config = LanguageModelConfig(\n", " api_key=api_key,\n", @@ -227,7 +226,9 @@ " name=\"local_search_embedding\",\n", " model_type=ModelType.OpenAIEmbedding,\n", " config=embedding_config,\n", - ")" + ")\n", + "\n", + "tokenizer = get_tokenizer(chat_config)" ] }, {