diff --git a/examples_notebooks/drift_search/index.html b/examples_notebooks/drift_search/index.html index 970b9b65..f3d34f90 100644 --- a/examples_notebooks/drift_search/index.html +++ b/examples_notebooks/drift_search/index.html @@ -1874,9 +1874,6 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: read_indexer_reports, read_indexer_text_units, ) -from graphrag.query.input.loaders.dfs import ( - store_entity_semantic_embeddings, -) from graphrag.query.llm.oai.chat_openai import ChatOpenAI from graphrag.query.llm.oai.embedding import OpenAIEmbedding from graphrag.query.llm.oai.typing import OpenaiApiType @@ -1910,9 +1907,6 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: collection_name="default-entity-description", ) description_embedding_store.connect(db_uri=LANCEDB_URI) -entity_description_embeddings = store_entity_semantic_embeddings( - entities=entities, vectorstore=description_embedding_store -) print(f"Entity count: {len(entity_df)}") entity_df.head() @@ -1941,9 +1935,6 @@ from graphrag.query.indexer_adapters import ( read_indexer_reports, read_indexer_text_units, ) -from graphrag.query.input.loaders.dfs import ( - store_entity_semantic_embeddings, -) from graphrag.query.llm.oai.chat_openai import ChatOpenAI from graphrag.query.llm.oai.embedding import OpenAIEmbedding from graphrag.query.llm.oai.typing import OpenaiApiType @@ -1977,9 +1968,6 @@ description_embedding_store = LanceDBVectorStore( collection_name="default-entity-description", ) description_embedding_store.connect(db_uri=LANCEDB_URI) -entity_description_embeddings = store_entity_semantic_embeddings( - entities=entities, vectorstore=description_embedding_store -) print(f"Entity count: {len(entity_df)}") entity_df.head() @@ -2005,104 +1993,70 @@ text_unit_df.head()
- -
-
- -
-
- -
@@ -2175,8 +2129,8 @@ text_embedder = OpenAIEmbedding( -
-
+
+ -
def embed_community_reports(
+
def read_community_reports(
     input_dir: str,
-    embedder: OpenAIEmbedding,
     community_report_table: str = COMMUNITY_REPORT_TABLE,
 ):
     """Embeds the full content of the community reports and saves the DataFrame with embeddings to the output path."""
     input_path = Path(input_dir) / f"{community_report_table}.parquet"
-    output_path = Path(input_dir) / f"{community_report_table}_with_embeddings.parquet"
-
-    if not Path(output_path).exists():
-        print("Embedding file not found. Computing community report embeddings...")
-
-        report_df = pd.read_parquet(input_path)
-
-        if "full_content" not in report_df.columns:
-            error_msg = f"'full_content' column not found in {input_path}"
-            raise ValueError(error_msg)
-
-        report_df["full_content_embeddings"] = report_df.loc[:, "full_content"].apply(
-            lambda x: embedder.embed(x)
-        )
-
-        # Save the DataFrame with embeddings to the output path
-        report_df.to_parquet(output_path)
-        print(f"Embeddings saved to {output_path}")
-        return report_df
-    print(f"Embeddings file already exists at {output_path}")
-    return pd.read_parquet(output_path)
+    return pd.read_parquet(input_path)
 
 
-report_df = embed_community_reports(INPUT_DIR, text_embedder)
+report_df = read_community_reports(INPUT_DIR)
 reports = read_indexer_reports(
     report_df,
     entity_df,
@@ -2232,37 +2165,16 @@ text_embedder = OpenAIEmbedding(
     content_embedding_col="full_content_embeddings",
 )
 
-
def embed_community_reports( +
def read_community_reports( input_dir: str, - embedder: OpenAIEmbedding, community_report_table: str = COMMUNITY_REPORT_TABLE, ): """Embeds the full content of the community reports and saves the DataFrame with embeddings to the output path.""" input_path = Path(input_dir) / f"{community_report_table}.parquet" - output_path = Path(input_dir) / f"{community_report_table}_with_embeddings.parquet" - - if not Path(output_path).exists(): - print("Embedding file not found. Computing community report embeddings...") - - report_df = pd.read_parquet(input_path) - - if "full_content" not in report_df.columns: - error_msg = f"'full_content' column not found in {input_path}" - raise ValueError(error_msg) - - report_df["full_content_embeddings"] = report_df.loc[:, "full_content"].apply( - lambda x: embedder.embed(x) - ) - - # Save the DataFrame with embeddings to the output path - report_df.to_parquet(output_path) - print(f"Embeddings saved to {output_path}") - return report_df - print(f"Embeddings file already exists at {output_path}") - return pd.read_parquet(output_path) + return pd.read_parquet(input_path) -report_df = embed_community_reports(INPUT_DIR, text_embedder) +report_df = read_community_reports(INPUT_DIR) reports = read_indexer_reports( report_df, entity_df, @@ -2273,141 +2185,6 @@ reports = read_indexer_reports(
-
@@ -2434,7 +2211,7 @@ File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7 entities=entities, relationships=relationships, reports=reports, - entity_text_embeddings=entity_description_embeddings, + entity_text_embeddings=description_embedding_store, text_units=text_units, ) @@ -2448,7 +2225,7 @@ File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7 entities=entities, relationships=relationships, reports=reports, - entity_text_embeddings=entity_description_embeddings, + entity_text_embeddings=description_embedding_store, text_units=text_units, ) @@ -2469,21 +2246,21 @@ search = DRIFTSearch(
 ---------------------------------------------------------------------------
 NameError                                 Traceback (most recent call last)
-Cell In[5], line 6
+Cell In[5], line 4
       1 context_builder = DRIFTSearchContextBuilder(
       2     chat_llm=chat_llm,
       3     text_embedder=text_embedder,
-      4     entities=entities,
+----> 4     entities=entities,
       5     relationships=relationships,
-----> 6     reports=reports,
-      7     entity_text_embeddings=entity_description_embeddings,
+      6     reports=reports,
+      7     entity_text_embeddings=description_embedding_store,
       8     text_units=text_units,
       9 )
      11 search = DRIFTSearch(
      12     llm=chat_llm, context_builder=context_builder, token_encoder=token_encoder
      13 )
 
-NameError: name 'reports' is not defined
+NameError: name 'entities' is not defined
diff --git a/examples_notebooks/global_search/index.html b/examples_notebooks/global_search/index.html index 816e8d01..8f8ae714 100644 --- a/examples_notebooks/global_search/index.html +++ b/examples_notebooks/global_search/index.html @@ -2244,114 +2244,38 @@ report_df.head()
- -
-
- -
@@ -2368,8 +2292,8 @@ Report count after filtering by community level 2: 17 -
-
+
+
+
+ + +
@@ -2487,8 +2434,8 @@ reduce_llm_params = {
-
-
+
+
+
+ + +
@@ -2579,186 +2556,16 @@ print(result.response)
-
-
- - -
-
- -
@@ -2796,174 +2603,17 @@ result.context_data["reports"]
-
- -
@@ -3007,9 +2657,17 @@ print(
-
diff --git a/examples_notebooks/global_search_with_dynamic_community_selection/index.html b/examples_notebooks/global_search_with_dynamic_community_selection/index.html index 0b15e14c..4ae51a13 100644 --- a/examples_notebooks/global_search_with_dynamic_community_selection/index.html +++ b/examples_notebooks/global_search_with_dynamic_community_selection/index.html @@ -2152,138 +2152,38 @@ report_df.head()
- -
-
- -
-
- -
@@ -2302,8 +2202,8 @@ See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stab -
-
+
+
+
+ + +
@@ -2447,8 +2382,8 @@ reduce_llm_params = {
-
-
+
+
+
+ + +
@@ -2541,316 +2506,14 @@ print(result.response)
 ---------------------------------------------------------------------------
-RateLimitError                            Traceback (most recent call last)
+NameError                                 Traceback (most recent call last)
 Cell In[9], line 1
-----> 1 result = await search_engine.asearch(
+----> 1 result = await search_engine.asearch(
       2     "What is Cosmic Vocalization and who are involved in it?"
       3 )
       5 print(result.response)
 
-File ~/work/graphrag/graphrag/graphrag/query/structured_search/global_search/search.py:156, in GlobalSearch.asearch(self, query, conversation_history, **kwargs)
-    153 llm_calls, prompt_tokens, output_tokens = {}, {}, {}
-    155 start_time = time.time()
---> 156 context_result = await self.context_builder.build_context(
-    157     query=query,
-    158     conversation_history=conversation_history,
-    159     **self.context_builder_params,
-    160 )
-    161 llm_calls["build_context"] = context_result.llm_calls
-    162 prompt_tokens["build_context"] = context_result.prompt_tokens
-
-File ~/work/graphrag/graphrag/graphrag/query/structured_search/global_search/community_context.py:98, in GlobalCommunityContext.build_context(self, query, conversation_history, use_community_summary, column_delimiter, shuffle_data, include_community_rank, min_community_rank, community_rank_name, include_community_weight, community_weight_name, normalize_community_weight, max_tokens, context_name, conversation_history_user_turns_only, conversation_history_max_turns, **kwargs)
-     93 community_reports = self.community_reports
-     94 if self.dynamic_community_selection is not None:
-     95     (
-     96         community_reports,
-     97         dynamic_info,
----> 98     ) = await self.dynamic_community_selection.select(query)
-     99     llm_calls += dynamic_info["llm_calls"]
-    100     prompt_tokens += dynamic_info["prompt_tokens"]
-
-File ~/work/graphrag/graphrag/graphrag/query/context_builder/dynamic_community_selection.py:106, in DynamicCommunitySelection.select(self, query)
-    103 relevant_communities = set()
-    105 while queue:
---> 106     gather_results = await asyncio.gather(*[
-    107         rate_relevancy(
-    108             query=query,
-    109             description=(
-    110                 self.reports[community].summary
-    111                 if self.use_summary
-    112                 else self.reports[community].full_content
-    113             ),
-    114             llm=self.llm,
-    115             token_encoder=self.token_encoder,
-    116             rate_query=self.rate_query,
-    117             num_repeats=self.num_repeats,
-    118             semaphore=self.semaphore,
-    119             **self.llm_kwargs,
-    120         )
-    121         for community in queue
-    122     ])
-    124     communities_to_rate = []
-    125     for community, result in zip(queue, gather_results, strict=True):
-
-File ~/work/graphrag/graphrag/graphrag/query/context_builder/rate_relevancy.py:54, in rate_relevancy(query, description, llm, token_encoder, rate_query, num_repeats, semaphore, **llm_kwargs)
-     52 for _ in range(num_repeats):
-     53     async with semaphore if semaphore is not None else nullcontext():
----> 54         response = await llm.agenerate(messages=messages, **llm_kwargs)
-     55     try:
-     56         _, parsed_response = try_parse_json_object(response)
-
-File ~/work/graphrag/graphrag/graphrag/query/llm/oai/chat_openai.py:142, in ChatOpenAI.agenerate(self, messages, streaming, callbacks, **kwargs)
-    135 try:
-    136     retryer = AsyncRetrying(
-    137         stop=stop_after_attempt(self.max_retries),
-    138         wait=wait_exponential_jitter(max=10),
-    139         reraise=True,
-    140         retry=retry_if_exception_type(self.retry_error_types),  # type: ignore
-    141     )
---> 142     async for attempt in retryer:
-    143         with attempt:
-    144             return await self._agenerate(
-    145                 messages=messages,
-    146                 streaming=streaming,
-    147                 callbacks=callbacks,
-    148                 **kwargs,
-    149             )
-
-File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/tenacity/asyncio/__init__.py:166, in AsyncRetrying.__anext__(self)
-    164 async def __anext__(self) -> AttemptManager:
-    165     while True:
---> 166         do = await self.iter(retry_state=self._retry_state)
-    167         if do is None:
-    168             raise StopAsyncIteration
-
-File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/tenacity/asyncio/__init__.py:153, in AsyncRetrying.iter(self, retry_state)
-    151 result = None
-    152 for action in self.iter_state.actions:
---> 153     result = await action(retry_state)
-    154 return result
-
-File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/tenacity/_utils.py:99, in wrap_to_async_func.<locals>.inner(*args, **kwargs)
-     98 async def inner(*args: typing.Any, **kwargs: typing.Any) -> typing.Any:
----> 99     return call(*args, **kwargs)
-
-File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/tenacity/__init__.py:418, in BaseRetrying._post_stop_check_actions.<locals>.exc_check(rs)
-    416 retry_exc = self.retry_error_cls(fut)
-    417 if self.reraise:
---> 418     raise retry_exc.reraise()
-    419 raise retry_exc from fut.exception()
-
-File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/tenacity/__init__.py:185, in RetryError.reraise(self)
-    183 def reraise(self) -> t.NoReturn:
-    184     if self.last_attempt.failed:
---> 185         raise self.last_attempt.result()
-    186     raise self
-
-File /opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/concurrent/futures/_base.py:449, in Future.result(self, timeout)
-    447     raise CancelledError()
-    448 elif self._state == FINISHED:
---> 449     return self.__get_result()
-    451 self._condition.wait(timeout)
-    453 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
-
-File /opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/concurrent/futures/_base.py:401, in Future.__get_result(self)
-    399 if self._exception:
-    400     try:
---> 401         raise self._exception
-    402     finally:
-    403         # Break a reference cycle with the exception in self._exception
-    404         self = None
-
-File ~/work/graphrag/graphrag/graphrag/query/llm/oai/chat_openai.py:144, in ChatOpenAI.agenerate(self, messages, streaming, callbacks, **kwargs)
-    142     async for attempt in retryer:
-    143         with attempt:
---> 144             return await self._agenerate(
-    145                 messages=messages,
-    146                 streaming=streaming,
-    147                 callbacks=callbacks,
-    148                 **kwargs,
-    149             )
-    150 except RetryError as e:
-    151     self._reporter.error(f"Error at agenerate(): {e}")
-
-File ~/work/graphrag/graphrag/graphrag/query/llm/oai/chat_openai.py:268, in ChatOpenAI._agenerate(self, messages, streaming, callbacks, **kwargs)
-    266 if not model:
-    267     raise ValueError(_MODEL_REQUIRED_MSG)
---> 268 response = await self.async_client.chat.completions.create(  # type: ignore
-    269     model=model,
-    270     messages=messages,  # type: ignore
-    271     stream=streaming,
-    272     **kwargs,
-    273 )
-    274 if streaming:
-    275     full_response = ""
-
-File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/resources/chat/completions.py:1661, in AsyncCompletions.create(self, messages, model, audio, frequency_penalty, function_call, functions, logit_bias, logprobs, max_completion_tokens, max_tokens, metadata, modalities, n, parallel_tool_calls, prediction, presence_penalty, response_format, seed, service_tier, stop, store, stream, stream_options, temperature, tool_choice, tools, top_logprobs, top_p, user, extra_headers, extra_query, extra_body, timeout)
-   1620 @required_args(["messages", "model"], ["messages", "model", "stream"])
-   1621 async def create(
-   1622     self,
-   (...)
-   1658     timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-   1659 ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
-   1660     validate_response_format(response_format)
--> 1661     return await self._post(
-   1662         "/chat/completions",
-   1663         body=await async_maybe_transform(
-   1664             {
-   1665                 "messages": messages,
-   1666                 "model": model,
-   1667                 "audio": audio,
-   1668                 "frequency_penalty": frequency_penalty,
-   1669                 "function_call": function_call,
-   1670                 "functions": functions,
-   1671                 "logit_bias": logit_bias,
-   1672                 "logprobs": logprobs,
-   1673                 "max_completion_tokens": max_completion_tokens,
-   1674                 "max_tokens": max_tokens,
-   1675                 "metadata": metadata,
-   1676                 "modalities": modalities,
-   1677                 "n": n,
-   1678                 "parallel_tool_calls": parallel_tool_calls,
-   1679                 "prediction": prediction,
-   1680                 "presence_penalty": presence_penalty,
-   1681                 "response_format": response_format,
-   1682                 "seed": seed,
-   1683                 "service_tier": service_tier,
-   1684                 "stop": stop,
-   1685                 "store": store,
-   1686                 "stream": stream,
-   1687                 "stream_options": stream_options,
-   1688                 "temperature": temperature,
-   1689                 "tool_choice": tool_choice,
-   1690                 "tools": tools,
-   1691                 "top_logprobs": top_logprobs,
-   1692                 "top_p": top_p,
-   1693                 "user": user,
-   1694             },
-   1695             completion_create_params.CompletionCreateParams,
-   1696         ),
-   1697         options=make_request_options(
-   1698             extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-   1699         ),
-   1700         cast_to=ChatCompletion,
-   1701         stream=stream or False,
-   1702         stream_cls=AsyncStream[ChatCompletionChunk],
-   1703     )
-
-File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1843, in AsyncAPIClient.post(self, path, cast_to, body, files, options, stream, stream_cls)
-   1829 async def post(
-   1830     self,
-   1831     path: str,
-   (...)
-   1838     stream_cls: type[_AsyncStreamT] | None = None,
-   1839 ) -> ResponseT | _AsyncStreamT:
-   1840     opts = FinalRequestOptions.construct(
-   1841         method="post", url=path, json_data=body, files=await async_to_httpx_files(files), **options
-   1842     )
--> 1843     return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls)
-
-File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1537, in AsyncAPIClient.request(self, cast_to, options, stream, stream_cls, remaining_retries)
-   1534 else:
-   1535     retries_taken = 0
--> 1537 return await self._request(
-   1538     cast_to=cast_to,
-   1539     options=options,
-   1540     stream=stream,
-   1541     stream_cls=stream_cls,
-   1542     retries_taken=retries_taken,
-   1543 )
-
-File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1623, in AsyncAPIClient._request(self, cast_to, options, stream, stream_cls, retries_taken)
-   1621 if remaining_retries > 0 and self._should_retry(err.response):
-   1622     await err.response.aclose()
--> 1623     return await self._retry_request(
-   1624         input_options,
-   1625         cast_to,
-   1626         retries_taken=retries_taken,
-   1627         response_headers=err.response.headers,
-   1628         stream=stream,
-   1629         stream_cls=stream_cls,
-   1630     )
-   1632 # If the response is streamed then we need to explicitly read the response
-   1633 # to completion before attempting to access the response text.
-   1634 if not err.response.is_closed:
-
-File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1670, in AsyncAPIClient._retry_request(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)
-   1666 log.info("Retrying request to %s in %f seconds", options.url, timeout)
-   1668 await anyio.sleep(timeout)
--> 1670 return await self._request(
-   1671     options=options,
-   1672     cast_to=cast_to,
-   1673     retries_taken=retries_taken + 1,
-   1674     stream=stream,
-   1675     stream_cls=stream_cls,
-   1676 )
-
-File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1623, in AsyncAPIClient._request(self, cast_to, options, stream, stream_cls, retries_taken)
-   1621 if remaining_retries > 0 and self._should_retry(err.response):
-   1622     await err.response.aclose()
--> 1623     return await self._retry_request(
-   1624         input_options,
-   1625         cast_to,
-   1626         retries_taken=retries_taken,
-   1627         response_headers=err.response.headers,
-   1628         stream=stream,
-   1629         stream_cls=stream_cls,
-   1630     )
-   1632 # If the response is streamed then we need to explicitly read the response
-   1633 # to completion before attempting to access the response text.
-   1634 if not err.response.is_closed:
-
-File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1670, in AsyncAPIClient._retry_request(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)
-   1666 log.info("Retrying request to %s in %f seconds", options.url, timeout)
-   1668 await anyio.sleep(timeout)
--> 1670 return await self._request(
-   1671     options=options,
-   1672     cast_to=cast_to,
-   1673     retries_taken=retries_taken + 1,
-   1674     stream=stream,
-   1675     stream_cls=stream_cls,
-   1676 )
-
-    [... skipping similar frames: AsyncAPIClient._request at line 1623 (17 times), AsyncAPIClient._retry_request at line 1670 (17 times)]
-
-File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1623, in AsyncAPIClient._request(self, cast_to, options, stream, stream_cls, retries_taken)
-   1621 if remaining_retries > 0 and self._should_retry(err.response):
-   1622     await err.response.aclose()
--> 1623     return await self._retry_request(
-   1624         input_options,
-   1625         cast_to,
-   1626         retries_taken=retries_taken,
-   1627         response_headers=err.response.headers,
-   1628         stream=stream,
-   1629         stream_cls=stream_cls,
-   1630     )
-   1632 # If the response is streamed then we need to explicitly read the response
-   1633 # to completion before attempting to access the response text.
-   1634 if not err.response.is_closed:
-
-File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1670, in AsyncAPIClient._retry_request(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)
-   1666 log.info("Retrying request to %s in %f seconds", options.url, timeout)
-   1668 await anyio.sleep(timeout)
--> 1670 return await self._request(
-   1671     options=options,
-   1672     cast_to=cast_to,
-   1673     retries_taken=retries_taken + 1,
-   1674     stream=stream,
-   1675     stream_cls=stream_cls,
-   1676 )
-
-File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1638, in AsyncAPIClient._request(self, cast_to, options, stream, stream_cls, retries_taken)
-   1635         await err.response.aread()
-   1637     log.debug("Re-raising status error")
--> 1638     raise self._make_status_error_from_response(err.response) from None
-   1640 return await self._process_response(
-   1641     cast_to=cast_to,
-   1642     options=options,
-   (...)
-   1646     retries_taken=retries_taken,
-   1647 )
-
-RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
+NameError: name 'search_engine' is not defined
diff --git a/examples_notebooks/index_migration/index.html b/examples_notebooks/index_migration/index.html new file mode 100644 index 00000000..88788215 --- /dev/null +++ b/examples_notebooks/index_migration/index.html @@ -0,0 +1,2503 @@ + + + + + + + + + + + + + + + + + + + Index migration - GraphRAG + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Index migration

+ + + + + + + + + + + + +
+ +
+ + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+ + + +
+
+
+
+ + + + + + + + + + + + \ No newline at end of file diff --git a/examples_notebooks/local_search/index.html b/examples_notebooks/local_search/index.html index fa767baa..76c31a45 100644 --- a/examples_notebooks/local_search/index.html +++ b/examples_notebooks/local_search/index.html @@ -2168,9 +2168,6 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: read_indexer_reports, read_indexer_text_units, ) -from graphrag.query.input.loaders.dfs import ( - store_entity_semantic_embeddings, -) from graphrag.query.llm.oai.chat_openai import ChatOpenAI from graphrag.query.llm.oai.embedding import OpenAIEmbedding from graphrag.query.llm.oai.typing import OpenaiApiType @@ -2194,9 +2191,6 @@ from graphrag.query.indexer_adapters import ( read_indexer_reports, read_indexer_text_units, ) -from graphrag.query.input.loaders.dfs import ( - store_entity_semantic_embeddings, -) from graphrag.query.llm.oai.chat_openai import ChatOpenAI from graphrag.query.llm.oai.embedding import OpenAIEmbedding from graphrag.query.llm.oai.typing import OpenaiApiType @@ -2333,9 +2327,6 @@ COMMUNITY_LEVEL = 2 collection_name="default-entity-description", ) description_embedding_store.connect(db_uri=LANCEDB_URI) -entity_description_embeddings = store_entity_semantic_embeddings( - entities=entities, vectorstore=description_embedding_store -) print(f"Entity count: {len(entity_df)}") entity_df.head() @@ -2352,9 +2343,6 @@ description_embedding_store = LanceDBVectorStore( collection_name="default-entity-description", ) description_embedding_store.connect(db_uri=LANCEDB_URI) -entity_description_embeddings = store_entity_semantic_embeddings( - entities=entities, vectorstore=description_embedding_store -) print(f"Entity count: {len(entity_df)}") entity_df.head() @@ -2368,137 +2356,70 @@ entity_df.head()
- -
-
- -
@@ -2555,113 +2476,58 @@ relationship_df.head()
- -
-
- -
@@ -3107,8 +2973,8 @@ text_embedder = OpenAIEmbedding( -
-
+
+
+
+ + +
@@ -3262,8 +3157,8 @@ llm_params = {
-
-
+
+
+
+ + +
@@ -3348,111 +3268,15 @@ print(result.response)
- -
-
-
@@ -3494,112 +3318,16 @@ print(result.response)
- -
-
-
@@ -3869,8 +3597,8 @@ Cell In[19], line 1 -
-
+
+
+
+ + +
@@ -3954,121 +3706,21 @@ print(candidate_questions.response)
-
diff --git a/sitemap.xml.gz b/sitemap.xml.gz index a68f3b8c..b42227d6 100644 Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ