mirror of https://github.com/microsoft/graphrag.git (synced 2026-01-14 00:57:23 +08:00)
Nov 2025 housekeeping (#2120)
Some checks failed
Python Build and Type Check / python-ci (ubuntu-latest, 3.11) (push) Has been cancelled
Python Build and Type Check / python-ci (ubuntu-latest, 3.12) (push) Has been cancelled
Python Build and Type Check / python-ci (windows-latest, 3.11) (push) Has been cancelled
Python Build and Type Check / python-ci (windows-latest, 3.12) (push) Has been cancelled
Python Integration Tests / python-ci (ubuntu-latest, 3.12) (push) Has been cancelled
Python Integration Tests / python-ci (windows-latest, 3.12) (push) Has been cancelled
Python Notebook Tests / python-ci (ubuntu-latest, 3.12) (push) Has been cancelled
Python Notebook Tests / python-ci (windows-latest, 3.12) (push) Has been cancelled
Python Smoke Tests / python-ci (ubuntu-latest, 3.12) (push) Has been cancelled
Python Smoke Tests / python-ci (windows-latest, 3.12) (push) Has been cancelled
Python Unit Tests / python-ci (ubuntu-latest, 3.12) (push) Has been cancelled
Python Unit Tests / python-ci (windows-latest, 3.12) (push) Has been cancelled
* Remove gensim sideload
* Split CI build/type checks from unit tests
* Thorough review of docs to align with v3
* Format
* Fix version
* Fix type
This commit is contained in:
parent 6033e4ffa2
commit ae1f5e1811
.github/workflows/gh-pages.yml (2 changes, vendored)
@ -6,7 +6,7 @@ permissions:
contents: write

env:
PYTHON_VERSION: "3.11"
PYTHON_VERSION: "3.12"

jobs:
build:
.github/workflows/python-checks.yml (78 changes, new file, vendored)
@ -0,0 +1,78 @@
name: Python Build and Type Check
on:
push:
branches:
- "**/main" # match branches like feature/main
- "main" # match the main branch
pull_request:
types:
- opened
- reopened
- synchronize
- ready_for_review
branches:
- "**/main"
- "main"
paths-ignore:
- "**/*.md"
- ".semversioner/**"

permissions:
contents: read
pull-requests: read

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
# Only run the for the latest commit
cancel-in-progress: true

jobs:
python-ci:
# skip draft PRs
if: github.event.pull_request.draft == false
strategy:
matrix:
python-version: ["3.11", "3.12"]
os: [ubuntu-latest, windows-latest]
fail-fast: false # Continue running all jobs even if one fails
env:
DEBUG: 1

runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4

- uses: dorny/paths-filter@v3
id: changes
with:
filters: |
python:
- 'graphrag/**/*'
- 'uv.lock'
- 'pyproject.toml'
- '**/*.py'
- '**/*.toml'
- '**/*.ipynb'
- '.github/workflows/python*.yml'
- 'tests/**/*'

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Install uv
uses: astral-sh/setup-uv@v6

- name: Install dependencies
shell: bash
run: |
uv sync --all-packages

- name: Check
run: |
uv run poe check

- name: Build
run: |
uv build --all-packages
@ -32,7 +32,7 @@ jobs:
if: github.event.pull_request.draft == false
strategy:
matrix:
python-version: ["3.10"]
python-version: ["3.12"]
os: [ubuntu-latest, windows-latest]
fail-fast: false # continue running all jobs even if one fails
env:
@ -68,7 +68,6 @@ jobs:
shell: bash
run: |
uv sync --all-packages
uv pip install gensim

- name: Build
run: |
.github/workflows/python-notebook-tests.yml (3 changes, vendored)
@ -32,7 +32,7 @@ jobs:
if: github.event.pull_request.draft == false
strategy:
matrix:
python-version: ["3.10"]
python-version: ["3.12"]
os: [ubuntu-latest, windows-latest]
fail-fast: false # Continue running all jobs even if one fails
env:
@ -68,7 +68,6 @@ jobs:
shell: bash
run: |
uv sync --all-packages
uv pip install gensim

- name: Notebook Test
run: |
.github/workflows/python-publish.yml (2 changes, vendored)
@ -6,7 +6,7 @@ on:
branches: [main]

env:
PYTHON_VERSION: "3.10"
PYTHON_VERSION: "3.12"

jobs:
publish:
.github/workflows/python-smoke-tests.yml (3 changes, vendored)
@ -32,7 +32,7 @@ jobs:
if: github.event.pull_request.draft == false
strategy:
matrix:
python-version: ["3.10"]
python-version: ["3.12"]
os: [ubuntu-latest, windows-latest]
fail-fast: false # Continue running all jobs even if one fails
env:
@ -73,7 +73,6 @@ jobs:
shell: bash
run: |
uv sync --all-packages
uv pip install gensim

- name: Build
run: |
@ -1,4 +1,4 @@
name: Python CI
name: Python Unit Tests
on:
push:
branches:
@ -32,7 +32,7 @@ jobs:
if: github.event.pull_request.draft == false
strategy:
matrix:
python-version: ["3.10", "3.11"] # add 3.12 once gensim supports it. TODO: watch this issue - https://github.com/piskvorky/gensim/issues/3510
python-version: ["3.12"]
os: [ubuntu-latest, windows-latest]
fail-fast: false # Continue running all jobs even if one fails
env:
@ -68,15 +68,6 @@ jobs:
shell: bash
run: |
uv sync --all-packages
uv pip install gensim

- name: Check
run: |
uv run poe check

- name: Build
run: |
uv build --all-packages

- name: Unit Test
run: |
@ -6,9 +6,9 @@ This page contains information on selecting a model to use and options to supply

GraphRAG was built and tested using OpenAI models, so this is the default model set we support. This is not intended to be a limiter or statement of quality or fitness for your use case, only that it's the set we are most familiar with for prompting, tuning, and debugging.

Starting with version 2.6.0, GraphRAG supports using [LiteLLM](https://docs.litellm.ai/) for calling language models. LiteLLM provides support for 100+ models though it is important to note that when choosing a model it must support returning [structured outputs](https://openai.com/index/introducing-structured-outputs-in-the-api/) adhering to a [JSON schema](https://docs.litellm.ai/docs/completion/json_mode).
GraphRAG uses [LiteLLM](https://docs.litellm.ai/) for calling language models. LiteLLM provides support for 100+ models though it is important to note that when choosing a model it must support returning [structured outputs](https://openai.com/index/introducing-structured-outputs-in-the-api/) adhering to a [JSON schema](https://docs.litellm.ai/docs/completion/json_mode).

Example using LiteLLm as the language model tool for GraphRAG:
Example using LiteLLM as the language model manager for GraphRAG:

```yaml
models:
@ -37,7 +37,7 @@ See [Detailed Configuration](yaml.md) for more details on configuration. [View L

## Model Selection Considerations

GraphRAG has been most thoroughly tested with the gpt-4 series of models from OpenAI, including gpt-4 gpt-4-turbo, gpt-4o, and gpt-4o-mini. Our [arXiv paper](https://arxiv.org/abs/2404.16130), for example, performed quality evaluation using gpt-4-turbo. As stated above, non-OpenAI models are now supported with GraphRAG 2.6.0 and onwards through the use of LiteLLM but the suite of gpt-4 series of models from OpenAI remain the most tested and supported suite of models for GraphRAG.
GraphRAG has been most thoroughly tested with the gpt-4 series of models from OpenAI, including gpt-4 gpt-4-turbo, gpt-4o, and gpt-4o-mini. Our [arXiv paper](https://arxiv.org/abs/2404.16130), for example, performed quality evaluation using gpt-4-turbo. As stated above, non-OpenAI models are supported through the use of LiteLLM but the suite of gpt-4 series of models from OpenAI remain the most tested and supported suite of models for GraphRAG – in other words, these are the models we know best and can help resolve issues with.

Versions of GraphRAG before 2.2.0 made extensive use of `max_tokens` and `logit_bias` to control generated response length or content. The introduction of the o-series of models added new, non-compatible parameters because these models include a reasoning component that has different consumption patterns and response generation attributes than non-reasoning models. GraphRAG 2.2.0 now supports these models, but there are important differences that need to be understood before you switch.

@ -85,9 +85,9 @@ global_search:

Another option would be to avoid using a language model at all for the graph extraction, instead using the `fast` [indexing method](../index/methods.md) that uses NLP for portions of the indexing phase in lieu of LLM APIs.

## Using Non-OpenAI Models
## Using Custom Models

As shown above, non-OpenAI models may be used via LiteLLM starting with GraphRAG version 2.6.0 but cases may still exist in which some users wish to use models not supported by LiteLLM. There are two approaches one can use to connect to unsupported models:
LiteLLM supports hundreds of models, but cases may still exist in which some users wish to use models not supported by LiteLLM. There are two approaches one can use to connect to unsupported models:

### Proxy APIs

@ -95,7 +95,7 @@ Many users have used platforms such as [ollama](https://ollama.com/) and [LiteLL

### Model Protocol

As of GraphRAG 2.0.0, we support model injection through the use of a standard chat and embedding Protocol and an accompanying factories that you can use to register your model implementation. This is not supported with the CLI, so you'll need to use GraphRAG as a library.
We support model injection through the use of a standard chat and embedding Protocol and accompanying factories that you can use to register your model implementation. This is not supported with the CLI, so you'll need to use GraphRAG as a library.

- Our Protocol is [defined here](https://github.com/microsoft/graphrag/blob/main/graphrag/language_model/protocol/base.py)
- We have a simple mock implementation in our tests that you can [reference here](https://github.com/microsoft/graphrag/blob/main/tests/mock_provider.py)
@ -103,12 +103,12 @@ As of GraphRAG 2.0.0, we support model injection through the use of a standard c

Once you have a model implementation, you need to register it with our ChatModelFactory or EmbeddingModelFactory:

```python
class MyCustomModel:
class MyCustomChatModel:
...
# implementation

# elsewhere...
ChatModelFactory.register("my-custom-chat-model", lambda **kwargs: MyCustomModel(**kwargs))
ChatModelFactory.register("my-custom-chat-model", MyCustomChatModel)
```

Then in your config you can reference the type name you used:
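As a rough illustration (hedged: this sketch is not part of the hunk above; it assumes the standard `models` block layout and that the `type` field accepts a key registered with the factory), referencing the registered name from `settings.yaml` might look roughly like this:

```yaml
models:
  default_chat_model:
    type: my-custom-chat-model  # assumed: matches the key passed to ChatModelFactory.register above
    # remaining fields are whatever keyword arguments your model implementation expects
```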
@ -21,7 +21,7 @@ default_chat_model:

### models

This is a dict of model configurations. The dict key is used to reference this configuration elsewhere when a model instance is desired. In this way, you can specify as many different models as you need, and reference them differentially in the workflow steps.
This is a dict of model configurations. The dict key is used to reference this configuration elsewhere when a model instance is desired. In this way, you can specify as many different models as you need, and reference them independently in the workflow steps.

For example:
```yml
@ -173,7 +173,7 @@ Where to put all vectors for the system. Configured for lancedb by default. This
- `audience` **str** (only for AI Search) - Audience for managed identity token if managed identity authentication is used.
- `index_prefix` **str** - (optional) A prefix for the indexes you will create for embeddings. This stores all indexes (tables) for a given dataset ingest.
- `database_name` **str** - (cosmosdb only) Name of the database.
- `embeddings_schema` **list[dict[str, str]]** (optional) - Enables customization for each of your embeddings.
- `embeddings_schema` **dict[str, dict[str, str]]** (optional) - Enables customization for each of your embeddings.
- `<supported_embedding>`:
- `index_name` **str**: (optional) - Name for the specific embedding index table.
- `id_field` **str**: (optional) - Field name to be used as id. Default=`id`
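To make the nested dict-of-dict shape of `embeddings_schema` concrete, here is a hedged sketch; only `index_name` and `id_field` come from the field list above, while the embedding key name is an assumption and should be replaced with the embedding names your pipeline actually produces:

```yaml
embeddings_schema:
  entity.description:        # assumed embedding name, for illustration only
    index_name: entity_description_index
    id_field: id
```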
@ -332,7 +332,7 @@ These are the settings used for Leiden hierarchical clustering of the graph to c

#### Fields

- `embeddings` **bool** - Export embeddings snapshots to parquet.
- `graphml` **bool** - Export graph snapshots to GraphML.
- `graphml` **bool** - Export graph snapshot to GraphML.

## Query
@ -13,7 +13,7 @@

```sh
# install python dependencies
uv sync
uv sync --all-packages
```

## Execute the Indexing Engine
@ -28,11 +28,10 @@
"from pathlib import Path\n",
"from pprint import pprint\n",
"\n",
"import graphrag.api as api\n",
"import pandas as pd\n",
"from graphrag.config.load_config import load_config\n",
"from graphrag.index.typing.pipeline_run_result import PipelineRunResult\n",
"\n",
"import graphrag.api as api"
"from graphrag.index.typing.pipeline_run_result import PipelineRunResult"
]
},
{

@ -30,11 +30,10 @@
"from pathlib import Path\n",
"from pprint import pprint\n",
"\n",
"import graphrag.api as api\n",
"import pandas as pd\n",
"from graphrag.config.load_config import load_config\n",
"from graphrag.index.typing.pipeline_run_result import PipelineRunResult\n",
"\n",
"import graphrag.api as api"
"from graphrag.index.typing.pipeline_run_result import PipelineRunResult"
]
},
{
@ -6,12 +6,7 @@

[Python 3.10-3.12](https://www.python.org/downloads/)

To get started with the GraphRAG system, you have a few options:

👉 [Install from pypi](https://pypi.org/project/graphrag/). <br/>
👉 [Use it from source](developing.md)<br/>

The following is a simple end-to-end example for using the GraphRAG system, using the install from pypi option.
The following is a simple end-to-end example for using GraphRAG on the command line after installing from [pypi](https://pypi.org/project/graphrag/).

It shows how to use the system to index some text, and then use the indexed data to answer questions about the documents.

@ -22,7 +17,6 @@ pip install graphrag
```

# Running the Indexer

We need to set up a data project and some initial configuration. First let's get a sample dataset ready:

```sh
@ -79,7 +73,7 @@ You will also need to login with [az login](https://learn.microsoft.com/en-us/cl

## Running the Indexing pipeline

Finally we'll run the pipeline!
Now we're ready to run the pipeline!

```sh
graphrag index --root ./christmas
@ -87,8 +81,7 @@ graphrag index --root ./christmas



This process will take some time to run. This depends on the size of your input data, what model you're using, and the text chunk size being used (these can be configured in your `settings.yaml` file).
Once the pipeline is complete, you should see a new folder called `./christmas/output` with a series of parquet files.
This process will usually take a few minutes to run. Once the pipeline is complete, you should see a new folder called `./christmas/output` with a series of parquet files.

# Using the Query Engine
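For example, a global search over the index built above might look like the following (a hedged sketch based on the standard getting-started flow; exact flags can differ between versions, so check `graphrag query --help`):

```sh
graphrag query \
  --root ./christmas \
  --method global \
  --query "What are the top themes in this story?"
```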
@ -6,23 +6,25 @@

In order to support the GraphRAG system, the outputs of the indexing engine (in the Default Configuration Mode) are aligned to a knowledge model we call the _GraphRAG Knowledge Model_.
This model is designed to be an abstraction over the underlying data storage technology, and to provide a common interface for the GraphRAG system to interact with.
In normal use-cases the outputs of the GraphRAG Indexer would be loaded into a database system, and the GraphRAG's Query Engine would interact with the database using the knowledge model data-store types.

### Workflows

Because of the complexity of our data indexing tasks, we needed to be able to express our data pipeline as series of multiple, interdependent workflows.
Below is the core GraphRAG indexing pipeline. Individual workflows are described in detail in the [dataflow](./default_dataflow.md) page.

```mermaid
---
title: Sample Workflow DAG
title: Basic GraphRAG
---
stateDiagram-v2
[*] --> Prepare
Prepare --> Chunk
Chunk --> ExtractGraph
Chunk --> EmbedDocuments
ExtractGraph --> GenerateReports
[*] --> LoadDocuments
LoadDocuments --> ChunkDocuments
ChunkDocuments --> ExtractGraph
ChunkDocuments --> ExtractClaims
ChunkDocuments --> EmbedChunks
ExtractGraph --> DetectCommunities
ExtractGraph --> EmbedEntities
DetectCommunities --> GenerateReports
GenerateReports --> EmbedReports
```

### LLM Caching
@ -34,11 +36,11 @@ This allows our indexer to be more resilient to network issues, to act idempoten

### Providers & Factories

Several subsystems within GraphRAG use a factory pattern to register and retrieve provider implementations. This allows deep customization to support models, storage, and so on that you may use but isn't built directly into GraphRAG.
Several subsystems within GraphRAG use a factory pattern to register and retrieve provider implementations. This allows deep customization to support your own implementations of models, storage, and so on that we haven't built into the core library.

The following subsystems use a factory pattern that allows you to register your own implementations:

- [language model](https://github.com/microsoft/graphrag/blob/main/graphrag/language_model/factory.py) - implement your own `chat` and `embed` methods to use a model provider of choice beyond the built-in OpenAI/Azure support
- [language model](https://github.com/microsoft/graphrag/blob/main/graphrag/language_model/factory.py) - implement your own `chat` and `embed` methods to use a model provider of choice beyond the built-in LiteLLM wrapper
- [input reader](https://github.com/microsoft/graphrag/blob/main/graphrag/index/input/factory.py) - implement your own input document reader to support file types other than text, CSV, and JSON
- [cache](https://github.com/microsoft/graphrag/blob/main/graphrag/cache/factory.py) - create your own cache storage location in addition to the file, blob, and CosmosDB ones we provide
- [logger](https://github.com/microsoft/graphrag/blob/main/graphrag/logger/factory.py) - create your own log writing location in addition to the built-in file and blob storage
@ -16,8 +16,6 @@ The approach described here will be to run a custom GraphRAG workflow pipeline t

See the full entities [table schema](./outputs.md#entities). For graph summarization purposes, you only need id, title, description, and the list of text_unit_ids.

The additional properties are used for optional graph visualization purposes.

### Relationships

See the full relationships [table schema](./outputs.md#relationships). For graph summarization purposes, you only need id, source, target, description, weight, and the list of text_unit_ids.
@ -4,8 +4,8 @@

The knowledge model is a specification for data outputs that conform to our data-model definition. You can find these definitions in the python/graphrag/graphrag/model folder within the GraphRAG repository. The following entity types are provided. The fields here represent the fields that are text-embedded by default.

- `Document` - An input document into the system. These either represent individual rows in a CSV or individual .txt file.
- `TextUnit` - A chunk of text to analyze. The size of these chunks, their overlap, and whether they adhere to any data boundaries may be configured below. A common use case is to set `CHUNK_BY_COLUMNS` to `id` so that there is a 1-to-many relationship between documents and TextUnits instead of a many-to-many.
- `Document` - An input document into the system. These either represent individual rows in a CSV or individual .txt files.
- `TextUnit` - A chunk of text to analyze. The size of these chunks, their overlap, and whether they adhere to any data boundaries may be configured below.
- `Entity` - An entity extracted from a TextUnit. These represent people, places, events, or some other entity-model that you provide.
- `Relationship` - A relationship between two entities.
- `Covariate` - Extracted claim information, which contains statements about entities which may be time-bound.
@ -25,30 +25,26 @@ flowchart TB
documents[Documents] --> chunk[Chunk]
chunk --> textUnits[Text Units]
end
subgraph phase2[Phase 2: Graph Extraction]
subgraph phase2[Phase 2: Document Processing]
documents --> link_to_text_units[Link to TextUnits]
textUnits --> link_to_text_units
link_to_text_units --> document_outputs[Documents Table]
end
subgraph phase3[Phase 3 Graph Extraction]
textUnits --> graph_extract[Entity & Relationship Extraction]
graph_extract --> graph_summarize[Entity & Relationship Summarization]
graph_summarize --> claim_extraction[Claim Extraction]
claim_extraction --> graph_outputs[Graph Tables]
end
subgraph phase3[Phase 3: Graph Augmentation]
subgraph phase4[Phase 4: Graph Augmentation]
graph_outputs --> community_detect[Community Detection]
community_detect --> community_outputs[Communities Table]
end
subgraph phase4[Phase 4: Community Summarization]
subgraph phase5[Phase 5: Community Summarization]
community_outputs --> summarized_communities[Community Summarization]
summarized_communities --> community_report_outputs[Community Reports Table]
end
subgraph phase5[Phase 5: Document Processing]
documents --> link_to_text_units[Link to TextUnits]
textUnits --> link_to_text_units
link_to_text_units --> document_outputs[Documents Table]
end
subgraph phase6[Phase 6: Network Visualization]
graph_outputs --> graph_embed[Graph Embedding]
graph_embed --> combine_nodes[Final Entities]
end
subgraph phase7[Phase 7: Text Embeddings]
subgraph phase6[Phase 6: Text Embeddings]
textUnits --> text_embed[Text Embedding]
graph_outputs --> description_embed[Description Embedding]
community_report_outputs --> content_embed[Content Embedding]
@ -73,10 +69,30 @@ flowchart LR

```

## Phase 2: Graph Extraction
## Phase 2: Document Processing

In this phase of the workflow, we create the _Documents_ table for the knowledge model. Final documents are not used directly in GraphRAG, but this step links them to their constituent text units for provenance in your own applications.

```mermaid
---
title: Document Processing
---
flowchart LR
aug[Augment] --> dp[Link to TextUnits] --> dg[Documents Table]
```

### Link to TextUnits

In this step, we link each document to the text-units that were created in the first phase. This allows us to understand which documents are related to which text-units and vice-versa.

### Documents Table

At this point, we can export the **Documents** table into the knowledge Model.

## Phase 3: Graph Extraction

In this phase, we analyze each text unit and extract our graph primitives: _Entities_, _Relationships_, and _Claims_.
Entities and Relationships are extracted at once in our _entity_extract_ verb, and claims are extracted in our _claim_extract_ verb. Results are then combined and passed into following phases of the pipeline.
Entities and Relationships are extracted at once in our _extract_graph_ workflow, and claims are extracted in our _extract_claims_ workflow. Results are then combined and passed into following phases of the pipeline.

```mermaid
---
@ -87,9 +103,11 @@ flowchart LR
tu --> ce[Claim Extraction]
```

> Note: if you are using the [FastGraphRAG](https://microsoft.github.io/graphrag/index/methods/#fastgraphrag) option, entity and relationship extraction will be performed using NLP to conserve LLM resources, and claim extraction will always be skipped.

### Entity & Relationship Extraction

In this first step of graph extraction, we process each text-unit in order to extract entities and relationships out of the raw text using the LLM. The output of this step is a subgraph-per-TextUnit containing a list of **entities** with a _title_, _type_, and _description_, and a list of **relationships** with a _source_, _target_, and _description_.
In this first step of graph extraction, we process each text-unit to extract entities and relationships out of the raw text using the LLM. The output of this step is a subgraph-per-TextUnit containing a list of **entities** with a _title_, _type_, and _description_, and a list of **relationships** with a _source_, _target_, and _description_.

These subgraphs are merged together - any entities with the same _title_ and _type_ are merged by creating an array of their descriptions. Similarly, any relationships with the same _source_ and _target_ are merged by creating an array of their descriptions.

@ -103,9 +121,9 @@ Finally, as an independent workflow, we extract claims from the source TextUnits

Note: claim extraction is _optional_ and turned off by default. This is because claim extraction generally requires prompt tuning to be useful.

## Phase 3: Graph Augmentation
## Phase 4: Graph Augmentation

Now that we have a usable graph of entities and relationships, we want to understand their community structure. These give us explicit ways of understanding the topological structure of our graph.
Now that we have a usable graph of entities and relationships, we want to understand their community structure. These give us explicit ways of understanding the organization of our graph.

```mermaid
---
@ -123,7 +141,7 @@ In this step, we generate a hierarchy of entity communities using the Hierarchic

Once our graph augmentation steps are complete, the final **Entities**, **Relationships**, and **Communities** tables are exported.

## Phase 4: Community Summarization
## Phase 5: Community Summarization

```mermaid
---
@ -149,31 +167,6 @@ In this step, each _community report_ is then summarized via the LLM for shortha

At this point, some bookkeeping work is performed and we export the **Community Reports** tables.

## Phase 5: Document Processing

In this phase of the workflow, we create the _Documents_ table for the knowledge model.

```mermaid
---
title: Document Processing
---
flowchart LR
aug[Augment] --> dp[Link to TextUnits] --> dg[Documents Table]
```

### Augment with Columns (CSV Only)

If the workflow is operating on CSV data, you may configure your workflow to add additional fields to Documents output. These fields should exist on the incoming CSV tables. Details about configuring this can be found in the [configuration documentation](../config/overview.md).

### Link to TextUnits

In this step, we link each document to the text-units that were created in the first phase. This allows us to understand which documents are related to which text-units and vice-versa.

### Documents Table

At this point, we can export the **Documents** table into the knowledge Model.

## Phase 6: Text Embedding

For all artifacts that require downstream vector search, we generate text embeddings as a final step. These embeddings are written directly to a configured vector store. By default we embed entity descriptions, text unit text, and community report text.
@ -18,15 +18,15 @@ Also see the [outputs](outputs.md) documentation for the final documents table s

## Bring-your-own DataFrame

As of version 2.6.0, GraphRAG's [indexing API method](https://github.com/microsoft/graphrag/blob/main/graphrag/api/index.py) allows you to pass in your own pandas DataFrame and bypass all of the input loading/parsing described in the next section. This is convenient if you have content in a format or storage location we don't support out-of-the-box. __You must ensure that your input DataFrame conforms to the schema described above.__ All of the chunking behavior described later will proceed exactly the same.
GraphRAG's [indexing API method](https://github.com/microsoft/graphrag/blob/main/graphrag/api/index.py) allows you to pass in your own pandas DataFrame and bypass all of the input loading/parsing described in the next section. This is convenient if you have content in a format or storage location we don't support out-of-the-box. _You must ensure that your input DataFrame conforms to the schema described above._ All of the chunking behavior described later will proceed exactly the same.
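A minimal sketch of the idea (hedged: the `input_df` keyword below is a placeholder, and the minimal `title`/`text` columns are assumptions; verify both against the linked index.py and the documents schema section):

```python
import asyncio
from pathlib import Path

import pandas as pd

import graphrag.api as api
from graphrag.config.load_config import load_config

# Assemble documents that conform to the expected schema (assumed minimal columns: title, text).
docs = pd.DataFrame([
    {"title": "report-1.txt", "text": "First document contents..."},
    {"title": "report-2.txt", "text": "Second document contents..."},
])

config = load_config(Path("./christmas"))

# `input_df` is a placeholder name for the DataFrame parameter; check
# graphrag/api/index.py for the actual signature before relying on it.
results = asyncio.run(api.build_index(config=config, input_df=docs))
```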
## Custom File Handling

As of version 3.0.0, we have migrated to using an injectable InputReader provider class. This means you can implement any input file handling you want in a class that extends InputReader and register it with the InputReaderFactory. See the [architecture page](https://microsoft.github.io/graphrag/index/architecture/) for more info on our standard provider pattern.
We use an injectable InputReader provider class. This means you can implement any input file handling you want in a class that extends InputReader and register it with the InputReaderFactory. See the [architecture page](https://microsoft.github.io/graphrag/index/architecture/) for more info on our standard provider pattern.

## Formats

We support three file formats out-of-the-box. This covers the overwhelming majority of use cases we have encountered. If you have a different format, we recommend writing a script to convert to one of these, which are widely used and supported by many tools and libraries.
We support three file formats out-of-the-box. This covers the overwhelming majority of use cases we have encountered. If you have a different format, we recommend either implementing your own InputReader or writing a script to convert to one of these, which are widely used and supported by many tools and libraries.

### Plain Text

@ -44,7 +44,7 @@ JSON files (typically ending in a .json extension) contain [structured objects](

## Metadata

With the structured file formats (CSV and JSON) you can configure any number of columns to be added to a persisted `metadata` field in the DataFrame. This is configured by supplying a list of columns name to collect. If this is configured, the output `metadata` column will have a dict containing a key for each column, and the value of the column for that document. This metadata can optionally be used later in the GraphRAG pipeline.
With the structured file formats (CSV and JSON) you can configure any number of columns to be added to a persisted `metadata` field in the DataFrame. This is configured by supplying a list of column names to collect. If this is configured, the output `metadata` column will have a dict containing a key for each column, and the value of the column for that document. This metadata can optionally be used later in the GraphRAG pipeline.

### Example

@ -72,7 +72,7 @@ Documents DataFrame

## Chunking and Metadata

As described on the [default dataflow](default_dataflow.md#phase-1-compose-textunits) page, documents are *chunked* into smaller "text units" for processing. This is done because document content size often exceeds the available context window for a given language model. There are a handful of settings you can adjust for this chunking, the most relevant being the `chunk_size` and `overlap`. We now also support a metadata processing scheme that can improve indexing results for some use cases. We will describe this feature in detail here.
As described on the [default dataflow](default_dataflow.md#phase-1-compose-textunits) page, documents are *chunked* into smaller "text units" for processing. This is done because document content size often exceeds the available context window for a given language model. There are a handful of settings you can adjust for this chunking, the most relevant being the `chunk_size` and `overlap`. We also support a metadata processing scheme that can improve indexing results for some use cases. We will describe this feature in detail here.

Imagine the following scenario: you are indexing a collection of news articles. Each article text starts with a headline and author, and then proceeds with the content. When documents are chunked, they are split evenly according to your configured chunk size. In other words, the first *n* tokens are read into a text unit, and then the next *n*, until the end of the content. This means that front matter at the beginning of the document (such as the headline and author in this example) *is not copied to each chunk*. It only exists in the first chunk. When we later retrieve those chunks for summarization, they may therefore be missing shared information about the source document that should always be provided to the model. We have configuration options to copy repeated content into each text unit to address this issue.

@ -89,7 +89,7 @@ Next, the `chunks` block needs to instruct the chunker how to handle this metada

### Examples

The following are several examples to help illustrate how chunking config and metadate prepending works for each file format. Note that we are using word count here as "tokens" for the illustration, but language model tokens are [not equivalent to words](https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them).
The following are several examples to help illustrate how chunking config and metadata prepending works for each file format. Note that we are using word count here as "tokens" for the illustration, but language model tokens are [not equivalent to words](https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them).

#### Text files
@ -13,7 +13,7 @@ This is the method described in the original [blog post](https://www.microsoft.c
- claim extraction (optional): LLM is prompted to extract and describe claims from each text unit.
- community report generation: entity and relationship descriptions (and optionally claims) for each community are collected and used to prompt the LLM to generate a summary report.

`graphrag index --method standard`. This is the default method, so the method param can actual be omitted.
`graphrag index --method standard`. This is the default method, so the method param can be omitted on the command line.

## FastGraphRAG

@ -23,7 +23,7 @@ FastGraphRAG is a method that substitutes some of the language model reasoning f
- relationship extraction: relationships are defined as text unit co-occurrence between entity pairs. There is no description.
- entity summarization: not necessary.
- relationship summarization: not necessary.
- claim extraction (optional): unused.
- claim extraction: unused.
- community report generation: The direct text unit content containing each entity noun phrase is collected and used to prompt the LLM to generate a summary report.

`graphrag index --method fast`
@ -41,4 +41,4 @@ You can install it manually by running `python -m spacy download <model_name>`,

## Choosing a Method

Standard GraphRAG provides a rich description of real-world entities and relationships, but is more expensive than FastGraphRAG. We estimate graph extraction to constitute roughly 75% of indexing cost. FastGraphRAG is therefore much cheaper, but the tradeoff is that the extracted graph is less directly relevant for use outside of GraphRAG, and the graph tends to be quite a bit noisier. If high fidelity entities and graph exploration are important to your use case, we recommend staying with traditional GraphRAG. If your use case is primarily aimed at summary questions using global search, FastGraphRAG provides high quality summarization at much less LLM cost.
Standard GraphRAG provides a rich description of real-world entities and relationships, but is more expensive than FastGraphRAG. We estimate graph extraction to constitute roughly 75% of indexing cost. FastGraphRAG is therefore much cheaper, but the tradeoff is that the extracted graph is less directly relevant for use outside of GraphRAG, and the graph tends to be quite a bit noisier. If high fidelity entities and graph exploration are important to your use case, we recommend staying with traditional GraphRAG. If your use case is primarily aimed at summary questions using global search, FastGraphRAG provides high quality summarization with much lower language model cost.
@ -7,8 +7,7 @@ Indexing Pipelines are configurable. They are composed of workflows, standard an
- extract entities, relationships and claims from raw text
- perform community detection in entities
- generate community summaries and reports at multiple levels of granularity
- embed entities into a graph vector space
- embed text chunks into a textual vector space
- embed text into a vector space

The outputs of the pipeline are stored as Parquet tables by default, and embeddings are written to your configured vector store.
@ -1,8 +1,8 @@
# Auto Prompt Tuning ⚙️

GraphRAG provides the ability to create domain adapted prompts for the generation of the knowledge graph. This step is optional, though it is highly encouraged to run it as it will yield better results when executing an Index Run.
GraphRAG provides the ability to create domain-adapted prompts for the generation of the knowledge graph. This step is optional, though it is highly encouraged to run it as it will yield better results when executing an Index Run.

These are generated by loading the inputs, splitting them into chunks (text units) and then running a series of LLM invocations and template substitutions to generate the final prompts. We suggest using the default values provided by the script, but in this page you'll find the detail of each in case you want to further explore and tweak the prompt tuning algorithm.
These are generated by loading the inputs, splitting them into chunks (text units) and then running a series of LLM invocations and template substitutions to generate the final prompts. We suggest using the default values provided by the script, but in this page you'll find the details of each in case you want to further explore and tweak the prompt tuning algorithm.

<p align="center">
<img src="../../img/auto-tune-diagram.png" alt="Figure 1: Auto Tuning Conceptual Diagram." width="850" align="center" />
@ -73,17 +73,17 @@ The auto tuning feature ingests the input data and then divides it into text uni
After that, it uses one of the following selection methods to pick a sample to work with for prompt generation:

- `random`: Select text units randomly. This is the default and recommended option.
- `top`: Select the head n text units.
- `top`: Select the head _n_ text units.
- `all`: Use all text units for the generation. Use only with small datasets; this option is not usually recommended.
- `auto`: Embed text units in a lower-dimensional space and select the k nearest neighbors to the centroid. This is useful when you have a large dataset and want to select a representative sample.
- `auto`: Embed text units in a lower-dimensional space and select the _k_ nearest neighbors to the centroid. This is useful when you have a large dataset and want to select a representative sample.

## Modify Env Vars
## Modify Config

After running auto tuning, you should modify the following config variables to pick up the new prompts on your index run. Note: Please make sure to update the correct path to the generated prompts, in this example we are using the default "prompts" path.

```yaml
entity_extraction:
prompt: "prompts/entity_extraction.txt"
extract_graph:
prompt: "prompts/extract_graph.txt"

summarize_descriptions:
prompt: "prompts/summarize_descriptions.txt"
@ -8,8 +8,8 @@ The default prompts are the simplest way to get started with the GraphRAG system

## Auto Tuning

Auto Tuning leverages your input data and LLM interactions to create domain adapted prompts for the generation of the knowledge graph. It is highly encouraged to run it as it will yield better results when executing an Index Run. For more details about how to use it, please refer to the [Auto Tuning](auto_prompt_tuning.md) documentation.
Auto Tuning leverages your input data and LLM interactions to create domain-adapted prompts for the generation of the knowledge graph. It is highly encouraged to run it as it will yield better results when executing an Index Run. For more details about how to use it, please refer to the [Auto Tuning](auto_prompt_tuning.md) page.

## Manual Tuning

Manual tuning is an advanced use-case. Most users will want to use the Auto Tuning feature instead. Details about how to use manual configuration are available in the [manual tuning](manual_prompt_tuning.md) documentation.
Manual tuning is an advanced use-case. Most users will want to use the Auto Tuning feature instead. Details about how to use manual configuration are available in the [manual tuning](manual_prompt_tuning.md) page.
@ -21,7 +21,7 @@ DRIFT Search introduces a new approach to local search queries by including comm

Below are the key parameters of the [DRIFTSearch class](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/drift_search/search.py):

- `llm`: OpenAI model object to be used for response generation
* `model`: Language model chat completion object to be used for response generation
- `context_builder`: [context builder](https://github.com/microsoft/graphrag/blob/main/graphrag/query/structured_search/drift_search/drift_context.py) object to be used for preparing context data from community reports and query information
- `config`: model to define the DRIFT Search hyperparameters. [DRIFT Config model](https://github.com/microsoft/graphrag/blob/main/graphrag/config/models/drift_search_config.py)
- `tokenizer`: token encoder for tracking the budget for the algorithm.

@ -54,7 +54,7 @@ The quality of the global search’s response can be heavily influenced by the l

Below are the key parameters of the [GlobalSearch class](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/global_search/search.py):

* `llm`: OpenAI model object to be used for response generation
* `model`: Language model chat completion object to be used for response generation
* `context_builder`: [context builder](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/global_search/community_context.py) object to be used for preparing context data from community reports
* `map_system_prompt`: prompt template used in the `map` stage. Default template can be found at [map_system_prompt](https://github.com/microsoft/graphrag/blob/main//graphrag/prompts/query/global_search_map_system_prompt.py)
* `reduce_system_prompt`: prompt template used in the `reduce` stage, default template can be found at [reduce_system_prompt](https://github.com/microsoft/graphrag/blob/main//graphrag/prompts/query/global_search_reduce_system_prompt.py)

@ -48,7 +48,7 @@ Given a user query and, optionally, the conversation history, the local search m

Below are the key parameters of the [LocalSearch class](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/local_search/search.py):

* `llm`: OpenAI model object to be used for response generation
* `model`: Language model chat completion object to be used for response generation
* `context_builder`: [context builder](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/local_search/mixed_context.py) object to be used for preparing context data from collections of knowledge model objects
* `system_prompt`: prompt template used to generate the search response. Default template can be found at [system_prompt](https://github.com/microsoft/graphrag/blob/main//graphrag/prompts/query/local_search_system_prompt.py)
* `response_type`: free-form text describing the desired response type and format (e.g., `Multiple Paragraphs`, `Multi-Page Report`)
@ -1,34 +1,35 @@
# Query Engine 🔎

The Query Engine is the retrieval module of the Graph RAG Library. It is one of the two main components of the Graph RAG library, the other being the Indexing Pipeline (see [Indexing Pipeline](../index/overview.md)).
The Query Engine is the retrieval module of the GraphRAG library, and operates over completed [indexes](../index/overview.md).
It is responsible for the following tasks:

- [Local Search](#local-search)
- [Global Search](#global-search)
- [DRIFT Search](#drift-search)
- Basic Search
- [Question Generation](#question-generation)

## Local Search

Local search method generates answers by combining relevant data from the AI-extracted knowledge-graph with text chunks of the raw documents. This method is suitable for questions that require an understanding of specific entities mentioned in the documents (e.g. What are the healing properties of chamomile?).
Local search generates answers by combining relevant data from the AI-extracted knowledge-graph with text chunks of the raw documents. This method is suitable for questions that require an understanding of specific entities mentioned in the documents (e.g. What are the healing properties of chamomile?).

For more details about how Local Search works please refer to the [Local Search](local_search.md) documentation.
For more details about how Local Search works please refer to the [Local Search](local_search.md) page.

## Global Search

Global search method generates answers by searching over all AI-generated community reports in a map-reduce fashion. This is a resource-intensive method, but often gives good responses for questions that require an understanding of the dataset as a whole (e.g. What are the most significant values of the herbs mentioned in this notebook?).
Global search generates answers by searching over all AI-generated community reports in a map-reduce fashion. This is a resource-intensive method, but often gives good responses for questions that require an understanding of the dataset as a whole (e.g. What are the most significant values of the herbs mentioned in this notebook?).

More about this can be checked at the [Global Search](global_search.md) documentation.
More about this is provided on the [Global Search](global_search.md) page.

## DRIFT Search

DRIFT Search introduces a new approach to local search queries by including community information in the search process. This greatly expands the breadth of the query’s starting point and leads to retrieval and usage of a far higher variety of facts in the final answer. This addition expands the GraphRAG query engine by providing a more comprehensive option for local search, which uses community insights to refine a query into detailed follow-up questions.
DRIFT Search introduces a new approach to local search queries by including community information in the search process. This greatly expands the breadth of the query’s starting point and leads to retrieval and usage of a far higher variety of facts in the final answer. This expands the GraphRAG query engine by providing a more comprehensive option for local search, which uses community insights to refine a query into detailed follow-up questions.

To learn more about DRIFT Search, please refer to the [DRIFT Search](drift_search.md) documentation.
To learn more about DRIFT Search, please refer to the [DRIFT Search](drift_search.md) page.

## Basic Search

GraphRAG includes a rudimentary implementation of basic vector RAG to make it easy to compare different search results based on the type of question you are asking. You can specify the top `k` txt unit chunks to include in the summarization context.
GraphRAG includes a rudimentary implementation of basic vector RAG to make it easy to compare different search results based on the type of question you are asking. You can specify the top `k` text unit chunks to include in the summarization context.

## Question Generation
@ -11,7 +11,7 @@ Given a list of prior user questions, the question generation method uses the sa

Below are the key parameters of the [Question Generation class](https://github.com/microsoft/graphrag/blob/main//graphrag/query/question_gen/local_gen.py):

* `llm`: OpenAI model object to be used for response generation
* `model`: Language model chat completion object to be used for response generation
* `context_builder`: [context builder](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/local_search/mixed_context.py) object to be used for preparing context data from collections of knowledge model objects, using the same context builder class as in local search
* `system_prompt`: prompt template used to generate candidate questions. Default template can be found at [system_prompt](https://github.com/microsoft/graphrag/blob/main//graphrag/prompts/query/question_gen_system_prompt.py)
* `llm_params`: a dictionary of additional parameters (e.g., temperature, max_tokens) to be passed to the LLM call

@ -39,8 +39,8 @@ Unified Search supports multiple GraphRAG indexes by using a directory listing f
For example, if you have a folder of GraphRAG indexes called "projects" and inside that you ran the Getting Started instructions, your listing.json in the projects folder could look like:
```json
[{
"key": "ragtest-demo",
"path": "ragtest",
"key": "christmas-demo",
"path": "christmas",
"name": "A Christmas Carol",
"description": "Getting Started index of the novel A Christmas Carol",
"community_level": 2
@ -7,6 +7,7 @@ import asyncio
import logging
from typing import TYPE_CHECKING

import graphrag.api as api
import streamlit as st
from knowledge_loader.data_sources.loader import (
create_datasource,
@ -17,8 +18,6 @@ from rag.typing import SearchResult, SearchType
from state.session_variables import SessionVariables
from ui.search import display_search_result

import graphrag.api as api

if TYPE_CHECKING:
import pandas as pd