Deploying to gh-pages from @ microsoft/graphrag@ac0be810bd 🚀

2026-01-27 22:31:57 +08:00 · 2024-06-13 15:38:42 +00:00 · 2024-06-13 15:38:42 +00:00 · deafae3795
commit deafae3795
parent a171aeba3d
5 changed files with 24 additions and 21 deletions
--- a/data/operation_dulce/dataset.zip
+++ b/data/operation_dulce/dataset.zip
--- a/posts/config/custom/index.html
+++ b/posts/config/custom/index.html
@@ -389,7 +389,8 @@ a {
 </div>
 <h1>&gt; input</h1>
 <ul>
-<li><code>type</code>: The input type field discriminates between the different input types. Options are <code>csv</code> and <code>text</code>.</li>
+<li><code>type</code>: The type of input to use. Options are <code>file</code> or <code>blob</code>.</li>
+<li><code>file_type</code>: The file type field discriminates between the different input types. Options are <code>csv</code> and <code>text</code>.</li>
 <li><code>base_dir</code>: The base directory to read the input files from. This is relative to the config file.</li>
 <li><code>file_pattern</code>: A regex to match the input files. The regex must have named groups for each of the fields in the file_filter.</li>
 <li><code>post_process</code>: A DataShaper workflow definition to apply to the input before executing the primary workflow.</li>
@@ -400,8 +401,9 @@ a {
 </ul>

 <div style="position: relative">
-  <pre class="language-yaml"><code id="code-213" class="language-yaml"><span class="token key atrule">input</span><span class="token punctuation">:</span>
-  <span class="token key atrule">type</span><span class="token punctuation">:</span> csv
+  <pre class="language-yaml"><code id="code-218" class="language-yaml"><span class="token key atrule">input</span><span class="token punctuation">:</span>
+  <span class="token key atrule">type</span><span class="token punctuation">:</span> file
+  <span class="token key atrule">file_type</span><span class="token punctuation">:</span> csv
  <span class="token key atrule">base_dir</span><span class="token punctuation">:</span> ../data/csv <span class="token comment"># the directory containing the CSV files, this is relative to the config file</span>
  <span class="token key atrule">file_pattern</span><span class="token punctuation">:</span> <span class="token string">'.*[\/](?P&lt;source>[^\/]+)[\/](?P&lt;year>\d{4})-(?P&lt;month>\d{2})-(?P&lt;day>\d{2})_(?P&lt;author>[^_]+)_\d+\.csv$'</span> <span class="token comment"># a regex to match the CSV files</span>
  <span class="token comment"># An additional file filter which uses the named groups from the file_pattern to further filter the files</span>
@@ -420,14 +422,15 @@ a {
        <span class="token key atrule">column</span><span class="token punctuation">:</span> <span class="token string">"title"</span><span class="token punctuation">,</span>
        <span class="token key atrule">value</span><span class="token punctuation">:</span> <span class="token string">"My document"</span></code></pre>

-  <button class="code-copy " data-clipboard-target="#code-213" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
+  <button class="code-copy " data-clipboard-target="#code-218" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
    <span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
  </button>
 </div>

 <div style="position: relative">
-  <pre class="language-yaml"><code id="code-214" class="language-yaml"><span class="token key atrule">input</span><span class="token punctuation">:</span>
-  <span class="token key atrule">type</span><span class="token punctuation">:</span> text
+  <pre class="language-yaml"><code id="code-219" class="language-yaml"><span class="token key atrule">input</span><span class="token punctuation">:</span>
+  <span class="token key atrule">type</span><span class="token punctuation">:</span> file
+  <span class="token key atrule">file_type</span><span class="token punctuation">:</span> csv
  <span class="token key atrule">base_dir</span><span class="token punctuation">:</span> ../data/csv <span class="token comment"># the directory containing the CSV files, this is relative to the config file</span>
  <span class="token key atrule">file_pattern</span><span class="token punctuation">:</span> <span class="token string">'.*[\/](?P&lt;source>[^\/]+)[\/](?P&lt;year>\d{4})-(?P&lt;month>\d{2})-(?P&lt;day>\d{2})_(?P&lt;author>[^_]+)_\d+\.csv$'</span> <span class="token comment"># a regex to match the CSV files</span>
  <span class="token comment"># An additional file filter which uses the named groups from the file_pattern to further filter the files</span>
@@ -442,7 +445,7 @@ a {
        <span class="token key atrule">column</span><span class="token punctuation">:</span> <span class="token string">"title"</span><span class="token punctuation">,</span>
        <span class="token key atrule">value</span><span class="token punctuation">:</span> <span class="token string">"My document"</span></code></pre>

-  <button class="code-copy " data-clipboard-target="#code-214" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
+  <button class="code-copy " data-clipboard-target="#code-219" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
    <span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
  </button>
 </div>
--- a/posts/config/env_vars/index.html
+++ b/posts/config/env_vars/index.html
@@ -648,7 +648,7 @@ a {
 </table>
 <h2>Input Settings</h2>
 <p>These settings control the data input used by the pipeline. Any settings with a fallback will use the base LLM settings, if available.</p>
-<h3>Plaintext Input Data (<code>GRAPHRAG_INPUT_TYPE</code>=text)</h3>
+<h3>Plaintext Input Data (<code>GRAPHRAG_INPUT_FILE_TYPE</code>=text)</h3>
 <table>
 <thead>
 <tr>
@@ -669,7 +669,7 @@ a {
 </tr>
 </tbody>
 </table>
-<h3>CSV Input Data (<code>GRAPHRAG_INPUT_TYPE</code>=csv)</h3>
+<h3>CSV Input Data (<code>GRAPHRAG_INPUT_FILE_TYPE</code>=csv)</h3>
 <table>
 <thead>
 <tr>
@@ -682,6 +682,13 @@ a {
 </thead>
 <tbody>
 <tr>
+<td><code>GRAPHRAG_INPUT_TYPE</code></td>
+<td>The input storage type to use when reading files.  (<code>file</code> or <code>blob</code>)</td>
+<td><code>str</code></td>
+<td>optional</td>
+<td><code>file</code></td>
+</tr>
+<tr>
 <td><code>GRAPHRAG_INPUT_FILE_PATTERN</code></td>
 <td>The file pattern regexp to use when reading input files from the input directory.</td>
 <td><code>str</code></td>
@@ -731,13 +738,6 @@ a {
 <td><code>title</code></td>
 </tr>
 <tr>
-<td><code>GRAPHRAG_INPUT_STORAGE_TYPE</code></td>
-<td>The storage type to use when reading CSV input files.  (<code>file</code> or <code>blob</code>)</td>
-<td><code>str</code></td>
-<td>optional</td>
-<td><code>file</code></td>
-</tr>
-<tr>
 <td><code>GRAPHRAG_INPUT_STORAGE_ACCOUNT_BLOB_URL</code></td>
 <td>The Azure Storage blob endpoint to use when in <code>blob</code> mode and using managed identity.  Will have the format <code>https://&lt;storage_account_name&gt;.blob.core.windows.net</code></td>
 <td><code>str</code></td>
@@ -780,7 +780,7 @@ a {
 </thead>
 <tbody>
 <tr>
-<td><code>GRAPHRAG_INPUT_TYPE</code></td>
+<td><code>GRAPHRAG_INPUT_FILE_TYPE</code></td>
 <td>The type of input data, <code>csv</code> or <code>text</code></td>
 <td><code>str</code></td>
 <td>optional</td>
--- a/posts/config/json_yaml/index.html
+++ b/posts/config/json_yaml/index.html
@@ -286,7 +286,8 @@ API_KEY=some_api_key
 <h2>input</h2>
 <h3>Fields</h3>
 <ul>
-<li><code>type</code> <strong>text|csv</strong> - The type of input data to load. Either <code>text</code> or <code>csv</code>. Default is <code>csv</code></li>
+<li><code>type</code> <strong>file|blob</strong> - The input type to use. Default=<code>file</code></li>
+<li><code>file_type</code> <strong>text|csv</strong> - The type of input data to load. Either <code>text</code> or <code>csv</code>. Default is <code>csv</code></li>
 <li><code>file_encoding</code> <strong>str</strong> - The encoding of the input file. Default is <code>utf-8</code></li>
 <li><code>file_pattern</code> <strong>str</strong> - A regex to match input files. Default is <code>.*\.csv$</code> if in csv mode and <code>.*\.txt$</code> if in text mode.</li>
 <li><code>source_column</code> <strong>str</strong> - (CSV Mode Only) The source column name.</li>
@@ -295,7 +296,6 @@ API_KEY=some_api_key
 <li><code>text_column</code> <strong>str</strong> - (CSV Mode Only) The text column name.</li>
 <li><code>title_column</code> <strong>str</strong> - (CSV Mode Only) The title column name.</li>
 <li><code>document_attribute_columns</code> <strong>list[str]</strong> - (CSV Mode Only) The additional document attributes to include.</li>
-<li><code>storage_type</code> <strong>file|blob</strong> - The input storage type to use. Default=<code>file</code></li>
 <li><code>connection_string</code> <strong>str</strong> - (blob only) The Azure Storage connection string.</li>
 <li><code>container_name</code> <strong>str</strong> - (blob only) The Azure Storage container name.</li>
 <li><code>base_dir</code> <strong>str</strong> - The base directory to read input from, relative to the root.</li>
--- a/posts/config/template/index.html
+++ b/posts/config/template/index.html
@@ -313,7 +313,7 @@ the <code>--root</code> parameter on your Indexing Pipeline execution.</p>
 <span class="token comment"># GRAPHRAG_INPUT_FILE_PATTERN=.*\.txt</span>

 <span class="token comment"># CSV Input Data Configuration</span>
-<span class="token assign-left variable">GRAPHRAG_INPUT_TYPE</span><span class="token operator">=</span><span class="token string">"csv"</span>
+<span class="token assign-left variable">GRAPHRAG_INPUT_FILE_TYPE</span><span class="token operator">=</span><span class="token string">"csv"</span>
 <span class="token assign-left variable">GRAPHRAG_INPUT_FILE_PATTERN</span><span class="token operator">=</span><span class="token string">".*\.csv$"</span>
 <span class="token assign-left variable">GRAPHRAG_INPUT_SOURCE_COLUMN</span><span class="token operator">=</span>source
 <span class="token comment"># GRAPHRAG_INPUT_TIMESTAMP_COLUMN=None</span>
@@ -321,7 +321,7 @@ the <code>--root</code> parameter on your Indexing Pipeline execution.</p>
 <span class="token comment"># GRAPHRAG_INPUT_TEXT_COLUMN="text"</span>
 <span class="token comment"># GRAPHRAG_INPUT_ATTRIBUTE_COLUMNS=id</span>
 <span class="token comment"># GRAPHRAG_INPUT_TITLE_COLUMN="title"</span>
-<span class="token comment"># GRAPHRAG_INPUT_STORAGE_TYPE="file"</span>
+<span class="token comment"># GRAPHRAG_INPUT_TYPE="file"</span>
 <span class="token comment"># GRAPHRAG_INPUT_CONNECTION_STRING=None</span>
 <span class="token comment"># GRAPHRAG_INPUT_CONTAINER_NAME=None</span>
 <span class="token comment"># GRAPHRAG_INPUT_BASE_DIR=None</span>