mirror of
https://github.com/microsoft/graphrag.git
synced 2026-01-27 22:31:57 +08:00
Deploying to gh-pages from @ microsoft/graphrag@ac0be810bd 🚀
This commit is contained in:
parent
a171aeba3d
commit
deafae3795
Binary file not shown.
@ -389,7 +389,8 @@ a {
|
||||
</div>
|
||||
<h1>> input</h1>
|
||||
<ul>
|
||||
<li><code>type</code>: The input type field discriminates between the different input types. Options are <code>csv</code> and <code>text</code>.</li>
|
||||
<li><code>type</code>: The type of input to use. Options are <code>file</code> or <code>blob</code>.</li>
|
||||
<li><code>file_type</code>: The file type field discriminates between the different input types. Options are <code>csv</code> and <code>text</code>.</li>
|
||||
<li><code>base_dir</code>: The base directory to read the input files from. This is relative to the config file.</li>
|
||||
<li><code>file_pattern</code>: A regex to match the input files. The regex must have named groups for each of the fields in the file_filter.</li>
|
||||
<li><code>post_process</code>: A DataShaper workflow definition to apply to the input before executing the primary workflow.</li>
|
||||
@ -400,8 +401,9 @@ a {
|
||||
</ul>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-yaml"><code id="code-213" class="language-yaml"><span class="token key atrule">input</span><span class="token punctuation">:</span>
|
||||
<span class="token key atrule">type</span><span class="token punctuation">:</span> csv
|
||||
<pre class="language-yaml"><code id="code-218" class="language-yaml"><span class="token key atrule">input</span><span class="token punctuation">:</span>
|
||||
<span class="token key atrule">type</span><span class="token punctuation">:</span> file
|
||||
<span class="token key atrule">file_type</span><span class="token punctuation">:</span> csv
|
||||
<span class="token key atrule">base_dir</span><span class="token punctuation">:</span> ../data/csv <span class="token comment"># the directory containing the CSV files, this is relative to the config file</span>
|
||||
<span class="token key atrule">file_pattern</span><span class="token punctuation">:</span> <span class="token string">'.*[\/](?P&lt;source&gt;[^\/]+)[\/](?P&lt;year&gt;\d{4})-(?P&lt;month&gt;\d{2})-(?P&lt;day&gt;\d{2})_(?P&lt;author&gt;[^_]+)_\d+\.csv$'</span> <span class="token comment"># a regex to match the CSV files</span>
|
||||
<span class="token comment"># An additional file filter which uses the named groups from the file_pattern to further filter the files</span>
|
||||
@ -420,14 +422,15 @@ a {
|
||||
<span class="token key atrule">column</span><span class="token punctuation">:</span> <span class="token string">"title"</span><span class="token punctuation">,</span>
|
||||
<span class="token key atrule">value</span><span class="token punctuation">:</span> <span class="token string">"My document"</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-213" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<button class="code-copy " data-clipboard-target="#code-218" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div style="position: relative">
|
||||
<pre class="language-yaml"><code id="code-214" class="language-yaml"><span class="token key atrule">input</span><span class="token punctuation">:</span>
|
||||
<span class="token key atrule">type</span><span class="token punctuation">:</span> text
|
||||
<pre class="language-yaml"><code id="code-219" class="language-yaml"><span class="token key atrule">input</span><span class="token punctuation">:</span>
|
||||
<span class="token key atrule">type</span><span class="token punctuation">:</span> file
|
||||
<span class="token key atrule">file_type</span><span class="token punctuation">:</span> csv
|
||||
<span class="token key atrule">base_dir</span><span class="token punctuation">:</span> ../data/csv <span class="token comment"># the directory containing the CSV files, this is relative to the config file</span>
|
||||
<span class="token key atrule">file_pattern</span><span class="token punctuation">:</span> <span class="token string">'.*[\/](?P&lt;source&gt;[^\/]+)[\/](?P&lt;year&gt;\d{4})-(?P&lt;month&gt;\d{2})-(?P&lt;day&gt;\d{2})_(?P&lt;author&gt;[^_]+)_\d+\.csv$'</span> <span class="token comment"># a regex to match the CSV files</span>
|
||||
<span class="token comment"># An additional file filter which uses the named groups from the file_pattern to further filter the files</span>
|
||||
@ -442,7 +445,7 @@ a {
|
||||
<span class="token key atrule">column</span><span class="token punctuation">:</span> <span class="token string">"title"</span><span class="token punctuation">,</span>
|
||||
<span class="token key atrule">value</span><span class="token punctuation">:</span> <span class="token string">"My document"</span></code></pre>
|
||||
|
||||
<button class="code-copy " data-clipboard-target="#code-214" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<button class="code-copy " data-clipboard-target="#code-219" style="position: absolute; top: 7.5px; right: 6px; padding-top: 3px; cursor: pointer; outline: none; opacity: 0.8;" title="Copy">
|
||||
<span style="display:inline-block;background:url(https://api.iconify.design/mdi/content-copy.svg) no-repeat center center / contain;width: 16px; height: 16px;" class=""></span>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
@ -648,7 +648,7 @@ a {
|
||||
</table>
|
||||
<h2>Input Settings</h2>
|
||||
<p>These settings control the data input used by the pipeline. Any settings with a fallback will use the base LLM settings, if available.</p>
|
||||
<h3>Plaintext Input Data (<code>GRAPHRAG_INPUT_TYPE</code>=text)</h3>
|
||||
<h3>Plaintext Input Data (<code>GRAPHRAG_INPUT_FILE_TYPE</code>=text)</h3>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
@ -669,7 +669,7 @@ a {
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<h3>CSV Input Data (<code>GRAPHRAG_INPUT_TYPE</code>=csv)</h3>
|
||||
<h3>CSV Input Data (<code>GRAPHRAG_INPUT_FILE_TYPE</code>=csv)</h3>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
@ -682,6 +682,13 @@ a {
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><code>GRAPHRAG_INPUT_TYPE</code></td>
|
||||
<td>The input storage type to use when reading files. (<code>file</code> or <code>blob</code>)</td>
|
||||
<td><code>str</code></td>
|
||||
<td>optional</td>
|
||||
<td><code>file</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>GRAPHRAG_INPUT_FILE_PATTERN</code></td>
|
||||
<td>The file pattern regexp to use when reading input files from the input directory.</td>
|
||||
<td><code>str</code></td>
|
||||
@ -731,13 +738,6 @@ a {
|
||||
<td><code>title</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>GRAPHRAG_INPUT_STORAGE_TYPE</code></td>
|
||||
<td>The storage type to use when reading CSV input files. (<code>file</code> or <code>blob</code>)</td>
|
||||
<td><code>str</code></td>
|
||||
<td>optional</td>
|
||||
<td><code>file</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>GRAPHRAG_INPUT_STORAGE_ACCOUNT_BLOB_URL</code></td>
|
||||
<td>The Azure Storage blob endpoint to use when in <code>blob</code> mode and using managed identity. Will have the format <code>https://&lt;storage_account_name&gt;.blob.core.windows.net</code>.</td>
|
||||
<td><code>str</code></td>
|
||||
@ -780,7 +780,7 @@ a {
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><code>GRAPHRAG_INPUT_TYPE</code></td>
|
||||
<td><code>GRAPHRAG_INPUT_FILE_TYPE</code></td>
|
||||
<td>The type of input data, <code>csv</code> or <code>text</code></td>
|
||||
<td><code>str</code></td>
|
||||
<td>optional</td>
|
||||
|
||||
@ -286,7 +286,8 @@ API_KEY=some_api_key
|
||||
<h2>input</h2>
|
||||
<h3>Fields</h3>
|
||||
<ul>
|
||||
<li><code>type</code> <strong>text|csv</strong> - The type of input data to load. Either <code>text</code> or <code>csv</code>. Default is <code>csv</code></li>
|
||||
<li><code>type</code> <strong>file|blob</strong> - The input type to use. Default is <code>file</code></li>
|
||||
<li><code>file_type</code> <strong>text|csv</strong> - The type of input data to load. Either <code>text</code> or <code>csv</code>. Default is <code>csv</code></li>
|
||||
<li><code>file_encoding</code> <strong>str</strong> - The encoding of the input file. Default is <code>utf-8</code></li>
|
||||
<li><code>file_pattern</code> <strong>str</strong> - A regex to match input files. Default is <code>.*\.csv$</code> if in csv mode and <code>.*\.txt$</code> if in text mode.</li>
|
||||
<li><code>source_column</code> <strong>str</strong> - (CSV Mode Only) The source column name.</li>
|
||||
@ -295,7 +296,6 @@ API_KEY=some_api_key
|
||||
<li><code>text_column</code> <strong>str</strong> - (CSV Mode Only) The text column name.</li>
|
||||
<li><code>title_column</code> <strong>str</strong> - (CSV Mode Only) The title column name.</li>
|
||||
<li><code>document_attribute_columns</code> <strong>list[str]</strong> - (CSV Mode Only) The additional document attributes to include.</li>
|
||||
<li><code>storage_type</code> <strong>file|blob</strong> - The input storage type to use. Default=<code>file</code></li>
|
||||
<li><code>connection_string</code> <strong>str</strong> - (blob only) The Azure Storage connection string.</li>
|
||||
<li><code>container_name</code> <strong>str</strong> - (blob only) The Azure Storage container name.</li>
|
||||
<li><code>base_dir</code> <strong>str</strong> - The base directory to read input from, relative to the root.</li>
|
||||
|
||||
@ -313,7 +313,7 @@ the <code>--root</code> parameter on your Indexing Pipeline execution.</p>
|
||||
<span class="token comment"># GRAPHRAG_INPUT_FILE_PATTERN=.*\.txt</span>
|
||||
|
||||
<span class="token comment"># CSV Input Data Configuration</span>
|
||||
<span class="token assign-left variable">GRAPHRAG_INPUT_TYPE</span><span class="token operator">=</span><span class="token string">"csv"</span>
|
||||
<span class="token assign-left variable">GRAPHRAG_INPUT_FILE_TYPE</span><span class="token operator">=</span><span class="token string">"csv"</span>
|
||||
<span class="token assign-left variable">GRAPHRAG_INPUT_FILE_PATTERN</span><span class="token operator">=</span><span class="token string">".*\.csv$"</span>
|
||||
<span class="token assign-left variable">GRAPHRAG_INPUT_SOURCE_COLUMN</span><span class="token operator">=</span>source
|
||||
<span class="token comment"># GRAPHRAG_INPUT_TIMESTAMP_COLUMN=None</span>
|
||||
@ -321,7 +321,7 @@ the <code>--root</code> parameter on your Indexing Pipeline execution.</p>
|
||||
<span class="token comment"># GRAPHRAG_INPUT_TEXT_COLUMN="text"</span>
|
||||
<span class="token comment"># GRAPHRAG_INPUT_ATTRIBUTE_COLUMNS=id</span>
|
||||
<span class="token comment"># GRAPHRAG_INPUT_TITLE_COLUMN="title"</span>
|
||||
<span class="token comment"># GRAPHRAG_INPUT_STORAGE_TYPE="file"</span>
|
||||
<span class="token comment"># GRAPHRAG_INPUT_TYPE="file"</span>
|
||||
<span class="token comment"># GRAPHRAG_INPUT_CONNECTION_STRING=None</span>
|
||||
<span class="token comment"># GRAPHRAG_INPUT_CONTAINER_NAME=None</span>
|
||||
<span class="token comment"># GRAPHRAG_INPUT_BASE_DIR=None</span>
|
||||
|
||||
Loading…
Reference in New Issue
Block a user