diff --git a/data/operation_dulce/dataset.zip b/data/operation_dulce/dataset.zip index d8a5123b..0e9d37a5 100644 Binary files a/data/operation_dulce/dataset.zip and b/data/operation_dulce/dataset.zip differ diff --git a/posts/config/custom/index.html b/posts/config/custom/index.html index 3a0d82ae..d46c08a7 100644 --- a/posts/config/custom/index.html +++ b/posts/config/custom/index.html @@ -389,7 +389,8 @@ a {

> input

-
input:
-  type: csv
+  
input:
+  type: file
+  file_type: csv
   base_dir: ../data/csv # the directory containing the CSV files, this is relative to the config file
   file_pattern: '.*[\/](?P<source>[^\/]+)[\/](?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})_(?P<author>[^_]+)_\d+\.csv$' # a regex to match the CSV files
   # An additional file filter which uses the named groups from the file_pattern to further filter the files
@@ -420,14 +422,15 @@ a {
         column: "title",
         value: "My document"
-
-
input:
-  type: text
+  
input:
+  type: file
+  file_type: csv
   base_dir: ../data/csv # the directory containing the CSV files, this is relative to the config file
   file_pattern: '.*[\/](?P<source>[^\/]+)[\/](?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})_(?P<author>[^_]+)_\d+\.csv$' # a regex to match the CSV files
   # An additional file filter which uses the named groups from the file_pattern to further filter the files
@@ -442,7 +445,7 @@ a {
         column: "title",
         value: "My document"
-
diff --git a/posts/config/env_vars/index.html b/posts/config/env_vars/index.html index e571d4b0..6d900a17 100644 --- a/posts/config/env_vars/index.html +++ b/posts/config/env_vars/index.html @@ -648,7 +648,7 @@ a {

Input Settings

These settings control the data input used by the pipeline. Any settings with a fallback will use the base LLM settings, if available.

-

Plaintext Input Data (GRAPHRAG_INPUT_TYPE=text)

+

Plaintext Input Data (GRAPHRAG_INPUT_FILE_TYPE=text)

@@ -669,7 +669,7 @@ a {
-

CSV Input Data (GRAPHRAG_INPUT_TYPE=csv)

+

CSV Input Data (GRAPHRAG_INPUT_FILE_TYPE=csv)

@@ -682,6 +682,13 @@ a { + + + + + + + @@ -731,13 +738,6 @@ a { - - - - - - - @@ -780,7 +780,7 @@ a { - + diff --git a/posts/config/json_yaml/index.html b/posts/config/json_yaml/index.html index a6feed83..855ebc5e 100644 --- a/posts/config/json_yaml/index.html +++ b/posts/config/json_yaml/index.html @@ -286,7 +286,8 @@ API_KEY=some_api_key

input

Fields

GRAPHRAG_INPUT_TYPE The input storage type to use when reading files. (file or blob) str optional file
GRAPHRAG_INPUT_FILE_PATTERN The file pattern regexp to use when reading input files from the input directory. strtitle
GRAPHRAG_INPUT_STORAGE_TYPE The storage type to use when reading CSV input files. (file or blob) str optional file
GRAPHRAG_INPUT_STORAGE_ACCOUNT_BLOB_URL The Azure Storage blob endpoint to use when in blob mode and using managed identity. Will have the format https://<storage_account_name>.blob.core.windows.net str
GRAPHRAG_INPUT_TYPE → GRAPHRAG_INPUT_FILE_TYPE The type of input data, csv or text str optional