mirror of
https://github.com/microsoft/graphrag.git
synced 2026-01-14 09:07:20 +08:00
1675 lines
46 KiB
HTML
1675 lines
46 KiB
HTML
|
|
<!doctype html>
|
|
<html lang="en" class="no-js">
|
|
<head>
|
|
|
|
<meta charset="utf-8">
|
|
<meta name="viewport" content="width=device-width,initial-scale=1">
|
|
|
|
|
|
|
|
|
|
<link rel="prev" href="../env_vars/">
|
|
|
|
|
|
<link rel="next" href="../custom/">
|
|
|
|
|
|
<link rel="icon" href="../../assets/images/favicon.png">
|
|
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.5.43">
|
|
|
|
|
|
|
|
<title>Using JSON or YAML - GraphRAG</title>
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../../assets/stylesheets/main.0253249f.min.css">
|
|
|
|
|
|
<link rel="stylesheet" href="../../assets/stylesheets/palette.06af60db.min.css">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<script src="https://wcpstatic.microsoft.com/mscc/lib/v2/wcp-consent.js"></script>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
|
|
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../../stylesheets/extra.css">
|
|
|
|
<!--
  Theme bootstrap helpers, defined as globals for later scripts:
    __md_scope : URL object for "../.." resolved against the current location.
    __md_hash  : folds a string into an integer, computing (h << 5) - h + charCode
                 for each character, starting from 0.
    __md_get   : reads the item stored under "<scope pathname>.<key>" and returns it
                 parsed as JSON; storage defaults to localStorage, scope to __md_scope.
    __md_set   : JSON-serialises a value and stores it under the same key scheme;
                 any exception raised by setItem is deliberately swallowed.
-->
<script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</head>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo">
|
|
|
|
|
|
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
|
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
|
<label class="md-overlay" for="__drawer"></label>
|
|
<div data-md-component="skip">
|
|
|
|
|
|
<a href="#default-configuration-mode-using-jsonyaml" class="md-skip">
|
|
Skip to content
|
|
</a>
|
|
|
|
</div>
|
|
<div data-md-component="announce">
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<header class="md-header" data-md-component="header">
|
|
<nav class="md-header__inner md-grid" aria-label="Header">
|
|
<a href="../.." title="GraphRAG" class="md-header__button md-logo" aria-label="GraphRAG" data-md-component="logo">
|
|
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19.375 8.5a3.25 3.25 0 1 1-3.163 4h-3a3.252 3.252 0 0 1-4.443 2.509L7.214 17.76a3.25 3.25 0 1 1-1.342-.674l1.672-2.957A3.24 3.24 0 0 1 6.75 12c0-.907.371-1.727.97-2.316L6.117 6.846A3.253 3.253 0 0 1 1.875 3.75a3.25 3.25 0 1 1 5.526 2.32l1.603 2.836A3.25 3.25 0 0 1 13.093 11h3.119a3.25 3.25 0 0 1 3.163-2.5M10 10.25a1.75 1.75 0 1 0-.001 3.499A1.75 1.75 0 0 0 10 10.25M5.125 2a1.75 1.75 0 1 0 0 3.5 1.75 1.75 0 0 0 0-3.5m12.5 9.75a1.75 1.75 0 1 0 3.5 0 1.75 1.75 0 0 0-3.5 0m-14.25 8.5a1.75 1.75 0 1 0 3.501-.001 1.75 1.75 0 0 0-3.501.001"/></svg>
|
|
|
|
</a>
|
|
<label class="md-header__button md-icon" for="__drawer">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
|
|
</label>
|
|
<div class="md-header__title" data-md-component="header-title">
|
|
<div class="md-header__ellipsis">
|
|
<div class="md-header__topic">
|
|
<span class="md-ellipsis">
|
|
GraphRAG
|
|
</span>
|
|
</div>
|
|
<div class="md-header__topic" data-md-component="header-topic">
|
|
<span class="md-ellipsis">
|
|
|
|
Using JSON or YAML
|
|
|
|
</span>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<form class="md-header__option" data-md-component="palette">
|
|
|
|
|
|
|
|
|
|
<input class="md-option" data-md-color-media="" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_0">
|
|
|
|
<label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_1" hidden>
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
|
|
</label>
|
|
|
|
|
|
|
|
|
|
|
|
<input class="md-option" data-md-color-media="" data-md-color-scheme="slate" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_1">
|
|
|
|
<label class="md-header__button md-icon" title="Switch to light mode" for="__palette_0" hidden>
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12s-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
|
|
</label>
|
|
|
|
|
|
</form>
|
|
|
|
|
|
|
|
<!--
  Restores the stored colour palette. Reads the "__palette" entry through __md_get;
  when it has a "color" object, every key/value pair of that object is written onto
  the body element as a data-md-color-<key> attribute.
  If the stored media value is exactly "(prefers-color-scheme)", the media, scheme,
  primary and accent values are first replaced with those of the input whose
  data-md-color-media matches the current light or dark preference (via matchMedia).
  NOTE(review): the two palette inputs visible on this page carry an empty
  data-md-color-media, so querySelector would return null in that branch and
  getAttribute would throw. Confirm the stored value can never be
  "(prefers-color-scheme)" for this configuration.
-->
<script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
|
|
|
|
|
|
|
|
<label class="md-header__button md-icon" for="__search">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
|
|
</label>
|
|
<div class="md-search" data-md-component="search" role="dialog">
|
|
<label class="md-search__overlay" for="__search"></label>
|
|
<div class="md-search__inner" role="search">
|
|
<form class="md-search__form" name="search">
|
|
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
|
|
<label class="md-search__icon md-icon" for="__search">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
|
|
</label>
|
|
<nav class="md-search__options" aria-label="Search">
|
|
|
|
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
|
</button>
|
|
</nav>
|
|
|
|
</form>
|
|
<div class="md-search__output">
|
|
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
|
|
<div class="md-search-result" data-md-component="search-result">
|
|
<div class="md-search-result__meta">
|
|
Initializing search
|
|
</div>
|
|
<ol class="md-search-result__list" role="presentation"></ol>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<div class="md-header__source">
|
|
<a href="https://github.com/microsoft/graphrag" title="Go to repository" class="md-source" data-md-component="source">
|
|
<div class="md-source__icon md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
|
|
</div>
|
|
<div class="md-source__repository">
|
|
graphrag
|
|
</div>
|
|
</a>
|
|
</div>
|
|
|
|
</nav>
|
|
|
|
</header>
|
|
|
|
<div class="md-container" data-md-component="container">
|
|
|
|
|
|
|
|
|
|
|
|
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
|
|
<div class="md-grid">
|
|
<ul class="md-tabs__list">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item">
|
|
<a href="../.." class="md-tabs__link">
|
|
|
|
|
|
Home
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item md-tabs__item--active">
|
|
<a href="../../index/overview/" class="md-tabs__link">
|
|
|
|
|
|
Indexing
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item">
|
|
<a href="../../prompt_tuning/overview/" class="md-tabs__link">
|
|
|
|
|
|
Prompt Tuning
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item">
|
|
<a href="../../query/overview/" class="md-tabs__link">
|
|
|
|
|
|
Query
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item">
|
|
<a href="../../blog_posts/" class="md-tabs__link">
|
|
|
|
|
|
|
|
|
|
Microsoft Research Blog
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item">
|
|
<a href="../../cli/" class="md-tabs__link">
|
|
|
|
|
|
Extras
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</div>
|
|
</nav>
|
|
|
|
|
|
|
|
<main class="md-main" data-md-component="main">
|
|
<div class="md-main__inner md-grid">
|
|
|
|
|
|
|
|
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
|
<div class="md-sidebar__scrollwrap">
|
|
<div class="md-sidebar__inner">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
|
|
<label class="md-nav__title" for="__drawer">
|
|
<a href="../.." title="GraphRAG" class="md-nav__button md-logo" aria-label="GraphRAG" data-md-component="logo">
|
|
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19.375 8.5a3.25 3.25 0 1 1-3.163 4h-3a3.252 3.252 0 0 1-4.443 2.509L7.214 17.76a3.25 3.25 0 1 1-1.342-.674l1.672-2.957A3.24 3.24 0 0 1 6.75 12c0-.907.371-1.727.97-2.316L6.117 6.846A3.253 3.253 0 0 1 1.875 3.75a3.25 3.25 0 1 1 5.526 2.32l1.603 2.836A3.25 3.25 0 0 1 13.093 11h3.119a3.25 3.25 0 0 1 3.163-2.5M10 10.25a1.75 1.75 0 1 0-.001 3.499A1.75 1.75 0 0 0 10 10.25M5.125 2a1.75 1.75 0 1 0 0 3.5 1.75 1.75 0 0 0 0-3.5m12.5 9.75a1.75 1.75 0 1 0 3.5 0 1.75 1.75 0 0 0-3.5 0m-14.25 8.5a1.75 1.75 0 1 0 3.501-.001 1.75 1.75 0 0 0-3.501.001"/></svg>
|
|
|
|
</a>
|
|
GraphRAG
|
|
</label>
|
|
|
|
<div class="md-nav__source">
|
|
<a href="https://github.com/microsoft/graphrag" title="Go to repository" class="md-source" data-md-component="source">
|
|
<div class="md-source__icon md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
|
|
</div>
|
|
<div class="md-source__repository">
|
|
graphrag
|
|
</div>
|
|
</a>
|
|
</div>
|
|
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_1" id="__nav_1_label" tabindex="0">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Home
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_1">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Home
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../.." class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Welcome
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../get_started/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Getting Started
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../developing/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Development Guide
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" checked>
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Indexing
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="true">
|
|
<label class="md-nav__title" for="__nav_2">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Indexing
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../index/overview/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Overview
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../index/architecture/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Architecture
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../index/default_dataflow/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Dataflow
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2_4" checked>
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_2_4" id="__nav_2_4_label" tabindex="0">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Configuration
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_2_4_label" aria-expanded="true">
|
|
<label class="md-nav__title" for="__nav_2_4">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Configuration
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../overview/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Overview
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../init/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Init Command
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../env_vars/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Using Env Vars
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--active">
|
|
|
|
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
|
|
|
|
|
|
|
|
|
|
|
|
<a href="./" class="md-nav__link md-nav__link--active">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Using JSON or YAML
|
|
</span>
|
|
|
|
|
|
</a>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../custom/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Fully Custom
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../template/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Template
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Prompt Tuning
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_3">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Prompt Tuning
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../prompt_tuning/overview/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Overview
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../prompt_tuning/auto_prompt_tuning/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Auto Tuning
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../prompt_tuning/manual_prompt_tuning/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Manual Tuning
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Query
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_4">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Query
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../query/overview/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Overview
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../query/global_search/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Global Search
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../query/local_search/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Local Search
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../query/drift_search/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
DRIFT Search
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../query/question_generation/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Question Generation
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4_6" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_4_6" id="__nav_4_6_label" tabindex="0">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Notebooks
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_4_6_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_4_6">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Notebooks
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../query/notebooks/overview/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Overview
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../examples_notebooks/global_search/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Global Search
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../examples_notebooks/local_search/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Local Search
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../examples_notebooks/drift_search/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
DRIFT Search
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../blog_posts/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Microsoft Research Blog
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_6" id="__nav_6_label" tabindex="0">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Extras
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_6">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Extras
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../cli/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
CLI
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_2" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_6_2" id="__nav_6_2_label" tabindex="0">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Operation Dulce
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_2_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_6_2">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Operation Dulce
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../data/operation_dulce/ABOUT/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
About
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../data/operation_dulce/Operation%20Dulce%20v2%201%201/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Document
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
|
<div class="md-sidebar__scrollwrap">
|
|
<div class="md-sidebar__inner">
|
|
|
|
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div class="md-content" data-md-component="content">
|
|
<article class="md-content__inner md-typeset">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<h1 id="default-configuration-mode-using-jsonyaml">Default Configuration Mode (using JSON/YAML)</h1>
|
|
<p>The default configuration mode may be configured by using a <code>settings.json</code> or <code>settings.yml</code> file in the data project root. If a <code>.env</code> file is present along with this config file, then it will be loaded, and the environment variables defined therein will be available for token replacements in your configuration document using <code>${ENV_VAR}</code> syntax.</p>
|
|
<p>For example:</p>
|
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a># .env
|
|
<a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a>API_KEY=some_api_key
|
|
<a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a>
|
|
<a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a># settings.json
|
|
<a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a>{
|
|
<a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a> "llm": {
|
|
<a id="__codelineno-0-7" name="__codelineno-0-7" href="#__codelineno-0-7"></a> "api_key": "${API_KEY}"
|
|
<a id="__codelineno-0-8" name="__codelineno-0-8" href="#__codelineno-0-8"></a> }
|
|
<a id="__codelineno-0-9" name="__codelineno-0-9" href="#__codelineno-0-9"></a>}
|
|
</code></pre></div>
|
|
<h1 id="config-sections">Config Sections</h1>
|
|
<h2 id="input">input</h2>
|
|
<h3 id="fields">Fields</h3>
|
|
<ul>
|
|
<li><code>type</code> <strong>file|blob</strong> - The input type to use. Default=<code>file</code></li>
|
|
<li><code>file_type</code> <strong>text|csv</strong> - The type of input data to load. Either <code>text</code> or <code>csv</code>. Default is <code>text</code></li>
|
|
<li><code>file_encoding</code> <strong>str</strong> - The encoding of the input file. Default is <code>utf-8</code></li>
|
|
<li><code>file_pattern</code> <strong>str</strong> - A regex to match input files. Default is <code>.*\.csv$</code> if in csv mode and <code>.*\.txt$</code> if in text mode.</li>
|
|
<li><code>source_column</code> <strong>str</strong> - (CSV Mode Only) The source column name.</li>
|
|
<li><code>timestamp_column</code> <strong>str</strong> - (CSV Mode Only) The timestamp column name.</li>
|
|
<li><code>timestamp_format</code> <strong>str</strong> - (CSV Mode Only) The timestamp format.</li>
|
|
<li><code>text_column</code> <strong>str</strong> - (CSV Mode Only) The text column name.</li>
|
|
<li><code>title_column</code> <strong>str</strong> - (CSV Mode Only) The title column name.</li>
|
|
<li><code>document_attribute_columns</code> <strong>list[str]</strong> - (CSV Mode Only) The additional document attributes to include.</li>
|
|
<li><code>connection_string</code> <strong>str</strong> - (blob only) The Azure Storage connection string.</li>
|
|
<li><code>container_name</code> <strong>str</strong> - (blob only) The Azure Storage container name.</li>
|
|
<li><code>base_dir</code> <strong>str</strong> - The base directory to read input from, relative to the root.</li>
|
|
<li><code>storage_account_blob_url</code> <strong>str</strong> - The storage account blob URL to use.</li>
|
|
</ul>
|
|
<h2 id="llm">llm</h2>
|
|
<p>This is the base LLM configuration section. Other steps may override this configuration with their own LLM configuration.</p>
|
|
<h3 id="fields_1">Fields</h3>
|
|
<ul>
|
|
<li><code>api_key</code> <strong>str</strong> - The OpenAI API key to use.</li>
|
|
<li><code>type</code> <strong>openai_chat|azure_openai_chat|openai_embedding|azure_openai_embedding</strong> - The type of LLM to use.</li>
|
|
<li><code>model</code> <strong>str</strong> - The model name.</li>
|
|
<li><code>max_tokens</code> <strong>int</strong> - The maximum number of output tokens.</li>
|
|
<li><code>request_timeout</code> <strong>float</strong> - The per-request timeout.</li>
|
|
<li><code>api_base</code> <strong>str</strong> - The API base url to use.</li>
|
|
<li><code>api_version</code> <strong>str</strong> - The API version.</li>
|
|
<li><code>organization</code> <strong>str</strong> - The client organization.</li>
|
|
<li><code>proxy</code> <strong>str</strong> - The proxy URL to use.</li>
|
|
<li><code>audience</code> <strong>str</strong> - (Azure OpenAI only) The URI of the target Azure resource/service for which a managed identity token is requested. Used if <code>api_key</code> is not defined. Default=<code>https://cognitiveservices.azure.com/.default</code></li>
|
|
<li><code>deployment_name</code> <strong>str</strong> - The deployment name to use (Azure).</li>
|
|
<li><code>model_supports_json</code> <strong>bool</strong> - Whether the model supports JSON-mode output.</li>
|
|
<li><code>tokens_per_minute</code> <strong>int</strong> - Set a leaky-bucket throttle on tokens-per-minute.</li>
|
|
<li><code>requests_per_minute</code> <strong>int</strong> - Set a leaky-bucket throttle on requests-per-minute.</li>
|
|
<li><code>max_retries</code> <strong>int</strong> - The maximum number of retries to use.</li>
|
|
<li><code>max_retry_wait</code> <strong>float</strong> - The maximum backoff time.</li>
|
|
<li><code>sleep_on_rate_limit_recommendation</code> <strong>bool</strong> - Whether to adhere to sleep recommendations (Azure).</li>
|
|
<li><code>concurrent_requests</code> <strong>int</strong> - The number of open requests to allow at once.</li>
|
|
<li><code>temperature</code> <strong>float</strong> - The temperature to use.</li>
|
|
<li><code>top_p</code> <strong>float</strong> - The top-p value to use.</li>
|
|
<li><code>n</code> <strong>int</strong> - The number of completions to generate.</li>
|
|
</ul>
|
|
<h2 id="parallelization">parallelization</h2>
|
|
<h3 id="fields_2">Fields</h3>
|
|
<ul>
|
|
<li><code>stagger</code> <strong>float</strong> - The threading stagger value.</li>
|
|
<li><code>num_threads</code> <strong>int</strong> - The maximum number of work threads.</li>
|
|
</ul>
|
|
<h2 id="async_mode">async_mode</h2>
|
|
<p><strong>asyncio|threaded</strong> The async mode to use. Either <code>asyncio</code> or <code>threaded</code>.</p>
|
|
<h2 id="embeddings">embeddings</h2>
|
|
<h3 id="fields_3">Fields</h3>
|
|
<ul>
|
|
<li><code>llm</code> (see LLM top-level config)</li>
|
|
<li><code>parallelization</code> (see Parallelization top-level config)</li>
|
|
<li><code>async_mode</code> (see Async Mode top-level config)</li>
|
|
<li><code>batch_size</code> <strong>int</strong> - The maximum batch size to use.</li>
|
|
<li><code>batch_max_tokens</code> <strong>int</strong> - The maximum number of tokens per batch.</li>
|
|
<li><code>target</code> <strong>required|all|none</strong> - Determines which set of embeddings to emit.</li>
|
|
<li><code>skip</code> <strong>list[str]</strong> - Which embeddings to skip. Only useful if target=all to customize the list.</li>
|
|
<li><code>vector_store</code> <strong>dict</strong> - The vector store to use. Configured for lancedb by default.</li>
|
|
<li><code>type</code> <strong>str</strong> - <code>lancedb</code> or <code>azure_ai_search</code>. Default=<code>lancedb</code></li>
|
|
<li><code>db_uri</code> <strong>str</strong> (only for lancedb) - The database uri. Default=<code>storage.base_dir/lancedb</code></li>
|
|
<li><code>url</code> <strong>str</strong> (only for AI Search) - AI Search endpoint</li>
|
|
<li><code>api_key</code> <strong>str</strong> (optional - only for AI Search) - The AI Search api key to use.</li>
|
|
<li><code>audience</code> <strong>str</strong> (only for AI Search) - Audience for managed identity token if managed identity authentication is used.</li>
|
|
<li><code>overwrite</code> <strong>bool</strong> (only used at index creation time) - Overwrite the collection if it exists. Default=<code>True</code></li>
|
|
<li><code>container_name</code> <strong>str</strong> - The name of a vector container. This stores all indexes (tables) for a given dataset ingest. Default=<code>default</code></li>
|
|
<li><code>strategy</code> <strong>dict</strong> - Fully override the text-embedding strategy.</li>
|
|
</ul>
|
|
<h2 id="chunks">chunks</h2>
|
|
<h3 id="fields_4">Fields</h3>
|
|
<ul>
|
|
<li><code>size</code> <strong>int</strong> - The max chunk size in tokens.</li>
|
|
<li><code>overlap</code> <strong>int</strong> - The chunk overlap in tokens.</li>
|
|
<li><code>group_by_columns</code> <strong>list[str]</strong> - Group documents by these fields before chunking.</li>
|
|
<li><code>encoding_model</code> <strong>str</strong> - The text encoding model to use. Default is to use the top-level encoding model.</li>
|
|
<li><code>strategy</code> <strong>dict</strong> - Fully override the chunking strategy.</li>
|
|
</ul>
|
|
<h2 id="cache">cache</h2>
|
|
<h3 id="fields_5">Fields</h3>
|
|
<ul>
|
|
<li><code>type</code> <strong>file|memory|none|blob</strong> - The cache type to use. Default=<code>file</code></li>
|
|
<li><code>connection_string</code> <strong>str</strong> - (blob only) The Azure Storage connection string.</li>
|
|
<li><code>container_name</code> <strong>str</strong> - (blob only) The Azure Storage container name.</li>
|
|
<li><code>base_dir</code> <strong>str</strong> - The base directory to write cache to, relative to the root.</li>
|
|
<li><code>storage_account_blob_url</code> <strong>str</strong> - The storage account blob URL to use.</li>
|
|
</ul>
|
|
<h2 id="storage">storage</h2>
|
|
<h3 id="fields_6">Fields</h3>
|
|
<ul>
|
|
<li><code>type</code> <strong>file|memory|blob</strong> - The storage type to use. Default=<code>file</code></li>
|
|
<li><code>connection_string</code> <strong>str</strong> - (blob only) The Azure Storage connection string.</li>
|
|
<li><code>container_name</code> <strong>str</strong> - (blob only) The Azure Storage container name.</li>
|
|
<li><code>base_dir</code> <strong>str</strong> - The base directory to write output artifacts to, relative to the root.</li>
|
|
<li><code>storage_account_blob_url</code> <strong>str</strong> - The storage account blob URL to use.</li>
|
|
</ul>
|
|
<h2 id="reporting">reporting</h2>
|
|
<h3 id="fields_7">Fields</h3>
|
|
<ul>
|
|
<li><code>type</code> <strong>file|console|blob</strong> - The reporting type to use. Default=<code>file</code></li>
|
|
<li><code>connection_string</code> <strong>str</strong> - (blob only) The Azure Storage connection string.</li>
|
|
<li><code>container_name</code> <strong>str</strong> - (blob only) The Azure Storage container name.</li>
|
|
<li><code>base_dir</code> <strong>str</strong> - The base directory to write reports to, relative to the root.</li>
|
|
<li><code>storage_account_blob_url</code> <strong>str</strong> - The storage account blob URL to use.</li>
|
|
</ul>
|
|
<h2 id="entity_extraction">entity_extraction</h2>
|
|
<h3 id="fields_8">Fields</h3>
|
|
<ul>
|
|
<li><code>llm</code> (see LLM top-level config)</li>
|
|
<li><code>parallelization</code> (see Parallelization top-level config)</li>
|
|
<li><code>async_mode</code> (see Async Mode top-level config)</li>
|
|
<li><code>prompt</code> <strong>str</strong> - The prompt file to use.</li>
|
|
<li><code>entity_types</code> <strong>list[str]</strong> - The entity types to identify.</li>
|
|
<li><code>max_gleanings</code> <strong>int</strong> - The maximum number of gleaning cycles to use.</li>
|
|
<li><code>encoding_model</code> <strong>str</strong> - The text encoding model to use. By default, this will use the top-level encoding model.</li>
|
|
<li><code>strategy</code> <strong>dict</strong> - Fully override the entity extraction strategy.</li>
|
|
</ul>
|
|
<h2 id="summarize_descriptions">summarize_descriptions</h2>
|
|
<h3 id="fields_9">Fields</h3>
|
|
<ul>
|
|
<li><code>llm</code> (see LLM top-level config)</li>
|
|
<li><code>parallelization</code> (see Parallelization top-level config)</li>
|
|
<li><code>async_mode</code> (see Async Mode top-level config)</li>
|
|
<li><code>prompt</code> <strong>str</strong> - The prompt file to use.</li>
|
|
<li><code>max_length</code> <strong>int</strong> - The maximum number of output tokens per summarization.</li>
|
|
<li><code>strategy</code> <strong>dict</strong> - Fully override the summarize description strategy.</li>
|
|
</ul>
|
|
<h2 id="claim_extraction">claim_extraction</h2>
|
|
<h3 id="fields_10">Fields</h3>
|
|
<ul>
|
|
<li><code>enabled</code> <strong>bool</strong> - Whether to enable claim extraction. Default=<code>False</code></li>
|
|
<li><code>llm</code> (see LLM top-level config)</li>
|
|
<li><code>parallelization</code> (see Parallelization top-level config)</li>
|
|
<li><code>async_mode</code> (see Async Mode top-level config)</li>
|
|
<li><code>prompt</code> <strong>str</strong> - The prompt file to use.</li>
|
|
<li><code>description</code> <strong>str</strong> - Describes the types of claims we want to extract.</li>
|
|
<li><code>max_gleanings</code> <strong>int</strong> - The maximum number of gleaning cycles to use.</li>
|
|
<li><code>encoding_model</code> <strong>str</strong> - The text encoding model to use. By default, this will use the top-level encoding model.</li>
|
|
<li><code>strategy</code> <strong>dict</strong> - Fully override the claim extraction strategy.</li>
|
|
</ul>
|
|
<h2 id="community_reports">community_reports</h2>
|
|
<h3 id="fields_11">Fields</h3>
|
|
<ul>
|
|
<li><code>llm</code> (see LLM top-level config)</li>
|
|
<li><code>parallelization</code> (see Parallelization top-level config)</li>
|
|
<li><code>async_mode</code> (see Async Mode top-level config)</li>
|
|
<li><code>prompt</code> <strong>str</strong> - The prompt file to use.</li>
|
|
<li><code>max_length</code> <strong>int</strong> - The maximum number of output tokens per report.</li>
|
|
<li><code>max_input_length</code> <strong>int</strong> - The maximum number of input tokens to use when generating reports.</li>
|
|
<li><code>strategy</code> <strong>dict</strong> - Fully override the community reports strategy.</li>
|
|
</ul>
|
|
<h2 id="cluster_graph">cluster_graph</h2>
|
|
<h3 id="fields_12">Fields</h3>
|
|
<ul>
|
|
<li><code>max_cluster_size</code> <strong>int</strong> - The maximum cluster size to emit.</li>
|
|
<li><code>strategy</code> <strong>dict</strong> - Fully override the cluster_graph strategy.</li>
|
|
</ul>
|
|
<h2 id="embed_graph">embed_graph</h2>
|
|
<h3 id="fields_13">Fields</h3>
|
|
<ul>
|
|
<li><code>enabled</code> <strong>bool</strong> - Whether to enable graph embeddings.</li>
|
|
<li><code>num_walks</code> <strong>int</strong> - The node2vec number of walks.</li>
|
|
<li><code>walk_length</code> <strong>int</strong> - The node2vec walk length.</li>
|
|
<li><code>window_size</code> <strong>int</strong> - The node2vec window size.</li>
|
|
<li><code>iterations</code> <strong>int</strong> - The node2vec number of iterations.</li>
|
|
<li><code>random_seed</code> <strong>int</strong> - The node2vec random seed.</li>
|
|
<li><code>strategy</code> <strong>dict</strong> - Fully override the embed graph strategy.</li>
|
|
</ul>
|
|
<h2 id="umap">umap</h2>
|
|
<h3 id="fields_14">Fields</h3>
|
|
<ul>
|
|
<li><code>enabled</code> <strong>bool</strong> - Whether to enable UMAP layouts.</li>
|
|
</ul>
|
|
<h2 id="snapshots">snapshots</h2>
|
|
<h3 id="fields_15">Fields</h3>
|
|
<ul>
|
|
<li><code>embeddings</code> <strong>bool</strong> - Emit embeddings snapshots to parquet.</li>
|
|
<li><code>graphml</code> <strong>bool</strong> - Emit graph snapshots to GraphML.</li>
|
|
<li><code>raw_entities</code> <strong>bool</strong> - Emit raw entity snapshots to JSON.</li>
|
|
<li><code>top_level_nodes</code> <strong>bool</strong> - Emit top-level-node snapshots to JSON.</li>
|
|
<li><code>transient</code> <strong>bool</strong> - Emit transient workflow tables snapshots to parquet.</li>
|
|
</ul>
|
|
<h2 id="encoding_model">encoding_model</h2>
|
|
<p><strong>str</strong> - The text encoding model to use. Default=<code>cl100k_base</code>.</p>
|
|
<h2 id="skip_workflows">skip_workflows</h2>
|
|
<p><strong>list[str]</strong> - Which workflow names to skip.</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</article>
|
|
</div>
|
|
|
|
|
|
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
|
</div>
|
|
|
|
</main>
|
|
|
|
<footer class="md-footer">
|
|
|
|
|
|
|
|
<nav class="md-footer__inner md-grid" aria-label="Footer" >
|
|
|
|
|
|
<a href="../env_vars/" class="md-footer__link md-footer__link--prev" aria-label="Previous: Using Env Vars">
|
|
<div class="md-footer__button md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
|
|
</div>
|
|
<div class="md-footer__title">
|
|
<span class="md-footer__direction">
|
|
Previous
|
|
</span>
|
|
<div class="md-ellipsis">
|
|
Using Env Vars
|
|
</div>
|
|
</div>
|
|
</a>
|
|
|
|
|
|
|
|
<a href="../custom/" class="md-footer__link md-footer__link--next" aria-label="Next: Fully Custom">
|
|
<div class="md-footer__title">
|
|
<span class="md-footer__direction">
|
|
Next
|
|
</span>
|
|
<div class="md-ellipsis">
|
|
Fully Custom
|
|
</div>
|
|
</div>
|
|
<div class="md-footer__button md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M4 11v2h12l-5.5 5.5 1.42 1.42L19.84 12l-7.92-7.92L10.5 5.5 16 11z"/></svg>
|
|
</div>
|
|
</a>
|
|
|
|
</nav>
|
|
|
|
|
|
<div class="md-footer-meta md-typeset">
|
|
<div class="md-footer-meta__inner md-grid">
|
|
<div class="md-copyright">
|
|
|
|
<div class="md-copyright__highlight">
|
|
© 2025 Microsoft | <a href="https://go.microsoft.com/fwlink/?LinkId=521839">Privacy</a> | <a href="https://go.microsoft.com/fwlink/?LinkId=2259814">Consumer Health Privacy</a> | <a onclick="window.manageConsent();">Cookies</a> | <a href="https://go.microsoft.com/fwlink/?LinkID=206977">Terms of Use</a> | <a href="https://www.microsoft.com/trademarks">Trademarks</a>
|
|
|
|
</div>
|
|
|
|
|
|
Made with
|
|
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
|
|
Material for MkDocs
|
|
</a>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
</div>
|
|
</footer>
|
|
|
|
</div>
|
|
<div class="md-dialog" data-md-component="dialog">
|
|
<div class="md-dialog__inner md-typeset"></div>
|
|
</div>
|
|
|
|
|
|
<script id="__config" type="application/json">{"base": "../..", "features": ["content.code.copy", "content.code.select", "navigation.footer", "navigation.tabs"], "search": "../../assets/javascripts/workers/search.6ce7567c.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script>
|
|
|
|
|
|
<script src="../../assets/javascripts/bundle.83f73b43.min.js"></script>
|
|
|
|
<script src="../../scripts/create_cookie_banner.js"></script>
|
|
|
|
|
|
</body>
|
|
</html> |