mirror of
https://github.com/langgenius/dify.git
synced 2026-01-13 21:57:48 +08:00
feat(embedding-process): implement embedding process components and polling logic (#30622)
Some checks are pending
autofix.ci / autofix (push) Waiting to run
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Waiting to run
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Waiting to run
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Blocked by required conditions
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Blocked by required conditions
Main CI Pipeline / Check Changed Files (push) Waiting to run
Main CI Pipeline / API Tests (push) Blocked by required conditions
Main CI Pipeline / Web Tests (push) Blocked by required conditions
Main CI Pipeline / Style Check (push) Waiting to run
Main CI Pipeline / VDB Tests (push) Blocked by required conditions
Main CI Pipeline / DB Migration Test (push) Blocked by required conditions
Some checks are pending
autofix.ci / autofix (push) Waiting to run
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Waiting to run
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Waiting to run
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Blocked by required conditions
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Blocked by required conditions
Main CI Pipeline / Check Changed Files (push) Waiting to run
Main CI Pipeline / API Tests (push) Blocked by required conditions
Main CI Pipeline / Web Tests (push) Blocked by required conditions
Main CI Pipeline / Style Check (push) Waiting to run
Main CI Pipeline / VDB Tests (push) Blocked by required conditions
Main CI Pipeline / DB Migration Test (push) Blocked by required conditions
Co-authored-by: CodingOnStar <hanxujiang@dify.ai>
This commit is contained in:
parent
9848823dcd
commit
98df99b0ca
1562
web/app/components/datasets/create/embedding-process/index.spec.tsx
Normal file
1562
web/app/components/datasets/create/embedding-process/index.spec.tsx
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,47 +1,29 @@
|
|||||||
import type { FC } from 'react'
|
import type { FC } from 'react'
|
||||||
import type {
|
import type { FullDocumentDetail } from '@/models/datasets'
|
||||||
DataSourceInfo,
|
import type { RETRIEVE_METHOD } from '@/types/app'
|
||||||
FullDocumentDetail,
|
|
||||||
IndexingStatusResponse,
|
|
||||||
LegacyDataSourceInfo,
|
|
||||||
ProcessRuleResponse,
|
|
||||||
} from '@/models/datasets'
|
|
||||||
import {
|
import {
|
||||||
RiArrowRightLine,
|
RiArrowRightLine,
|
||||||
RiCheckboxCircleFill,
|
|
||||||
RiErrorWarningFill,
|
|
||||||
RiLoader2Fill,
|
RiLoader2Fill,
|
||||||
RiTerminalBoxLine,
|
RiTerminalBoxLine,
|
||||||
} from '@remixicon/react'
|
} from '@remixicon/react'
|
||||||
import Image from 'next/image'
|
|
||||||
import Link from 'next/link'
|
import Link from 'next/link'
|
||||||
import { useRouter } from 'next/navigation'
|
import { useRouter } from 'next/navigation'
|
||||||
import * as React from 'react'
|
import { useMemo } from 'react'
|
||||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
|
|
||||||
import { useTranslation } from 'react-i18next'
|
import { useTranslation } from 'react-i18next'
|
||||||
import Button from '@/app/components/base/button'
|
import Button from '@/app/components/base/button'
|
||||||
import Divider from '@/app/components/base/divider'
|
import Divider from '@/app/components/base/divider'
|
||||||
import { ZapFast } from '@/app/components/base/icons/src/vender/solid/general'
|
|
||||||
import NotionIcon from '@/app/components/base/notion-icon'
|
|
||||||
import Tooltip from '@/app/components/base/tooltip'
|
|
||||||
import PriorityLabel from '@/app/components/billing/priority-label'
|
|
||||||
import { Plan } from '@/app/components/billing/type'
|
import { Plan } from '@/app/components/billing/type'
|
||||||
import UpgradeBtn from '@/app/components/billing/upgrade-btn'
|
|
||||||
import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata'
|
|
||||||
import { useProviderContext } from '@/context/provider-context'
|
import { useProviderContext } from '@/context/provider-context'
|
||||||
import { useDatasetApiAccessUrl } from '@/hooks/use-api-access-url'
|
import { useDatasetApiAccessUrl } from '@/hooks/use-api-access-url'
|
||||||
import { DataSourceType, ProcessMode } from '@/models/datasets'
|
|
||||||
import { fetchIndexingStatusBatch as doFetchIndexingStatus } from '@/service/datasets'
|
|
||||||
import { useProcessRule } from '@/service/knowledge/use-dataset'
|
import { useProcessRule } from '@/service/knowledge/use-dataset'
|
||||||
import { useInvalidDocumentList } from '@/service/knowledge/use-document'
|
import { useInvalidDocumentList } from '@/service/knowledge/use-document'
|
||||||
import { RETRIEVE_METHOD } from '@/types/app'
|
import IndexingProgressItem from './indexing-progress-item'
|
||||||
import { sleep } from '@/utils'
|
import RuleDetail from './rule-detail'
|
||||||
import { cn } from '@/utils/classnames'
|
import UpgradeBanner from './upgrade-banner'
|
||||||
import DocumentFileIcon from '../../common/document-file-icon'
|
import { useIndexingStatusPolling } from './use-indexing-status-polling'
|
||||||
import { indexMethodIcon, retrievalIcon } from '../icons'
|
import { createDocumentLookup } from './utils'
|
||||||
import { IndexingType } from '../step-two'
|
|
||||||
|
|
||||||
type Props = {
|
type EmbeddingProcessProps = {
|
||||||
datasetId: string
|
datasetId: string
|
||||||
batchId: string
|
batchId: string
|
||||||
documents?: FullDocumentDetail[]
|
documents?: FullDocumentDetail[]
|
||||||
@ -49,333 +31,121 @@ type Props = {
|
|||||||
retrievalMethod?: RETRIEVE_METHOD
|
retrievalMethod?: RETRIEVE_METHOD
|
||||||
}
|
}
|
||||||
|
|
||||||
const RuleDetail: FC<{
|
// Status header component
|
||||||
sourceData?: ProcessRuleResponse
|
const StatusHeader: FC<{ isEmbedding: boolean, isCompleted: boolean }> = ({
|
||||||
indexingType?: string
|
isEmbedding,
|
||||||
retrievalMethod?: RETRIEVE_METHOD
|
isCompleted,
|
||||||
}> = ({ sourceData, indexingType, retrievalMethod }) => {
|
}) => {
|
||||||
const { t } = useTranslation()
|
const { t } = useTranslation()
|
||||||
|
|
||||||
const segmentationRuleMap = {
|
|
||||||
mode: t('embedding.mode', { ns: 'datasetDocuments' }),
|
|
||||||
segmentLength: t('embedding.segmentLength', { ns: 'datasetDocuments' }),
|
|
||||||
textCleaning: t('embedding.textCleaning', { ns: 'datasetDocuments' }),
|
|
||||||
}
|
|
||||||
|
|
||||||
const getRuleName = (key: string) => {
|
|
||||||
if (key === 'remove_extra_spaces')
|
|
||||||
return t('stepTwo.removeExtraSpaces', { ns: 'datasetCreation' })
|
|
||||||
|
|
||||||
if (key === 'remove_urls_emails')
|
|
||||||
return t('stepTwo.removeUrlEmails', { ns: 'datasetCreation' })
|
|
||||||
|
|
||||||
if (key === 'remove_stopwords')
|
|
||||||
return t('stepTwo.removeStopwords', { ns: 'datasetCreation' })
|
|
||||||
}
|
|
||||||
|
|
||||||
const isNumber = (value: unknown) => {
|
|
||||||
return typeof value === 'number'
|
|
||||||
}
|
|
||||||
|
|
||||||
const getValue = useCallback((field: string) => {
|
|
||||||
let value: string | number | undefined = '-'
|
|
||||||
const maxTokens = isNumber(sourceData?.rules?.segmentation?.max_tokens)
|
|
||||||
? sourceData.rules.segmentation.max_tokens
|
|
||||||
: value
|
|
||||||
const childMaxTokens = isNumber(sourceData?.rules?.subchunk_segmentation?.max_tokens)
|
|
||||||
? sourceData.rules.subchunk_segmentation.max_tokens
|
|
||||||
: value
|
|
||||||
switch (field) {
|
|
||||||
case 'mode':
|
|
||||||
value = !sourceData?.mode
|
|
||||||
? value
|
|
||||||
: sourceData.mode === ProcessMode.general
|
|
||||||
? (t('embedding.custom', { ns: 'datasetDocuments' }) as string)
|
|
||||||
: `${t('embedding.hierarchical', { ns: 'datasetDocuments' })} · ${sourceData?.rules?.parent_mode === 'paragraph'
|
|
||||||
? t('parentMode.paragraph', { ns: 'dataset' })
|
|
||||||
: t('parentMode.fullDoc', { ns: 'dataset' })}`
|
|
||||||
break
|
|
||||||
case 'segmentLength':
|
|
||||||
value = !sourceData?.mode
|
|
||||||
? value
|
|
||||||
: sourceData.mode === ProcessMode.general
|
|
||||||
? maxTokens
|
|
||||||
: `${t('embedding.parentMaxTokens', { ns: 'datasetDocuments' })} ${maxTokens}; ${t('embedding.childMaxTokens', { ns: 'datasetDocuments' })} ${childMaxTokens}`
|
|
||||||
break
|
|
||||||
default:
|
|
||||||
value = !sourceData?.mode
|
|
||||||
? value
|
|
||||||
: sourceData?.rules?.pre_processing_rules?.filter(rule =>
|
|
||||||
rule.enabled).map(rule => getRuleName(rule.id)).join(',')
|
|
||||||
break
|
|
||||||
}
|
|
||||||
return value
|
|
||||||
}, [sourceData])
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="flex flex-col gap-1">
|
<div className="system-md-semibold-uppercase flex items-center gap-x-1 text-text-secondary">
|
||||||
{Object.keys(segmentationRuleMap).map((field) => {
|
{isEmbedding && (
|
||||||
return (
|
<>
|
||||||
<FieldInfo
|
<RiLoader2Fill className="size-4 animate-spin" />
|
||||||
key={field}
|
<span>{t('embedding.processing', { ns: 'datasetDocuments' })}</span>
|
||||||
label={segmentationRuleMap[field as keyof typeof segmentationRuleMap]}
|
</>
|
||||||
displayedValue={String(getValue(field))}
|
)}
|
||||||
/>
|
{isCompleted && t('embedding.completed', { ns: 'datasetDocuments' })}
|
||||||
)
|
|
||||||
})}
|
|
||||||
<FieldInfo
|
|
||||||
label={t('stepTwo.indexMode', { ns: 'datasetCreation' })}
|
|
||||||
displayedValue={t(`stepTwo.${indexingType === IndexingType.ECONOMICAL ? 'economical' : 'qualified'}`, { ns: 'datasetCreation' }) as string}
|
|
||||||
valueIcon={(
|
|
||||||
<Image
|
|
||||||
className="size-4"
|
|
||||||
src={
|
|
||||||
indexingType === IndexingType.ECONOMICAL
|
|
||||||
? indexMethodIcon.economical
|
|
||||||
: indexMethodIcon.high_quality
|
|
||||||
}
|
|
||||||
alt=""
|
|
||||||
/>
|
|
||||||
)}
|
|
||||||
/>
|
|
||||||
<FieldInfo
|
|
||||||
label={t('form.retrievalSetting.title', { ns: 'datasetSettings' })}
|
|
||||||
// displayedValue={t(`datasetSettings.form.retrievalSetting.${retrievalMethod}`) as string}
|
|
||||||
displayedValue={t(`retrieval.${indexingType === IndexingType.ECONOMICAL ? 'keyword_search' : retrievalMethod ?? 'semantic_search'}.title`, { ns: 'dataset' })}
|
|
||||||
valueIcon={(
|
|
||||||
<Image
|
|
||||||
className="size-4"
|
|
||||||
src={
|
|
||||||
retrievalMethod === RETRIEVE_METHOD.fullText
|
|
||||||
? retrievalIcon.fullText
|
|
||||||
: retrievalMethod === RETRIEVE_METHOD.hybrid
|
|
||||||
? retrievalIcon.hybrid
|
|
||||||
: retrievalIcon.vector
|
|
||||||
}
|
|
||||||
alt=""
|
|
||||||
/>
|
|
||||||
)}
|
|
||||||
/>
|
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], indexingType, retrievalMethod }) => {
|
// Action buttons component
|
||||||
|
const ActionButtons: FC<{
|
||||||
|
apiReferenceUrl: string
|
||||||
|
onNavToDocuments: () => void
|
||||||
|
}> = ({ apiReferenceUrl, onNavToDocuments }) => {
|
||||||
const { t } = useTranslation()
|
const { t } = useTranslation()
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="mt-6 flex items-center gap-x-2 py-2">
|
||||||
|
<Link href={apiReferenceUrl} target="_blank" rel="noopener noreferrer">
|
||||||
|
<Button className="w-fit gap-x-0.5 px-3">
|
||||||
|
<RiTerminalBoxLine className="size-4" />
|
||||||
|
<span className="px-0.5">Access the API</span>
|
||||||
|
</Button>
|
||||||
|
</Link>
|
||||||
|
<Button
|
||||||
|
className="w-fit gap-x-0.5 px-3"
|
||||||
|
variant="primary"
|
||||||
|
onClick={onNavToDocuments}
|
||||||
|
>
|
||||||
|
<span className="px-0.5">{t('stepThree.navTo', { ns: 'datasetCreation' })}</span>
|
||||||
|
<RiArrowRightLine className="size-4 stroke-current stroke-1" />
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
const EmbeddingProcess: FC<EmbeddingProcessProps> = ({
|
||||||
|
datasetId,
|
||||||
|
batchId,
|
||||||
|
documents = [],
|
||||||
|
indexingType,
|
||||||
|
retrievalMethod,
|
||||||
|
}) => {
|
||||||
const { enableBilling, plan } = useProviderContext()
|
const { enableBilling, plan } = useProviderContext()
|
||||||
|
|
||||||
const getFirstDocument = documents[0]
|
|
||||||
|
|
||||||
const [indexingStatusBatchDetail, setIndexingStatusDetail] = useState<IndexingStatusResponse[]>([])
|
|
||||||
const fetchIndexingStatus = async () => {
|
|
||||||
const status = await doFetchIndexingStatus({ datasetId, batchId })
|
|
||||||
setIndexingStatusDetail(status.data)
|
|
||||||
return status.data
|
|
||||||
}
|
|
||||||
|
|
||||||
const [isStopQuery, setIsStopQuery] = useState(false)
|
|
||||||
const isStopQueryRef = useRef(isStopQuery)
|
|
||||||
useEffect(() => {
|
|
||||||
isStopQueryRef.current = isStopQuery
|
|
||||||
}, [isStopQuery])
|
|
||||||
const stopQueryStatus = () => {
|
|
||||||
setIsStopQuery(true)
|
|
||||||
}
|
|
||||||
|
|
||||||
const startQueryStatus = async () => {
|
|
||||||
if (isStopQueryRef.current)
|
|
||||||
return
|
|
||||||
|
|
||||||
try {
|
|
||||||
const indexingStatusBatchDetail = await fetchIndexingStatus()
|
|
||||||
const isCompleted = indexingStatusBatchDetail.every(indexingStatusDetail => ['completed', 'error', 'paused'].includes(indexingStatusDetail.indexing_status))
|
|
||||||
if (isCompleted) {
|
|
||||||
stopQueryStatus()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
await sleep(2500)
|
|
||||||
await startQueryStatus()
|
|
||||||
}
|
|
||||||
catch {
|
|
||||||
await sleep(2500)
|
|
||||||
await startQueryStatus()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
useEffect(() => {
|
|
||||||
setIsStopQuery(false)
|
|
||||||
startQueryStatus()
|
|
||||||
return () => {
|
|
||||||
stopQueryStatus()
|
|
||||||
}
|
|
||||||
}, [])
|
|
||||||
|
|
||||||
// get rule
|
|
||||||
const { data: ruleDetail } = useProcessRule(getFirstDocument?.id)
|
|
||||||
|
|
||||||
const router = useRouter()
|
const router = useRouter()
|
||||||
const invalidDocumentList = useInvalidDocumentList()
|
const invalidDocumentList = useInvalidDocumentList()
|
||||||
const navToDocumentList = () => {
|
const apiReferenceUrl = useDatasetApiAccessUrl()
|
||||||
|
|
||||||
|
// Polling hook for indexing status
|
||||||
|
const { statusList, isEmbedding, isEmbeddingCompleted } = useIndexingStatusPolling({
|
||||||
|
datasetId,
|
||||||
|
batchId,
|
||||||
|
})
|
||||||
|
|
||||||
|
// Get process rule for the first document
|
||||||
|
const firstDocumentId = documents[0]?.id
|
||||||
|
const { data: ruleDetail } = useProcessRule(firstDocumentId)
|
||||||
|
|
||||||
|
// Document lookup utilities - memoized for performance
|
||||||
|
const documentLookup = useMemo(
|
||||||
|
() => createDocumentLookup(documents),
|
||||||
|
[documents],
|
||||||
|
)
|
||||||
|
|
||||||
|
const handleNavToDocuments = () => {
|
||||||
invalidDocumentList()
|
invalidDocumentList()
|
||||||
router.push(`/datasets/${datasetId}/documents`)
|
router.push(`/datasets/${datasetId}/documents`)
|
||||||
}
|
}
|
||||||
const apiReferenceUrl = useDatasetApiAccessUrl()
|
|
||||||
|
|
||||||
const isEmbedding = useMemo(() => {
|
const showUpgradeBanner = enableBilling && plan.type !== Plan.team
|
||||||
return indexingStatusBatchDetail.some(indexingStatusDetail => ['indexing', 'splitting', 'parsing', 'cleaning'].includes(indexingStatusDetail?.indexing_status || ''))
|
|
||||||
}, [indexingStatusBatchDetail])
|
|
||||||
const isEmbeddingCompleted = useMemo(() => {
|
|
||||||
return indexingStatusBatchDetail.every(indexingStatusDetail => ['completed', 'error', 'paused'].includes(indexingStatusDetail?.indexing_status || ''))
|
|
||||||
}, [indexingStatusBatchDetail])
|
|
||||||
|
|
||||||
const getSourceName = (id: string) => {
|
|
||||||
const doc = documents.find(document => document.id === id)
|
|
||||||
return doc?.name
|
|
||||||
}
|
|
||||||
const getFileType = (name?: string) => name?.split('.').pop() || 'txt'
|
|
||||||
const getSourcePercent = (detail: IndexingStatusResponse) => {
|
|
||||||
const completedCount = detail.completed_segments || 0
|
|
||||||
const totalCount = detail.total_segments || 0
|
|
||||||
if (totalCount === 0)
|
|
||||||
return 0
|
|
||||||
const percent = Math.round(completedCount * 100 / totalCount)
|
|
||||||
return percent > 100 ? 100 : percent
|
|
||||||
}
|
|
||||||
const getSourceType = (id: string) => {
|
|
||||||
const doc = documents.find(document => document.id === id)
|
|
||||||
return doc?.data_source_type as DataSourceType
|
|
||||||
}
|
|
||||||
|
|
||||||
const isLegacyDataSourceInfo = (info: DataSourceInfo): info is LegacyDataSourceInfo => {
|
|
||||||
return info != null && typeof (info as LegacyDataSourceInfo).upload_file === 'object'
|
|
||||||
}
|
|
||||||
|
|
||||||
const getIcon = (id: string) => {
|
|
||||||
const doc = documents.find(document => document.id === id)
|
|
||||||
const info = doc?.data_source_info
|
|
||||||
if (info && isLegacyDataSourceInfo(info))
|
|
||||||
return info.notion_page_icon
|
|
||||||
return undefined
|
|
||||||
}
|
|
||||||
const isSourceEmbedding = (detail: IndexingStatusResponse) =>
|
|
||||||
['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'].includes(detail.indexing_status || '')
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
<div className="flex flex-col gap-y-3">
|
<div className="flex flex-col gap-y-3">
|
||||||
<div className="system-md-semibold-uppercase flex items-center gap-x-1 text-text-secondary">
|
<StatusHeader isEmbedding={isEmbedding} isCompleted={isEmbeddingCompleted} />
|
||||||
{isEmbedding && (
|
|
||||||
<>
|
{showUpgradeBanner && <UpgradeBanner />}
|
||||||
<RiLoader2Fill className="size-4 animate-spin" />
|
|
||||||
<span>{t('embedding.processing', { ns: 'datasetDocuments' })}</span>
|
|
||||||
</>
|
|
||||||
)}
|
|
||||||
{isEmbeddingCompleted && t('embedding.completed', { ns: 'datasetDocuments' })}
|
|
||||||
</div>
|
|
||||||
{
|
|
||||||
enableBilling && plan.type !== Plan.team && (
|
|
||||||
<div className="flex h-14 items-center rounded-xl border-[0.5px] border-black/5 bg-white p-3 shadow-md">
|
|
||||||
<div className="flex h-8 w-8 shrink-0 items-center justify-center rounded-lg bg-[#FFF6ED]">
|
|
||||||
<ZapFast className="h-4 w-4 text-[#FB6514]" />
|
|
||||||
</div>
|
|
||||||
<div className="mx-3 grow text-[13px] font-medium text-gray-700">
|
|
||||||
{t('plansCommon.documentProcessingPriorityUpgrade', { ns: 'billing' })}
|
|
||||||
</div>
|
|
||||||
<UpgradeBtn loc="knowledge-speed-up" />
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
}
|
|
||||||
<div className="flex flex-col gap-0.5 pb-2">
|
<div className="flex flex-col gap-0.5 pb-2">
|
||||||
{indexingStatusBatchDetail.map(indexingStatusDetail => (
|
{statusList.map(detail => (
|
||||||
<div
|
<IndexingProgressItem
|
||||||
key={indexingStatusDetail.id}
|
key={detail.id}
|
||||||
className={cn(
|
detail={detail}
|
||||||
'relative h-[26px] overflow-hidden rounded-md bg-components-progress-bar-bg',
|
name={documentLookup.getName(detail.id)}
|
||||||
indexingStatusDetail.indexing_status === 'error' && 'bg-state-destructive-hover-alt',
|
sourceType={documentLookup.getSourceType(detail.id)}
|
||||||
)}
|
notionIcon={documentLookup.getNotionIcon(detail.id)}
|
||||||
>
|
enableBilling={enableBilling}
|
||||||
{isSourceEmbedding(indexingStatusDetail) && (
|
/>
|
||||||
<div
|
|
||||||
className="absolute left-0 top-0 h-full min-w-0.5 border-r-[2px] border-r-components-progress-bar-progress-highlight bg-components-progress-bar-progress"
|
|
||||||
style={{ width: `${getSourcePercent(indexingStatusDetail)}%` }}
|
|
||||||
/>
|
|
||||||
)}
|
|
||||||
<div className="z-[1] flex h-full items-center gap-1 pl-[6px] pr-2">
|
|
||||||
{getSourceType(indexingStatusDetail.id) === DataSourceType.FILE && (
|
|
||||||
<DocumentFileIcon
|
|
||||||
size="sm"
|
|
||||||
className="shrink-0"
|
|
||||||
name={getSourceName(indexingStatusDetail.id)}
|
|
||||||
extension={getFileType(getSourceName(indexingStatusDetail.id))}
|
|
||||||
/>
|
|
||||||
)}
|
|
||||||
{getSourceType(indexingStatusDetail.id) === DataSourceType.NOTION && (
|
|
||||||
<NotionIcon
|
|
||||||
className="shrink-0"
|
|
||||||
type="page"
|
|
||||||
src={getIcon(indexingStatusDetail.id)}
|
|
||||||
/>
|
|
||||||
)}
|
|
||||||
<div className="flex w-0 grow items-center gap-1" title={getSourceName(indexingStatusDetail.id)}>
|
|
||||||
<div className="system-xs-medium truncate text-text-secondary">
|
|
||||||
{getSourceName(indexingStatusDetail.id)}
|
|
||||||
</div>
|
|
||||||
{
|
|
||||||
enableBilling && (
|
|
||||||
<PriorityLabel className="ml-0" />
|
|
||||||
)
|
|
||||||
}
|
|
||||||
</div>
|
|
||||||
{isSourceEmbedding(indexingStatusDetail) && (
|
|
||||||
<div className="shrink-0 text-xs text-text-secondary">{`${getSourcePercent(indexingStatusDetail)}%`}</div>
|
|
||||||
)}
|
|
||||||
{indexingStatusDetail.indexing_status === 'error' && (
|
|
||||||
<Tooltip
|
|
||||||
popupClassName="px-4 py-[14px] max-w-60 body-xs-regular text-text-secondary border-[0.5px] border-components-panel-border rounded-xl"
|
|
||||||
offset={4}
|
|
||||||
popupContent={indexingStatusDetail.error}
|
|
||||||
>
|
|
||||||
<span>
|
|
||||||
<RiErrorWarningFill className="size-4 shrink-0 text-text-destructive" />
|
|
||||||
</span>
|
|
||||||
</Tooltip>
|
|
||||||
)}
|
|
||||||
{indexingStatusDetail.indexing_status === 'completed' && (
|
|
||||||
<RiCheckboxCircleFill className="size-4 shrink-0 text-text-success" />
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
))}
|
))}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<Divider type="horizontal" className="my-0 bg-divider-subtle" />
|
<Divider type="horizontal" className="my-0 bg-divider-subtle" />
|
||||||
|
|
||||||
<RuleDetail
|
<RuleDetail
|
||||||
sourceData={ruleDetail}
|
sourceData={ruleDetail}
|
||||||
indexingType={indexingType}
|
indexingType={indexingType}
|
||||||
retrievalMethod={retrievalMethod}
|
retrievalMethod={retrievalMethod}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
<div className="mt-6 flex items-center gap-x-2 py-2">
|
|
||||||
<Link
|
<ActionButtons
|
||||||
href={apiReferenceUrl}
|
apiReferenceUrl={apiReferenceUrl}
|
||||||
target="_blank"
|
onNavToDocuments={handleNavToDocuments}
|
||||||
rel="noopener noreferrer"
|
/>
|
||||||
>
|
|
||||||
<Button
|
|
||||||
className="w-fit gap-x-0.5 px-3"
|
|
||||||
>
|
|
||||||
<RiTerminalBoxLine className="size-4" />
|
|
||||||
<span className="px-0.5">Access the API</span>
|
|
||||||
</Button>
|
|
||||||
</Link>
|
|
||||||
<Button
|
|
||||||
className="w-fit gap-x-0.5 px-3"
|
|
||||||
variant="primary"
|
|
||||||
onClick={navToDocumentList}
|
|
||||||
>
|
|
||||||
<span className="px-0.5">{t('stepThree.navTo', { ns: 'datasetCreation' })}</span>
|
|
||||||
<RiArrowRightLine className="size-4 stroke-current stroke-1" />
|
|
||||||
</Button>
|
|
||||||
</div>
|
|
||||||
</>
|
</>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -0,0 +1,120 @@
|
|||||||
|
import type { FC } from 'react'
|
||||||
|
import type { IndexingStatusResponse } from '@/models/datasets'
|
||||||
|
import {
|
||||||
|
RiCheckboxCircleFill,
|
||||||
|
RiErrorWarningFill,
|
||||||
|
} from '@remixicon/react'
|
||||||
|
import NotionIcon from '@/app/components/base/notion-icon'
|
||||||
|
import Tooltip from '@/app/components/base/tooltip'
|
||||||
|
import PriorityLabel from '@/app/components/billing/priority-label'
|
||||||
|
import { DataSourceType } from '@/models/datasets'
|
||||||
|
import { cn } from '@/utils/classnames'
|
||||||
|
import DocumentFileIcon from '../../common/document-file-icon'
|
||||||
|
import { getFileType, getSourcePercent, isSourceEmbedding } from './utils'
|
||||||
|
|
||||||
|
type IndexingProgressItemProps = {
|
||||||
|
detail: IndexingStatusResponse
|
||||||
|
name?: string
|
||||||
|
sourceType?: DataSourceType
|
||||||
|
notionIcon?: string
|
||||||
|
enableBilling?: boolean
|
||||||
|
}
|
||||||
|
|
||||||
|
// Status icon component for completed/error states
|
||||||
|
const StatusIcon: FC<{ status: string, error?: string }> = ({ status, error }) => {
|
||||||
|
if (status === 'completed')
|
||||||
|
return <RiCheckboxCircleFill className="size-4 shrink-0 text-text-success" />
|
||||||
|
|
||||||
|
if (status === 'error') {
|
||||||
|
return (
|
||||||
|
<Tooltip
|
||||||
|
popupClassName="px-4 py-[14px] max-w-60 body-xs-regular text-text-secondary border-[0.5px] border-components-panel-border rounded-xl"
|
||||||
|
offset={4}
|
||||||
|
popupContent={error}
|
||||||
|
>
|
||||||
|
<span>
|
||||||
|
<RiErrorWarningFill className="size-4 shrink-0 text-text-destructive" />
|
||||||
|
</span>
|
||||||
|
</Tooltip>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
|
// Source type icon component
|
||||||
|
const SourceTypeIcon: FC<{
|
||||||
|
sourceType?: DataSourceType
|
||||||
|
name?: string
|
||||||
|
notionIcon?: string
|
||||||
|
}> = ({ sourceType, name, notionIcon }) => {
|
||||||
|
if (sourceType === DataSourceType.FILE) {
|
||||||
|
return (
|
||||||
|
<DocumentFileIcon
|
||||||
|
size="sm"
|
||||||
|
className="shrink-0"
|
||||||
|
name={name}
|
||||||
|
extension={getFileType(name)}
|
||||||
|
/>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sourceType === DataSourceType.NOTION) {
|
||||||
|
return (
|
||||||
|
<NotionIcon
|
||||||
|
className="shrink-0"
|
||||||
|
type="page"
|
||||||
|
src={notionIcon}
|
||||||
|
/>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
|
const IndexingProgressItem: FC<IndexingProgressItemProps> = ({
|
||||||
|
detail,
|
||||||
|
name,
|
||||||
|
sourceType,
|
||||||
|
notionIcon,
|
||||||
|
enableBilling,
|
||||||
|
}) => {
|
||||||
|
const isEmbedding = isSourceEmbedding(detail)
|
||||||
|
const percent = getSourcePercent(detail)
|
||||||
|
const isError = detail.indexing_status === 'error'
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div
|
||||||
|
className={cn(
|
||||||
|
'relative h-[26px] overflow-hidden rounded-md bg-components-progress-bar-bg',
|
||||||
|
isError && 'bg-state-destructive-hover-alt',
|
||||||
|
)}
|
||||||
|
>
|
||||||
|
{isEmbedding && (
|
||||||
|
<div
|
||||||
|
className="absolute left-0 top-0 h-full min-w-0.5 border-r-[2px] border-r-components-progress-bar-progress-highlight bg-components-progress-bar-progress"
|
||||||
|
style={{ width: `${percent}%` }}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
|
<div className="z-[1] flex h-full items-center gap-1 pl-[6px] pr-2">
|
||||||
|
<SourceTypeIcon
|
||||||
|
sourceType={sourceType}
|
||||||
|
name={name}
|
||||||
|
notionIcon={notionIcon}
|
||||||
|
/>
|
||||||
|
<div className="flex w-0 grow items-center gap-1" title={name}>
|
||||||
|
<div className="system-xs-medium truncate text-text-secondary">
|
||||||
|
{name}
|
||||||
|
</div>
|
||||||
|
{enableBilling && <PriorityLabel className="ml-0" />}
|
||||||
|
</div>
|
||||||
|
{isEmbedding && (
|
||||||
|
<div className="shrink-0 text-xs text-text-secondary">{`${percent}%`}</div>
|
||||||
|
)}
|
||||||
|
<StatusIcon status={detail.indexing_status} error={detail.error} />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
export default IndexingProgressItem
|
||||||
@ -0,0 +1,133 @@
|
|||||||
|
import type { FC } from 'react'
|
||||||
|
import type { ProcessRuleResponse } from '@/models/datasets'
|
||||||
|
import Image from 'next/image'
|
||||||
|
import { useCallback } from 'react'
|
||||||
|
import { useTranslation } from 'react-i18next'
|
||||||
|
import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata'
|
||||||
|
import { ProcessMode } from '@/models/datasets'
|
||||||
|
import { RETRIEVE_METHOD } from '@/types/app'
|
||||||
|
import { indexMethodIcon, retrievalIcon } from '../icons'
|
||||||
|
import { IndexingType } from '../step-two'
|
||||||
|
|
||||||
|
type RuleDetailProps = {
|
||||||
|
sourceData?: ProcessRuleResponse
|
||||||
|
indexingType?: string
|
||||||
|
retrievalMethod?: RETRIEVE_METHOD
|
||||||
|
}
|
||||||
|
|
||||||
|
// Lookup table for pre-processing rule names
|
||||||
|
const PRE_PROCESSING_RULE_KEYS = {
|
||||||
|
remove_extra_spaces: 'stepTwo.removeExtraSpaces',
|
||||||
|
remove_urls_emails: 'stepTwo.removeUrlEmails',
|
||||||
|
remove_stopwords: 'stepTwo.removeStopwords',
|
||||||
|
} as const
|
||||||
|
|
||||||
|
// Lookup table for retrieval method icons
|
||||||
|
const RETRIEVAL_ICON_MAP: Partial<Record<RETRIEVE_METHOD, string>> = {
|
||||||
|
[RETRIEVE_METHOD.fullText]: retrievalIcon.fullText,
|
||||||
|
[RETRIEVE_METHOD.hybrid]: retrievalIcon.hybrid,
|
||||||
|
[RETRIEVE_METHOD.semantic]: retrievalIcon.vector,
|
||||||
|
[RETRIEVE_METHOD.invertedIndex]: retrievalIcon.fullText,
|
||||||
|
[RETRIEVE_METHOD.keywordSearch]: retrievalIcon.fullText,
|
||||||
|
}
|
||||||
|
|
||||||
|
const isNumber = (value: unknown): value is number => typeof value === 'number'
|
||||||
|
|
||||||
|
const RuleDetail: FC<RuleDetailProps> = ({ sourceData, indexingType, retrievalMethod }) => {
|
||||||
|
const { t } = useTranslation()
|
||||||
|
|
||||||
|
const segmentationRuleLabels = {
|
||||||
|
mode: t('embedding.mode', { ns: 'datasetDocuments' }),
|
||||||
|
segmentLength: t('embedding.segmentLength', { ns: 'datasetDocuments' }),
|
||||||
|
textCleaning: t('embedding.textCleaning', { ns: 'datasetDocuments' }),
|
||||||
|
}
|
||||||
|
|
||||||
|
const getRuleName = useCallback((key: string): string | undefined => {
|
||||||
|
const translationKey = PRE_PROCESSING_RULE_KEYS[key as keyof typeof PRE_PROCESSING_RULE_KEYS]
|
||||||
|
return translationKey ? t(translationKey, { ns: 'datasetCreation' }) : undefined
|
||||||
|
}, [t])
|
||||||
|
|
||||||
|
const getModeValue = useCallback((): string => {
|
||||||
|
if (!sourceData?.mode)
|
||||||
|
return '-'
|
||||||
|
|
||||||
|
if (sourceData.mode === ProcessMode.general)
|
||||||
|
return t('embedding.custom', { ns: 'datasetDocuments' })
|
||||||
|
|
||||||
|
const parentModeLabel = sourceData.rules?.parent_mode === 'paragraph'
|
||||||
|
? t('parentMode.paragraph', { ns: 'dataset' })
|
||||||
|
: t('parentMode.fullDoc', { ns: 'dataset' })
|
||||||
|
|
||||||
|
return `${t('embedding.hierarchical', { ns: 'datasetDocuments' })} · ${parentModeLabel}`
|
||||||
|
}, [sourceData, t])
|
||||||
|
|
||||||
|
const getSegmentLengthValue = useCallback((): string | number => {
|
||||||
|
if (!sourceData?.mode)
|
||||||
|
return '-'
|
||||||
|
|
||||||
|
const maxTokens = isNumber(sourceData.rules?.segmentation?.max_tokens)
|
||||||
|
? sourceData.rules.segmentation.max_tokens
|
||||||
|
: '-'
|
||||||
|
|
||||||
|
if (sourceData.mode === ProcessMode.general)
|
||||||
|
return maxTokens
|
||||||
|
|
||||||
|
const childMaxTokens = isNumber(sourceData.rules?.subchunk_segmentation?.max_tokens)
|
||||||
|
? sourceData.rules.subchunk_segmentation.max_tokens
|
||||||
|
: '-'
|
||||||
|
|
||||||
|
return `${t('embedding.parentMaxTokens', { ns: 'datasetDocuments' })} ${maxTokens}; ${t('embedding.childMaxTokens', { ns: 'datasetDocuments' })} ${childMaxTokens}`
|
||||||
|
}, [sourceData, t])
|
||||||
|
|
||||||
|
const getTextCleaningValue = useCallback((): string => {
|
||||||
|
if (!sourceData?.mode)
|
||||||
|
return '-'
|
||||||
|
|
||||||
|
const enabledRules = sourceData.rules?.pre_processing_rules?.filter(rule => rule.enabled) || []
|
||||||
|
const ruleNames = enabledRules
|
||||||
|
.map((rule) => {
|
||||||
|
const name = getRuleName(rule.id)
|
||||||
|
return typeof name === 'string' ? name : ''
|
||||||
|
})
|
||||||
|
.filter(name => name)
|
||||||
|
return ruleNames.length > 0 ? ruleNames.join(',') : '-'
|
||||||
|
}, [sourceData, getRuleName])
|
||||||
|
|
||||||
|
const fieldValueGetters: Record<string, () => string | number> = {
|
||||||
|
mode: getModeValue,
|
||||||
|
segmentLength: getSegmentLengthValue,
|
||||||
|
textCleaning: getTextCleaningValue,
|
||||||
|
}
|
||||||
|
|
||||||
|
const isEconomical = indexingType === IndexingType.ECONOMICAL
|
||||||
|
const indexMethodIconSrc = isEconomical ? indexMethodIcon.economical : indexMethodIcon.high_quality
|
||||||
|
const indexModeLabel = t(`stepTwo.${isEconomical ? 'economical' : 'qualified'}`, { ns: 'datasetCreation' })
|
||||||
|
|
||||||
|
const effectiveRetrievalMethod = isEconomical ? 'keyword_search' : (retrievalMethod ?? 'semantic_search')
|
||||||
|
const retrievalLabel = t(`retrieval.${effectiveRetrievalMethod}.title`, { ns: 'dataset' })
|
||||||
|
const retrievalIconSrc = RETRIEVAL_ICON_MAP[retrievalMethod as keyof typeof RETRIEVAL_ICON_MAP] ?? retrievalIcon.vector
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="flex flex-col gap-1">
|
||||||
|
{Object.keys(segmentationRuleLabels).map(field => (
|
||||||
|
<FieldInfo
|
||||||
|
key={field}
|
||||||
|
label={segmentationRuleLabels[field as keyof typeof segmentationRuleLabels]}
|
||||||
|
displayedValue={String(fieldValueGetters[field]())}
|
||||||
|
/>
|
||||||
|
))}
|
||||||
|
<FieldInfo
|
||||||
|
label={t('stepTwo.indexMode', { ns: 'datasetCreation' })}
|
||||||
|
displayedValue={indexModeLabel}
|
||||||
|
valueIcon={<Image className="size-4" src={indexMethodIconSrc} alt="" />}
|
||||||
|
/>
|
||||||
|
<FieldInfo
|
||||||
|
label={t('form.retrievalSetting.title', { ns: 'datasetSettings' })}
|
||||||
|
displayedValue={retrievalLabel}
|
||||||
|
valueIcon={<Image className="size-4" src={retrievalIconSrc} alt="" />}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
export default RuleDetail
|
||||||
@ -0,0 +1,22 @@
|
|||||||
|
import type { FC } from 'react'
|
||||||
|
import { useTranslation } from 'react-i18next'
|
||||||
|
import { ZapFast } from '@/app/components/base/icons/src/vender/solid/general'
|
||||||
|
import UpgradeBtn from '@/app/components/billing/upgrade-btn'
|
||||||
|
|
||||||
|
/**
 * Banner shown to promote upgrading the plan.
 *
 * Renders a `ZapFast` icon, the translated
 * `plansCommon.documentProcessingPriorityUpgrade` message from the `billing`
 * namespace, and an `UpgradeBtn` tagged with the `knowledge-speed-up` location.
 */
const UpgradeBanner: FC = () => {
  const { t } = useTranslation()
  // Resolve the message up front so the JSX below is purely structural.
  const upgradeHint = t('plansCommon.documentProcessingPriorityUpgrade', { ns: 'billing' })

  return (
    <div className="flex h-14 items-center rounded-xl border-[0.5px] border-black/5 bg-white p-3 shadow-md">
      <div className="flex h-8 w-8 shrink-0 items-center justify-center rounded-lg bg-[#FFF6ED]">
        <ZapFast className="h-4 w-4 text-[#FB6514]" />
      </div>
      <div className="mx-3 grow text-[13px] font-medium text-gray-700">
        {upgradeHint}
      </div>
      <UpgradeBtn loc="knowledge-speed-up" />
    </div>
  )
}
|
||||||
|
|
||||||
|
export default UpgradeBanner
|
||||||
@ -0,0 +1,90 @@
|
|||||||
|
import type { IndexingStatusResponse } from '@/models/datasets'
|
||||||
|
import { useEffect, useRef, useState } from 'react'
|
||||||
|
import { fetchIndexingStatusBatch } from '@/service/datasets'
|
||||||
|
|
||||||
|
// Delay between two consecutive status requests; passed to `setTimeout`, so the unit is milliseconds.
const POLLING_INTERVAL = 2500
|
||||||
|
// Statuses treated as "finished" by `isStatusCompleted`; once every document has one of these, polling stops.
const COMPLETED_STATUSES = ['completed', 'error', 'paused'] as const
|
||||||
|
// Statuses that count as "still embedding"; a single document in one of these makes `isEmbedding` true.
const EMBEDDING_STATUSES = ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'] as const
|
||||||
|
|
||||||
|
/**
 * Input of `useIndexingStatusPolling`.
 * Both values are forwarded unchanged to `fetchIndexingStatusBatch`, and a
 * change in either one restarts the polling effect.
 */
type IndexingStatusPollingParams = {
  datasetId: string
  batchId: string
}
|
||||||
|
|
||||||
|
/**
 * Output of `useIndexingStatusPolling`.
 */
type IndexingStatusPollingResult = {
  // Most recent `data` array returned by the status request (empty before the first response).
  statusList: IndexingStatusResponse[]
  // True when at least one entry of `statusList` has a status listed in EMBEDDING_STATUSES.
  isEmbedding: boolean
  // True when `statusList` is non-empty and every entry has a status listed in COMPLETED_STATUSES.
  isEmbeddingCompleted: boolean
}
|
||||||
|
|
||||||
|
/**
 * Tells whether a single indexing status is one of COMPLETED_STATUSES
 * (`completed`, `error` or `paused`).
 */
const isStatusCompleted = (status: string): boolean => {
  // Widen the readonly tuple to plain strings so no cast is needed on `status`.
  const finishedStatuses: readonly string[] = COMPLETED_STATUSES
  return finishedStatuses.includes(status)
}
|
||||||
|
|
||||||
|
/**
 * Tells whether every entry of `statusList` has reached a completed status.
 * An empty list yields `true` because no entry is left unfinished.
 */
const isAllCompleted = (statusList: IndexingStatusResponse[]): boolean => {
  const hasUnfinished = statusList.some(item => !isStatusCompleted(item.indexing_status))
  return !hasUnfinished
}
|
||||||
|
|
||||||
|
/**
 * Custom hook for polling indexing status with automatic stop on completion.
 * Handles the polling lifecycle and provides derived states for UI rendering.
 *
 * A request is issued immediately, then again POLLING_INTERVAL ms after each
 * response (or failure) until every returned document has a completed status.
 * Polling restarts whenever `datasetId` or `batchId` changes.
 *
 * @param params.datasetId - forwarded to `fetchIndexingStatusBatch`
 * @param params.batchId - forwarded to `fetchIndexingStatusBatch`
 * @returns the latest status list plus the `isEmbedding` and
 *          `isEmbeddingCompleted` flags derived from it
 */
export const useIndexingStatusPolling = ({
  datasetId,
  batchId,
}: IndexingStatusPollingParams): IndexingStatusPollingResult => {
  const [statusList, setStatusList] = useState<IndexingStatusResponse[]>([])
  const isStopPollingRef = useRef(false)

  useEffect(() => {
    // Reset polling state each time the effect (re)starts
    isStopPollingRef.current = false
    // The ref is shared by every run of this effect, so it cannot identify a
    // stale run: once the next run resets it to `false`, a request still in
    // flight from the previous run would resume polling with outdated ids.
    // This flag is private to the current run and is only flipped by cleanup.
    let isCancelled = false
    let timeoutId: ReturnType<typeof setTimeout> | null = null

    const shouldStop = (): boolean => isCancelled || isStopPollingRef.current

    const poll = async (): Promise<void> => {
      if (shouldStop())
        return

      try {
        const response = await fetchIndexingStatusBatch({ datasetId, batchId })
        // The response arrived after cleanup: drop it instead of overwriting
        // state that now belongs to another run (or an unmounted component).
        if (isCancelled)
          return

        setStatusList(response.data)
        if (isAllCompleted(response.data)) {
          isStopPollingRef.current = true
          return
        }
      }
      catch {
        // Continue polling on error
      }

      if (!shouldStop()) {
        timeoutId = setTimeout(() => {
          poll()
        }, POLLING_INTERVAL)
      }
    }

    poll()

    return () => {
      isCancelled = true
      isStopPollingRef.current = true
      if (timeoutId)
        clearTimeout(timeoutId)
    }
  }, [datasetId, batchId])

  // `item?.` tolerates missing entries in the response array.
  const isEmbedding = statusList.some(item =>
    EMBEDDING_STATUSES.includes(item?.indexing_status as typeof EMBEDDING_STATUSES[number]),
  )

  // Require at least one entry so the initial empty list is not reported as completed.
  const isEmbeddingCompleted = statusList.length > 0 && isAllCompleted(statusList)

  return {
    statusList,
    isEmbedding,
    isEmbeddingCompleted,
  }
}
|
||||||
@ -0,0 +1,64 @@
|
|||||||
|
import type {
|
||||||
|
DataSourceInfo,
|
||||||
|
DataSourceType,
|
||||||
|
FullDocumentDetail,
|
||||||
|
IndexingStatusResponse,
|
||||||
|
LegacyDataSourceInfo,
|
||||||
|
} from '@/models/datasets'
|
||||||
|
|
||||||
|
// Indexing statuses for which `isSourceEmbedding` reports a document as still being embedded.
const EMBEDDING_STATUSES = ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'] as const
|
||||||
|
|
||||||
|
/**
 * Type guard narrowing a data source info to the legacy shape.
 *
 * The check is purely structural: `info` must be non-nullish and its
 * `upload_file` property must have `typeof` equal to `'object'`.
 * NOTE(review): `typeof null` is also `'object'`, so an explicit
 * `upload_file: null` passes the guard — confirm callers tolerate that.
 */
export const isLegacyDataSourceInfo = (info: DataSourceInfo): info is LegacyDataSourceInfo => {
  if (info == null)
    return false

  const { upload_file: uploadFile } = info as LegacyDataSourceInfo
  return typeof uploadFile === 'object'
}
|
||||||
|
|
||||||
|
/**
 * Tells whether a document is still being embedded, i.e. whether its
 * `indexing_status` is one of EMBEDDING_STATUSES.
 */
export const isSourceEmbedding = (detail: IndexingStatusResponse): boolean => {
  // Widen the readonly tuple to plain strings so no cast is needed on the status.
  const inProgressStatuses: readonly string[] = EMBEDDING_STATUSES
  return inProgressStatuses.includes(detail.indexing_status)
}
|
||||||
|
|
||||||
|
/**
 * Computes a document's indexing progress as a whole-number percentage.
 *
 * Missing or falsy segment counts are treated as 0. A total of 0 yields 0
 * (avoiding a division by zero), and the rounded result never exceeds 100.
 */
export const getSourcePercent = (detail: IndexingStatusResponse): number => {
  const total = detail.total_segments || 0
  if (total === 0)
    return 0

  const done = detail.completed_segments || 0
  const rounded = Math.round(done * 100 / total)
  return rounded > 100 ? 100 : rounded
}
|
||||||
|
|
||||||
|
/**
 * Get file extension from filename, defaults to 'txt'.
 *
 * The extension is the text after the last `.`. The default is returned when
 * the name is missing or empty, contains no `.` at all, or ends with a `.`.
 *
 * @param name - file name, possibly undefined
 * @returns the extension without the leading dot, or `'txt'`
 */
export const getFileType = (name?: string): string => {
  if (!name)
    return 'txt'

  const lastDot = name.lastIndexOf('.')
  // Without a dot there is no extension; splitting on '.' would wrongly
  // return the whole filename here.
  if (lastDot === -1)
    return 'txt'

  return name.slice(lastDot + 1) || 'txt'
}
|
||||||
|
|
||||||
|
/**
 * Builds id-based accessors over a list of documents.
 *
 * The documents are indexed once by `id`; when two documents share an id the
 * later one wins. Every accessor returns `undefined` for an unknown id.
 */
export const createDocumentLookup = (documents: FullDocumentDetail[]) => {
  const byId = new Map<FullDocumentDetail['id'], FullDocumentDetail>()
  for (const document of documents)
    byId.set(document.id, document)

  return {
    // The full document record.
    getDocument: (id: string) => byId.get(id),

    // The document's display name.
    getName: (id: string) => byId.get(id)?.name,

    // The document's data source type.
    getSourceType: (id: string) => byId.get(id)?.data_source_type as DataSourceType | undefined,

    // The Notion page icon, available only when the source info has the legacy shape.
    getNotionIcon: (id: string) => {
      const sourceInfo = byId.get(id)?.data_source_info
      return sourceInfo && isLegacyDataSourceInfo(sourceInfo)
        ? sourceInfo.notion_page_icon
        : undefined
    },
  }
}
|
||||||
Loading…
Reference in New Issue
Block a user