mirror of
https://github.com/langgenius/dify.git
synced 2026-01-14 06:07:33 +08:00
Signed-off-by: -LAN- <laipz8200@outlook.com> Signed-off-by: kenwoodjw <blackxin55+@gmail.com> Signed-off-by: Yongtao Huang <yongtaoh2022@gmail.com> Signed-off-by: yihong0618 <zouzou0208@gmail.com> Signed-off-by: zhanluxianshen <zhanluxianshen@163.com> Co-authored-by: -LAN- <laipz8200@outlook.com> Co-authored-by: GuanMu <ballmanjq@gmail.com> Co-authored-by: Davide Delbianco <davide.delbianco@outlook.com> Co-authored-by: NeatGuyCoding <15627489+NeatGuyCoding@users.noreply.github.com> Co-authored-by: kenwoodjw <blackxin55+@gmail.com> Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com> Co-authored-by: Yongtao Huang <99629139+hyongtao-db@users.noreply.github.com> Co-authored-by: Qiang Lee <18018968632@163.com> Co-authored-by: 李强04 <liqiang04@gaotu.cn> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Asuka Minato <i@asukaminato.eu.org> Co-authored-by: Matri Qi <matrixdom@126.com> Co-authored-by: huayaoyue6 <huayaoyue@163.com> Co-authored-by: Bowen Liang <liangbowen@gf.com.cn> Co-authored-by: znn <jubinkumarsoni@gmail.com> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: yihong <zouzou0208@gmail.com> Co-authored-by: Muke Wang <shaodwaaron@gmail.com> Co-authored-by: wangmuke <wangmuke@kingsware.cn> Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Co-authored-by: quicksand <quicksandzn@gmail.com> Co-authored-by: 非法操作 <hjlarry@163.com> Co-authored-by: zxhlyh <jasonapring2015@outlook.com> Co-authored-by: Eric Guo <eric.guocz@gmail.com> Co-authored-by: Zhedong Cen <cenzhedong2@126.com> Co-authored-by: jiangbo721 <jiangbo721@163.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: hjlarry <25834719+hjlarry@users.noreply.github.com> Co-authored-by: lxsummer 
<35754229+lxjustdoit@users.noreply.github.com> Co-authored-by: 湛露先生 <zhanluxianshen@163.com> Co-authored-by: Guangdong Liu <liugddx@gmail.com> Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com> Co-authored-by: Claude <noreply@anthropic.com> Co-authored-by: Yessenia-d <yessenia.contact@gmail.com> Co-authored-by: huangzhuo1949 <167434202+huangzhuo1949@users.noreply.github.com> Co-authored-by: huangzhuo <huangzhuo1@xiaomi.com> Co-authored-by: 17hz <0x149527@gmail.com> Co-authored-by: Amy <1530140574@qq.com> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Nite Knite <nkCoding@gmail.com> Co-authored-by: Yeuoly <45712896+Yeuoly@users.noreply.github.com> Co-authored-by: Petrus Han <petrus.hanks@gmail.com> Co-authored-by: iamjoel <2120155+iamjoel@users.noreply.github.com> Co-authored-by: Kalo Chin <frog.beepers.0n@icloud.com> Co-authored-by: Ujjwal Maurya <ujjwalsbx@gmail.com> Co-authored-by: Maries <xh001x@hotmail.com>
269 lines
12 KiB
Python
269 lines
12 KiB
Python
import copy
|
|
import logging
|
|
from typing import Optional
|
|
|
|
from flask_login import current_user
|
|
|
|
from core.rag.index_processor.constant.built_in_field import BuiltInField, MetadataDataSource
|
|
from extensions.ext_database import db
|
|
from extensions.ext_redis import redis_client
|
|
from libs.datetime_utils import naive_utc_now
|
|
from models.dataset import Dataset, DatasetMetadata, DatasetMetadataBinding
|
|
from services.dataset_service import DocumentService
|
|
from services.entities.knowledge_entities.knowledge_entities import (
|
|
MetadataArgs,
|
|
MetadataOperationData,
|
|
)
|
|
|
|
# Module-level logger; MetadataService uses it to record failed metadata operations.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class MetadataService:
    """Create, rename, delete and apply metadata fields of a knowledge base (dataset).

    Mutating operations are serialized with short-lived Redis locks (one key per
    dataset or per document, expiring after 3600 seconds). Except for
    ``create_metadata``, a failure inside a locked operation is logged, the
    database session is rolled back, and the method returns ``None`` instead of
    raising.
    """

    @staticmethod
    def create_metadata(dataset_id: str, metadata_args: MetadataArgs) -> DatasetMetadata:
        """Create a new custom metadata field for a dataset.

        Args:
            dataset_id: ID of the dataset that will own the field.
            metadata_args: Carries the ``name`` and ``type`` of the new field.

        Returns:
            The persisted ``DatasetMetadata`` row.

        Raises:
            ValueError: If the name is longer than 255 characters, is already
                used in this dataset for the current tenant, or equals the
                value of a built-in field.
        """
        MetadataService._validate_metadata_name(dataset_id, metadata_args.name)

        metadata = DatasetMetadata(
            tenant_id=current_user.current_tenant_id,
            dataset_id=dataset_id,
            type=metadata_args.type,
            name=metadata_args.name,
            created_by=current_user.id,
        )
        db.session.add(metadata)
        db.session.commit()
        return metadata

    @staticmethod
    def update_metadata_name(dataset_id: str, metadata_id: str, name: str) -> DatasetMetadata:  # type: ignore
        """Rename a metadata field and carry the rename over to bound documents.

        Each document bound to the field gets the value stored under the old
        name moved to ``name`` in its ``doc_metadata``.

        Args:
            dataset_id: ID of the dataset owning the field.
            metadata_id: ID of the metadata field to rename.
            name: The new field name.

        Returns:
            The updated ``DatasetMetadata`` on success. ``None`` when the
            operation fails after validation (lock held by another operation,
            metadata not found, database error); the failure is logged.

        Raises:
            ValueError: If ``name`` fails validation (too long, duplicate, or a
                built-in field name). Validation runs before the lock is taken.
        """
        MetadataService._validate_metadata_name(dataset_id, name)

        lock_key = f"dataset_metadata_lock_{dataset_id}"
        lock_acquired = False
        try:
            MetadataService.knowledge_base_metadata_lock_check(dataset_id, None)
            lock_acquired = True

            # Scope the lookup to the dataset so an ID belonging to another
            # dataset cannot be renamed through this call.
            metadata = db.session.query(DatasetMetadata).filter_by(id=metadata_id, dataset_id=dataset_id).first()
            if metadata is None:
                raise ValueError("Metadata not found.")
            old_name = metadata.name
            metadata.name = name
            metadata.updated_by = current_user.id
            metadata.updated_at = naive_utc_now()

            # update related documents
            for document in MetadataService._get_bound_documents(metadata_id):
                doc_metadata = MetadataService._copy_doc_metadata(document)
                doc_metadata[name] = doc_metadata.pop(old_name, None)
                document.doc_metadata = doc_metadata
                db.session.add(document)
            db.session.commit()
            return metadata  # type: ignore
        except Exception:
            logger.exception("Update metadata name failed")
            # Leave the session usable for whoever touches it next.
            db.session.rollback()
        finally:
            # Only release a lock this call actually took; otherwise we would
            # delete the lock of the operation that made us fail.
            if lock_acquired:
                redis_client.delete(lock_key)

    @staticmethod
    def delete_metadata(dataset_id: str, metadata_id: str):
        """Delete a metadata field and strip it from every bound document.

        Args:
            dataset_id: ID of the dataset owning the field.
            metadata_id: ID of the metadata field to delete.

        Returns:
            The deleted ``DatasetMetadata`` object on success. ``None`` when
            the operation fails (lock held by another operation, metadata not
            found, database error); the failure is logged.
        """
        lock_key = f"dataset_metadata_lock_{dataset_id}"
        lock_acquired = False
        try:
            MetadataService.knowledge_base_metadata_lock_check(dataset_id, None)
            lock_acquired = True

            # Scope the lookup to the dataset so an ID belonging to another
            # dataset cannot be deleted through this call.
            metadata = db.session.query(DatasetMetadata).filter_by(id=metadata_id, dataset_id=dataset_id).first()
            if metadata is None:
                raise ValueError("Metadata not found.")
            metadata_name = metadata.name
            db.session.delete(metadata)

            # deal related documents
            # NOTE(review): the DatasetMetadataBinding rows themselves are not
            # removed here - confirm whether they are cleaned up elsewhere.
            for document in MetadataService._get_bound_documents(metadata_id):
                doc_metadata = MetadataService._copy_doc_metadata(document)
                doc_metadata.pop(metadata_name, None)
                document.doc_metadata = doc_metadata
                db.session.add(document)
            db.session.commit()
            return metadata
        except Exception:
            logger.exception("Delete metadata failed")
            db.session.rollback()
        finally:
            if lock_acquired:
                redis_client.delete(lock_key)

    @staticmethod
    def get_built_in_fields():
        """Return the built-in metadata fields as ``{"name", "type"}`` dicts."""
        return [
            {"name": BuiltInField.document_name.value, "type": "string"},
            {"name": BuiltInField.uploader.value, "type": "string"},
            {"name": BuiltInField.upload_date.value, "type": "time"},
            {"name": BuiltInField.last_update_date.value, "type": "time"},
            {"name": BuiltInField.source.value, "type": "string"},
        ]

    @staticmethod
    def enable_built_in_field(dataset: Dataset):
        """Turn on built-in fields for a dataset and populate its working documents.

        Does nothing when built-in fields are already enabled. Failures (lock
        held by another operation, database error) are logged and rolled back;
        nothing is raised.

        Args:
            dataset: The dataset to update.
        """
        if dataset.built_in_field_enabled:
            return

        lock_key = f"dataset_metadata_lock_{dataset.id}"
        lock_acquired = False
        try:
            MetadataService.knowledge_base_metadata_lock_check(dataset.id, None)
            lock_acquired = True

            db.session.add(dataset)
            documents = DocumentService.get_working_documents_by_dataset_id(dataset.id)
            for document in documents or []:
                doc_metadata = MetadataService._copy_doc_metadata(document)
                doc_metadata.update(MetadataService._built_in_field_values(document))
                document.doc_metadata = doc_metadata
                db.session.add(document)
            dataset.built_in_field_enabled = True
            db.session.commit()
        except Exception:
            logger.exception("Enable built-in field failed")
            db.session.rollback()
        finally:
            if lock_acquired:
                redis_client.delete(lock_key)

    @staticmethod
    def disable_built_in_field(dataset: Dataset):
        """Turn off built-in fields for a dataset and strip them from its working documents.

        Does nothing when built-in fields are already disabled. Failures (lock
        held by another operation, database error) are logged and rolled back;
        nothing is raised.

        Args:
            dataset: The dataset to update.
        """
        if not dataset.built_in_field_enabled:
            return

        lock_key = f"dataset_metadata_lock_{dataset.id}"
        lock_acquired = False
        try:
            MetadataService.knowledge_base_metadata_lock_check(dataset.id, None)
            lock_acquired = True

            db.session.add(dataset)
            built_in_names = [field["name"] for field in MetadataService.get_built_in_fields()]
            documents = DocumentService.get_working_documents_by_dataset_id(dataset.id)
            for document in documents or []:
                doc_metadata = MetadataService._copy_doc_metadata(document)
                for built_in_name in built_in_names:
                    doc_metadata.pop(built_in_name, None)
                document.doc_metadata = doc_metadata
                db.session.add(document)
            dataset.built_in_field_enabled = False
            db.session.commit()
        except Exception:
            logger.exception("Disable built-in field failed")
            db.session.rollback()
        finally:
            if lock_acquired:
                redis_client.delete(lock_key)

    @staticmethod
    def update_documents_metadata(dataset: Dataset, metadata_args: MetadataOperationData):
        """Replace the metadata of each listed document and rebuild its bindings.

        Every operation is handled independently under its own document lock:
        the document's ``doc_metadata`` is rebuilt from the operation's
        metadata list (plus built-in values when the dataset has them enabled),
        all existing bindings of the document are deleted, and one binding per
        listed metadata entry is created. Both steps are committed together. A
        failing operation is logged and rolled back, and processing continues
        with the next one.

        Args:
            dataset: The dataset the documents belong to.
            metadata_args: Holds ``operation_data``; each entry has a
                ``document_id`` and a ``metadata_list`` of items exposing
                ``id``, ``name`` and ``value``.
        """
        for operation in metadata_args.operation_data:
            lock_key = f"document_metadata_lock_{operation.document_id}"
            lock_acquired = False
            try:
                MetadataService.knowledge_base_metadata_lock_check(None, operation.document_id)
                lock_acquired = True

                document = DocumentService.get_document(dataset.id, operation.document_id)
                if document is None:
                    raise ValueError("Document not found.")

                doc_metadata = {item.name: item.value for item in operation.metadata_list}
                if dataset.built_in_field_enabled:
                    doc_metadata.update(MetadataService._built_in_field_values(document))
                document.doc_metadata = doc_metadata
                db.session.add(document)

                # deal metadata binding
                db.session.query(DatasetMetadataBinding).filter_by(document_id=operation.document_id).delete()
                for metadata_value in operation.metadata_list:
                    db.session.add(
                        DatasetMetadataBinding(
                            tenant_id=current_user.current_tenant_id,
                            dataset_id=dataset.id,
                            document_id=operation.document_id,
                            metadata_id=metadata_value.id,
                            created_by=current_user.id,
                        )
                    )
                # Single commit so the document metadata and its bindings
                # cannot get out of sync when the second half fails.
                db.session.commit()
            except Exception:
                logger.exception("Update documents metadata failed")
                # Without a rollback a failed flush would break every
                # following iteration that reuses this session.
                db.session.rollback()
            finally:
                if lock_acquired:
                    redis_client.delete(lock_key)

    @staticmethod
    def knowledge_base_metadata_lock_check(dataset_id: Optional[str], document_id: Optional[str]):
        """Acquire the metadata lock for a dataset and/or a document.

        Each lock is taken with one atomic ``SET ... NX EX 3600`` call, so two
        concurrent callers cannot both acquire it. Falsy IDs are skipped. The
        caller is responsible for deleting the lock key(s) once finished.

        Args:
            dataset_id: Dataset to lock, or ``None`` to skip the dataset lock.
            document_id: Document to lock, or ``None`` to skip the document lock.

        Raises:
            ValueError: If a requested lock is already held. When both IDs are
                given and the document lock is unavailable, the dataset lock
                taken by this call is released before raising.
        """
        dataset_lock_key = None
        if dataset_id:
            dataset_lock_key = f"dataset_metadata_lock_{dataset_id}"
            if not redis_client.set(dataset_lock_key, 1, ex=3600, nx=True):
                raise ValueError("Another knowledge base metadata operation is running, please wait a moment.")
        if document_id:
            document_lock_key = f"document_metadata_lock_{document_id}"
            if not redis_client.set(document_lock_key, 1, ex=3600, nx=True):
                if dataset_lock_key is not None:
                    redis_client.delete(dataset_lock_key)
                raise ValueError("Another document metadata operation is running, please wait a moment.")

    @staticmethod
    def get_dataset_metadatas(dataset: Dataset):
        """Summarize a dataset's custom metadata fields and their usage.

        Args:
            dataset: The dataset to inspect.

        Returns:
            A dict with ``doc_metadata`` (one entry per item of
            ``dataset.doc_metadata`` whose id is not ``"built-in"``, holding
            ``id``, ``name``, ``type`` and ``count``, the number of bindings
            for that metadata in this dataset) and ``built_in_field_enabled``.
        """
        doc_metadata = []
        for item in dataset.doc_metadata or []:
            metadata_id = item.get("id")
            if metadata_id == "built-in":
                continue
            binding_count = (
                db.session.query(DatasetMetadataBinding)
                .filter_by(metadata_id=metadata_id, dataset_id=dataset.id)
                .count()
            )
            doc_metadata.append(
                {
                    "id": metadata_id,
                    "name": item.get("name"),
                    "type": item.get("type"),
                    "count": binding_count,
                }
            )
        return {
            "doc_metadata": doc_metadata,
            "built_in_field_enabled": dataset.built_in_field_enabled,
        }

    @staticmethod
    def _validate_metadata_name(dataset_id: str, name: str) -> None:
        """Raise ``ValueError`` if ``name`` is too long, already used in the dataset, or built-in."""
        # check if metadata name is too long
        if len(name) > 255:
            raise ValueError("Metadata name cannot exceed 255 characters.")

        # check if metadata name already exists
        if (
            db.session.query(DatasetMetadata)
            .filter_by(tenant_id=current_user.current_tenant_id, dataset_id=dataset_id, name=name)
            .first()
        ):
            raise ValueError("Metadata name already exists.")
        if any(field.value == name for field in BuiltInField):
            raise ValueError("Metadata name already exists in Built-in fields.")

    @staticmethod
    def _get_bound_documents(metadata_id: str):
        """Return the documents bound to a metadata field (empty list when there are no bindings)."""
        bindings = db.session.query(DatasetMetadataBinding).filter_by(metadata_id=metadata_id).all()
        if not bindings:
            return []
        return DocumentService.get_document_by_ids([binding.document_id for binding in bindings])

    @staticmethod
    def _copy_doc_metadata(document) -> dict:
        """Return a deep copy of the document's ``doc_metadata``, or an empty dict when it is unset."""
        if not document.doc_metadata:
            return {}
        return copy.deepcopy(document.doc_metadata)

    @staticmethod
    def _built_in_field_values(document) -> dict:
        """Return the built-in metadata values derived from the document's own attributes."""
        return {
            BuiltInField.document_name.value: document.name,
            BuiltInField.uploader.value: document.uploader,
            BuiltInField.upload_date.value: document.upload_date.timestamp(),
            BuiltInField.last_update_date.value: document.last_update_date.timestamp(),
            BuiltInField.source.value: MetadataDataSource[document.data_source_type].value,
        }