Initial commit
This commit is contained in:
+1131
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,740 @@
|
||||
from fastapi import FastAPI, UploadFile, File, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from typing import List, Optional, Dict, Any
|
||||
from datetime import datetime, timezone
|
||||
import os
|
||||
import json
|
||||
import PyPDF2
|
||||
import asyncio
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import aiohttp
|
||||
from pydantic import BaseModel
|
||||
from redis import asyncio as aioredis
|
||||
|
||||
# 配置
|
||||
UPLOAD_PATH = "/obscura/task/paper"
|
||||
DEEPSEEK_API_KEY = "sk-3027fb3c810b4e17985fa397d41250b9"
|
||||
REDIS_URL = "redis://:Obscura@2024@222.186.10.253:6379"
|
||||
|
||||
# Redis数据库编号
|
||||
ANALYSIS_DB = 190 # 文献分析结果
|
||||
QA_HISTORY_DB = 191 # 问答记录
|
||||
TASK_STATUS_DB = 192 # 任务状态
|
||||
REFERENCE_DB = 193 # 文献信息
|
||||
|
||||
app = FastAPI()
|
||||
# CORS configuration
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
expose_headers=["*"]
|
||||
)
|
||||
# 创建线程池
|
||||
pdf_thread_pool = ThreadPoolExecutor(max_workers=3)
|
||||
|
||||
# Redis连接
|
||||
async def get_redis():
|
||||
redis = aioredis.from_url(
|
||||
REDIS_URL,
|
||||
encoding="utf-8",
|
||||
decode_responses=True,
|
||||
)
|
||||
return redis
|
||||
|
||||
# 添加问答相关的模型
|
||||
class QuestionModel(BaseModel):
|
||||
"""问题模型"""
|
||||
question: str
|
||||
|
||||
class TaskStatus:
|
||||
PENDING = "pending"
|
||||
PROCESSING = "processing"
|
||||
COMPLETED = "completed"
|
||||
FAILED = "failed"
|
||||
|
||||
@app.post("/upload")
|
||||
async def upload_references(
|
||||
files: List[UploadFile] = File(...)
|
||||
):
|
||||
"""批量上传文献"""
|
||||
redis = await get_redis()
|
||||
|
||||
try:
|
||||
uploaded_references = []
|
||||
|
||||
# 批量上传文件
|
||||
for file in files:
|
||||
# 验证文件类型
|
||||
allowed_types = ["application/pdf", "application/msword",
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"]
|
||||
if file.content_type not in allowed_types:
|
||||
continue # 跳过不支持的文件类型
|
||||
|
||||
# 确保上传目录存在
|
||||
os.makedirs(UPLOAD_PATH, exist_ok=True)
|
||||
|
||||
# 生成安全的文件名
|
||||
file_extension = os.path.splitext(file.filename)[1]
|
||||
safe_filename = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{os.urandom(4).hex()}{file_extension}"
|
||||
file_path = os.path.join(UPLOAD_PATH, safe_filename)
|
||||
|
||||
# 保存文件
|
||||
with open(file_path, "wb") as buffer:
|
||||
content = await file.read()
|
||||
buffer.write(content)
|
||||
|
||||
# 生成引用ID
|
||||
reference_id = f"{int(datetime.now().timestamp())}_{os.urandom(4).hex()}"
|
||||
|
||||
# 创建引用记录
|
||||
await redis.select(REFERENCE_DB)
|
||||
reference = {
|
||||
"reference_link": file_path,
|
||||
"reference_title": file.filename,
|
||||
"upload_time": datetime.now(timezone.utc).isoformat()
|
||||
}
|
||||
await redis.hmset(f"reference:{reference_id}", reference)
|
||||
|
||||
reference_info = {
|
||||
"reference_id": reference_id,
|
||||
"file_path": file_path,
|
||||
"reference_title": reference["reference_title"]
|
||||
}
|
||||
uploaded_references.append(reference_info)
|
||||
|
||||
# 为每个文献创建初始状态
|
||||
await redis.select(ANALYSIS_DB)
|
||||
report_key = f"reference_report:{reference_id}"
|
||||
initial_status = {
|
||||
"status": "processing",
|
||||
"message": "Analysis in progress"
|
||||
}
|
||||
await redis.set(report_key, json.dumps(initial_status))
|
||||
|
||||
# 在后台启动分析任务
|
||||
if uploaded_references:
|
||||
asyncio.create_task(process_batch_analysis(uploaded_references))
|
||||
|
||||
return {
|
||||
"message": f"Successfully uploaded {len(uploaded_references)} files",
|
||||
"uploaded_files": uploaded_references
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
print(f"Upload error: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
finally:
|
||||
try:
|
||||
await redis.aclose()
|
||||
except Exception as e:
|
||||
print(f"Error closing Redis connection: {e}")
|
||||
|
||||
@app.post("/{reference_id}/qa")
|
||||
async def ask_reference_question(
|
||||
reference_id: str,
|
||||
question: QuestionModel
|
||||
):
|
||||
"""向文献提问(异步)"""
|
||||
redis = await get_redis()
|
||||
|
||||
try:
|
||||
# 验证文献是否存在
|
||||
await redis.select(REFERENCE_DB)
|
||||
reference = await redis.hgetall(f"reference:{reference_id}")
|
||||
if not reference:
|
||||
raise HTTPException(status_code=404, detail="文献不存在")
|
||||
|
||||
# 生成任务ID
|
||||
task_id = f"{int(datetime.now().timestamp())}_{os.urandom(4).hex()}"
|
||||
|
||||
# 创建后台任务
|
||||
asyncio.create_task(process_reference_question(
|
||||
task_id=task_id,
|
||||
reference_id=reference_id,
|
||||
question=question.question,
|
||||
reference=reference
|
||||
))
|
||||
|
||||
return {
|
||||
"task_id": task_id,
|
||||
"status": TaskStatus.PENDING,
|
||||
"message": "问题已提交,正在处理中"
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error in ask_reference_question: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
finally:
|
||||
try:
|
||||
await redis.aclose()
|
||||
except Exception as e:
|
||||
print(f"Error closing Redis connection: {e}")
|
||||
|
||||
@app.get("/{task_id}")
|
||||
async def get_task_status(task_id: str):
|
||||
"""获取任务状态和结果"""
|
||||
redis = await get_redis()
|
||||
|
||||
try:
|
||||
await redis.select(TASK_STATUS_DB)
|
||||
task_data = await redis.hgetall(f"task:{task_id}")
|
||||
|
||||
if not task_data:
|
||||
raise HTTPException(status_code=404, detail="任务不存在")
|
||||
|
||||
response = {
|
||||
"task_id": task_id,
|
||||
"status": task_data.get("status", TaskStatus.PENDING)
|
||||
}
|
||||
|
||||
# 如果任务完成,添加结果
|
||||
if task_data.get("status") == TaskStatus.COMPLETED:
|
||||
response.update({
|
||||
"answer": task_data.get("answer"),
|
||||
"reference_title": task_data.get("reference_title")
|
||||
})
|
||||
# 如果任务失败,添加错误信息
|
||||
elif task_data.get("status") == TaskStatus.FAILED:
|
||||
response.update({
|
||||
"error": task_data.get("error")
|
||||
})
|
||||
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error getting task status: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
finally:
|
||||
try:
|
||||
await redis.aclose()
|
||||
except Exception as e:
|
||||
print(f"Error closing Redis connection: {e}")
|
||||
|
||||
@app.get("/{reference_id}/qa/history")
|
||||
async def get_reference_qa_history(reference_id: str):
|
||||
"""获取文献问答历史记录"""
|
||||
redis = await get_redis()
|
||||
|
||||
try:
|
||||
# 从Redis获取对话历史
|
||||
await redis.select(QA_HISTORY_DB)
|
||||
chat_history_key = f"chat_history:{reference_id}"
|
||||
|
||||
history_data = await redis.get(chat_history_key)
|
||||
if not history_data:
|
||||
return []
|
||||
|
||||
return json.loads(history_data)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error getting QA history: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
finally:
|
||||
try:
|
||||
await redis.aclose()
|
||||
except Exception as e:
|
||||
print(f"Error closing Redis connection: {e}")
|
||||
|
||||
@app.get("/{reference_id}/analysis")
|
||||
async def get_reference_analysis(reference_id: str):
|
||||
"""获取文献分析结果"""
|
||||
redis = await get_redis()
|
||||
|
||||
try:
|
||||
# 从Redis获取分析报告
|
||||
await redis.select(ANALYSIS_DB)
|
||||
report_key = f"reference_report:{reference_id}"
|
||||
report_data = await redis.get(report_key)
|
||||
|
||||
if not report_data:
|
||||
raise HTTPException(status_code=404, detail="文献分析报告不存在")
|
||||
|
||||
return json.loads(report_data)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error getting analysis report: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
finally:
|
||||
try:
|
||||
await redis.aclose()
|
||||
except Exception as e:
|
||||
print(f"Error closing Redis connection: {e}")
|
||||
|
||||
async def process_batch_analysis(references: List[dict]):
|
||||
"""批量处理文献分析的后台任务"""
|
||||
redis = await get_redis()
|
||||
|
||||
# 限制并发数量
|
||||
semaphore = asyncio.Semaphore(3)
|
||||
|
||||
async def process_single_reference(ref: dict):
|
||||
async with semaphore:
|
||||
try:
|
||||
reference_id = ref["reference_id"]
|
||||
file_path = ref["file_path"]
|
||||
|
||||
if not os.path.exists(file_path):
|
||||
print(f"Reference file not found: {file_path}")
|
||||
return
|
||||
|
||||
print(f"\n开始处理文献 {ref.get('reference_title', '未知标题')}")
|
||||
print(f"文献ID: {reference_id}")
|
||||
print(f"文件路径: {file_path}")
|
||||
|
||||
# 异步读取PDF
|
||||
print("\n=== 步骤1: 读取PDF文件 ===")
|
||||
pdf_content = await read_pdf_async(file_path)
|
||||
if not pdf_content:
|
||||
raise Exception("Failed to read PDF content")
|
||||
|
||||
# 打印字符数
|
||||
content_length = len(pdf_content)
|
||||
print(f"\n=== 步骤2: 内容长度检查 ===")
|
||||
print(f"PDF内容总字符数: {content_length}")
|
||||
|
||||
# 根据内容长度选择不同的处理方式
|
||||
if content_length <= 200000:
|
||||
print(f"\n=== 步骤3A: 使用直接分析方式 ===")
|
||||
print(f"文档长度在处理范围内 ({content_length} <= 200000)")
|
||||
# 直接分析文档内容
|
||||
print("开始分析文档内容...")
|
||||
document_analysis = await analyze_reference_document_async(pdf_content[:180000])
|
||||
if not document_analysis:
|
||||
raise Exception("Failed to analyze document")
|
||||
print("文档分析完成")
|
||||
else:
|
||||
print(f"\n=== 步骤3B: 使用分段分析方式 ===")
|
||||
print(f"文档超过200000字符 ({content_length} > 200000)")
|
||||
# 分段分析长文档
|
||||
print("\n--- 开始分段分析 ---")
|
||||
print("正在调用 analyze_long_document_async...")
|
||||
try:
|
||||
analysis_results = await analyze_long_document_async(pdf_content)
|
||||
except Exception as e:
|
||||
print(f"分段分析过程中出错: {str(e)}")
|
||||
raise
|
||||
|
||||
if not analysis_results:
|
||||
raise Exception("Failed to analyze document in segments")
|
||||
print(f"分段分析完成,共分析了 {len(analysis_results)} 个段落")
|
||||
|
||||
# 合并分析结果
|
||||
try:
|
||||
document_analysis = await merge_analysis_results(analysis_results)
|
||||
except Exception as e:
|
||||
raise
|
||||
|
||||
if not document_analysis:
|
||||
raise Exception("Failed to merge analysis results")
|
||||
|
||||
# 等待一小段时间避免API限制
|
||||
await asyncio.sleep(1)
|
||||
|
||||
# 异步分析文献价值
|
||||
try:
|
||||
value_evaluation = await analyze_reference_value_async(document_analysis)
|
||||
print(f"analyze_reference_value_async 返回结果类型: {type(value_evaluation)}")
|
||||
except Exception as e:
|
||||
raise
|
||||
|
||||
if not value_evaluation:
|
||||
raise Exception("Failed to evaluate value")
|
||||
print("文献价值分析完成")
|
||||
|
||||
# 合并结果
|
||||
print("\n=== 步骤5: 保存最终结果 ===")
|
||||
analysis_result = {
|
||||
**document_analysis,
|
||||
**value_evaluation,
|
||||
"status": "completed",
|
||||
"completion_time": datetime.now(timezone.utc).isoformat()
|
||||
}
|
||||
|
||||
# 保存结果
|
||||
await redis.select(ANALYSIS_DB)
|
||||
report_key = f"reference_report:{reference_id}"
|
||||
await redis.set(report_key, json.dumps(analysis_result))
|
||||
|
||||
except Exception as e:
|
||||
try:
|
||||
await redis.select(ANALYSIS_DB)
|
||||
report_key = f"reference_report:{ref['reference_id']}"
|
||||
error_status = {
|
||||
"status": "failed",
|
||||
"message": str(e),
|
||||
"error_time": datetime.now(timezone.utc).isoformat()
|
||||
}
|
||||
await redis.set(report_key, json.dumps(error_status))
|
||||
except Exception as redis_error:
|
||||
print(f"Error updating Redis status: {redis_error}")
|
||||
|
||||
try:
|
||||
# 并发处理所有引用
|
||||
await asyncio.gather(
|
||||
*(process_single_reference(ref) for ref in references)
|
||||
)
|
||||
finally:
|
||||
try:
|
||||
await redis.aclose()
|
||||
except Exception as e:
|
||||
print(f"Error closing Redis connection: {e}")
|
||||
|
||||
# 在全局范围创建线程池
|
||||
pdf_thread_pool = ThreadPoolExecutor(max_workers=3) # 限制并发PDF处理数量
|
||||
|
||||
# 创建异步HTTP客户端会话
|
||||
async def get_aiohttp_session():
|
||||
return aiohttp.ClientSession(
|
||||
base_url="https://api.deepseek.com/v1/",
|
||||
headers={"Authorization": f"Bearer {DEEPSEEK_API_KEY}"}
|
||||
)
|
||||
|
||||
async def read_pdf_async(file_path: str) -> str:
|
||||
"""在线程池中异步读取PDF"""
|
||||
def read_pdf():
|
||||
try:
|
||||
pdf_content = ""
|
||||
with open(file_path, 'rb') as file:
|
||||
pdf_reader = PyPDF2.PdfReader(file)
|
||||
for page in pdf_reader.pages:
|
||||
content = page.extract_text()
|
||||
pdf_content += content
|
||||
|
||||
# 根据内容长度返回不同的结果
|
||||
content_length = len(pdf_content)
|
||||
print(f"\nPDF原始内容字符数: {content_length}")
|
||||
|
||||
if content_length <= 180000:
|
||||
print("文档长度 ≤ 180000,返回完整内容")
|
||||
return pdf_content
|
||||
elif content_length <= 200000:
|
||||
print("文档长度在 180000-200000 之间,截取前 180000 个字符")
|
||||
return pdf_content[:180000]
|
||||
else:
|
||||
print("文档长度 > 200000,返回完整内容供分段处理")
|
||||
return pdf_content # 返回完整内容,由调用者处理分段
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error reading PDF: {e}")
|
||||
return ""
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
return await loop.run_in_executor(pdf_thread_pool, read_pdf)
|
||||
|
||||
async def call_deepseek_api_async(messages: list) -> dict:
|
||||
"""异步调用DeepSeek API"""
|
||||
async with await get_aiohttp_session() as session:
|
||||
async with session.post("/chat/completions", json={
|
||||
"model": "deepseek-chat",
|
||||
"messages": messages,
|
||||
"response_format": {"type": "json_object"}
|
||||
}) as response:
|
||||
if response.status == 200:
|
||||
data = await response.json()
|
||||
return json.loads(data["choices"][0]["message"]["content"])
|
||||
else:
|
||||
raise Exception(f"API调用失败: {await response.text()}")
|
||||
|
||||
async def analyze_long_document_async(content: str) -> List[dict]:
|
||||
"""分段分析长文档"""
|
||||
# 将内容分成多个段落,每段约60000个字符(估算后约50000 tokens)
|
||||
segments = []
|
||||
content_length = len(content)
|
||||
segment_size = 60000 # 减小段落大小
|
||||
|
||||
print(f"\n开始分段处理,总字符数: {content_length}")
|
||||
print(f"每段大小: {segment_size} 字符")
|
||||
|
||||
for i in range(0, content_length, segment_size):
|
||||
segment = content[i:i + segment_size]
|
||||
segments.append(segment)
|
||||
|
||||
print(f"文档已分段,共 {len(segments)} 个段落")
|
||||
|
||||
# 对每个段落进行分析
|
||||
analysis_results = []
|
||||
for i, segment in enumerate(segments):
|
||||
print(f"\n开始分析第 {i+1}/{len(segments)} 个段落...")
|
||||
|
||||
system_prompt = f"""
|
||||
You are an AI assistant tasked with analyzing part {i+1} of {len(segments)} of an academic paper.
|
||||
Generate a comprehensive analysis in JSON format covering both basic information and content analysis.
|
||||
Note that this is part {i+1} of a longer document, so focus on the content provided.
|
||||
The JSON structure must strictly follow the provided template.
|
||||
"""
|
||||
|
||||
user_prompt = f"""Analyze the following paper segment and extract all relevant information:
|
||||
Content: {segment}
|
||||
|
||||
Generate a JSON response with the following structure:
|
||||
|
||||
{{
|
||||
"1. Basic Information": {{
|
||||
"author": "[Author name(s) and affiliations]",
|
||||
"publication_date": "[Publication date in YYYY-MM format]",
|
||||
"title": "[Full title of the document]",
|
||||
"journal_publisher": "[Journal name or publisher details]",
|
||||
"document_type": "[Type: journal article/book/conference paper etc.]"
|
||||
}},
|
||||
"2. Content Analysis": {{
|
||||
"abstract": "[Paper abstract]",
|
||||
"research_purpose": "[Main objectives and research questions]",
|
||||
"methodology": "[Research methods, data collection and analysis approaches]",
|
||||
"main_arguments": "[Key theoretical frameworks and arguments]",
|
||||
"conclusions": "[Primary findings and conclusions]",
|
||||
"innovations": "[Novel contributions and original aspects]"
|
||||
}}
|
||||
}}
|
||||
"""
|
||||
|
||||
try:
|
||||
result = await call_deepseek_api_async([
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_prompt}
|
||||
])
|
||||
|
||||
if result:
|
||||
print(f"第 {i+1} 个段落分析完成")
|
||||
analysis_results.append(result)
|
||||
# 等待一小段时间避免API限制
|
||||
await asyncio.sleep(1)
|
||||
else:
|
||||
print(f"第 {i+1} 个段落分析失败")
|
||||
except Exception as e:
|
||||
print(f"分析段落时出错: {str(e)}")
|
||||
continue
|
||||
|
||||
return analysis_results
|
||||
|
||||
async def merge_analysis_results(results: List[dict]) -> dict:
|
||||
"""合并多个分析结果"""
|
||||
if not results:
|
||||
return {}
|
||||
|
||||
print(f"开始合并 {len(results)} 个分析结果...")
|
||||
|
||||
system_prompt = """
|
||||
You are an AI assistant tasked with merging multiple analysis results of different parts of the same academic paper.
|
||||
Generate a comprehensive merged analysis in JSON format.
|
||||
The JSON structure must strictly follow the provided template.
|
||||
Ensure the merged result is coherent and eliminates redundancy.
|
||||
"""
|
||||
|
||||
user_prompt = f"""Merge the following analysis results into a single coherent analysis:
|
||||
Analysis Results: {json.dumps(results, ensure_ascii=False)}
|
||||
|
||||
Generate a JSON response with the following structure:
|
||||
|
||||
{{
|
||||
"1. Basic Information": {{
|
||||
"author": "[Merged author information]",
|
||||
"publication_date": "[Publication date]",
|
||||
"title": "[Complete title]",
|
||||
"journal_publisher": "[Journal/publisher information]",
|
||||
"document_type": "[Document type]"
|
||||
}},
|
||||
"2. Content Analysis": {{
|
||||
"abstract": "[Complete abstract]",
|
||||
"research_purpose": "[Comprehensive research objectives]",
|
||||
"methodology": "[Complete methodology description]",
|
||||
"main_arguments": "[Comprehensive theoretical frameworks and arguments]",
|
||||
"conclusions": "[Complete findings and conclusions]",
|
||||
"innovations": "[Complete list of innovations]"
|
||||
}}
|
||||
}}
|
||||
"""
|
||||
|
||||
result = await call_deepseek_api_async([
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_prompt}
|
||||
])
|
||||
|
||||
if result:
|
||||
print("分析结果合并完成")
|
||||
else:
|
||||
print("分析结果合并失败")
|
||||
|
||||
return result
|
||||
|
||||
async def analyze_reference_document_async(content: str):
|
||||
"""分析文献的基本信息和内容"""
|
||||
system_prompt = """
|
||||
You are an AI assistant tasked with analyzing academic paper.
|
||||
Generate a comprehensive analysis in JSON format covering both basic information and content analysis.
|
||||
The JSON structure must strictly follow the provided template.
|
||||
"""
|
||||
|
||||
user_prompt = f"""Analyze the following paper and extract all relevant information:
|
||||
Content: {content}
|
||||
|
||||
Generate a JSON response with the following structure:
|
||||
|
||||
{{
|
||||
"1. Basic Information": {{
|
||||
"author": "[Author name(s) and affiliations]",
|
||||
"publication_date": "[Publication date in YYYY-MM format]",
|
||||
"title": "[Full title of the document]",
|
||||
"journal_publisher": "[Journal name or publisher details]",
|
||||
"document_type": "[Type: journal article/book/conference paper etc.]"
|
||||
}},
|
||||
"2. Content Analysis": {{
|
||||
"abstract": "[Paper abstract]",
|
||||
"research_purpose": "[Main objectives and research questions]",
|
||||
"methodology": "[Research methods, data collection and analysis approaches]",
|
||||
"main_arguments": "[Key theoretical frameworks and arguments]",
|
||||
"conclusions": "[Primary findings and conclusions]",
|
||||
"innovations": "[Novel contributions and original aspects]"
|
||||
}}
|
||||
}}
|
||||
"""
|
||||
|
||||
return await call_deepseek_api_async([
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_prompt}
|
||||
])
|
||||
|
||||
async def analyze_reference_value_async(content_analysis: dict):
|
||||
"""基于内容分析结果评估文献的价值"""
|
||||
system_prompt = """
|
||||
You are an AI assistant tasked with evaluating the value of academic paper based on its content analysis.
|
||||
Generate a comprehensive value evaluation in JSON format.
|
||||
The JSON structure must strictly follow the provided template.
|
||||
"""
|
||||
|
||||
user_prompt = f"""Based on the following content analysis, evaluate the paper's value:
|
||||
Content Analysis: {json.dumps(content_analysis, ensure_ascii=False)}
|
||||
|
||||
Generate a JSON response with the following structure:
|
||||
|
||||
{{
|
||||
"3. Value Evaluation": {{
|
||||
"academic_contribution": "[Significance to the field of study]",
|
||||
"practical_significance": "[Real-world applications and implications]",
|
||||
"limitations": "[Research constraints and weaknesses]",
|
||||
"implications": "[Suggestions for future research and practice]"
|
||||
}}
|
||||
}}
|
||||
"""
|
||||
|
||||
return await call_deepseek_api_async([
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_prompt}
|
||||
])
|
||||
|
||||
|
||||
async def process_reference_question(task_id: str, reference_id: str, question: str, reference: dict):
|
||||
"""处理文献问答的后台任务"""
|
||||
redis = await get_redis()
|
||||
|
||||
try:
|
||||
# 更新任务状态为处理中
|
||||
await redis.select(TASK_STATUS_DB)
|
||||
await redis.hset(
|
||||
f"task:{task_id}",
|
||||
mapping={
|
||||
"status": TaskStatus.PROCESSING,
|
||||
"reference_id": reference_id,
|
||||
"question": question
|
||||
}
|
||||
)
|
||||
|
||||
# 从Redis获取分析报告
|
||||
await redis.select(ANALYSIS_DB)
|
||||
report_key = f"reference_report:{reference_id}"
|
||||
report_data = await redis.get(report_key)
|
||||
|
||||
if not report_data:
|
||||
raise Exception("文献分析报告不存在,请先进行分析")
|
||||
|
||||
# 读取PDF文件内容
|
||||
file_path = reference.get("reference_link")
|
||||
if not file_path or not os.path.exists(file_path):
|
||||
raise Exception("文献文件不存在")
|
||||
|
||||
# 读取PDF内容
|
||||
pdf_content = ""
|
||||
with open(file_path, 'rb') as file:
|
||||
pdf_reader = PyPDF2.PdfReader(file)
|
||||
for page in pdf_reader.pages:
|
||||
content = page.extract_text()
|
||||
pdf_content += content
|
||||
|
||||
# 限制内容长度
|
||||
MAX_CHARS = 180000
|
||||
pdf_content = pdf_content[:MAX_CHARS]
|
||||
|
||||
# 构建系统提示和用户提示
|
||||
system_prompt = """
|
||||
你是一个专业的学术助手,负责回答关于学术文献的问题。
|
||||
你应该基于文献内容和分析报告提供准确、专业的回答。
|
||||
回答应当简洁明了,并尽可能引用文献中的具体内容。
|
||||
"""
|
||||
|
||||
# 构建上下文
|
||||
context = {
|
||||
"文献内容": pdf_content,
|
||||
"分析报告": json.loads(report_data)
|
||||
}
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": f"基于以下文献内容和分析报告回答问题:\n\n文献信息:{json.dumps(context, ensure_ascii=False)}\n\n问题:{question}"}
|
||||
]
|
||||
|
||||
# 调用DeepSeek API获取回答
|
||||
response = await call_deepseek_api_async(messages)
|
||||
answer = response.get("answer", "")
|
||||
|
||||
# 保存对话历史到Redis
|
||||
await redis.select(QA_HISTORY_DB)
|
||||
chat_history_key = f"chat_history:{reference_id}"
|
||||
|
||||
# 获取现有历史记录
|
||||
existing_history = await redis.get(chat_history_key)
|
||||
history = json.loads(existing_history) if existing_history else []
|
||||
|
||||
# 添加新的对话
|
||||
history.append({
|
||||
"question": question,
|
||||
"answer": answer,
|
||||
"timestamp": datetime.now(timezone.utc).isoformat()
|
||||
})
|
||||
|
||||
# 保存更新后的历史记录
|
||||
await redis.set(chat_history_key, json.dumps(history))
|
||||
|
||||
# 更新任务状态为完成
|
||||
await redis.select(TASK_STATUS_DB)
|
||||
await redis.hset(
|
||||
f"task:{task_id}",
|
||||
mapping={
|
||||
"status": TaskStatus.COMPLETED,
|
||||
"answer": answer,
|
||||
"reference_title": reference.get("reference_title")
|
||||
}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error processing question: {e}")
|
||||
# 更新任务状态为失败
|
||||
await redis.select(TASK_STATUS_DB)
|
||||
await redis.hset(
|
||||
f"task:{task_id}",
|
||||
mapping={
|
||||
"status": TaskStatus.FAILED,
|
||||
"error": str(e)
|
||||
}
|
||||
)
|
||||
finally:
|
||||
try:
|
||||
await redis.aclose()
|
||||
except Exception as e:
|
||||
print(f"Error closing Redis connection: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
uvicorn.run(app, host="0.0.0.0", port=9005)
|
||||
Reference in New Issue
Block a user