接入了文件管理系统,统一使用hash获取报告
This commit is contained in:
+253
-58
@@ -437,14 +437,14 @@
|
||||
// paper card rendering function
|
||||
function renderPaperCard(paper) {
|
||||
const uploadTime = paper.upload_time;
|
||||
const cardId = `paper-${paper._id}`;
|
||||
const cardId = `paper-${paper.file_hash}`;
|
||||
|
||||
return `
|
||||
<div class="card mb-3" id="${cardId}">
|
||||
<div class="card-body d-flex position-relative">
|
||||
<div class="flex-grow-1" style="cursor: pointer;" onclick="DetailFromCard(this.closest('.card'))">
|
||||
<h5 class="card-title">${paper.paper_title}</h5>
|
||||
<div class="card-text analysis-content-${paper._id}">
|
||||
<div class="card-text analysis-content-${paper.file_hash}">
|
||||
<div class="d-flex align-items-center">
|
||||
<div class="spinner-border spinner-border-sm text-primary me-2" role="status">
|
||||
<span class="visually-hidden">Loading...</span>
|
||||
@@ -458,7 +458,7 @@
|
||||
View Details
|
||||
</button>
|
||||
<button class="btn btn-danger btn-sm"
|
||||
onclick="event.stopPropagation(); deletepaper('${paper._id}')">
|
||||
onclick="event.stopPropagation(); deletePaper('${paper.file_hash}')">
|
||||
Delete
|
||||
</button>
|
||||
</div>
|
||||
@@ -466,14 +466,14 @@
|
||||
<div class="position-absolute" style="right: 1rem; top: 50%; transform: translateY(-50%);">
|
||||
<button class="btn rounded-circle d-flex align-items-center justify-content-center"
|
||||
style="width: 48px; height: 48px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); background: var(--primary-color); color: white;"
|
||||
onclick="event.stopPropagation(); openChatDialog('${paper._id}')">
|
||||
onclick="event.stopPropagation(); openChatDialog('${paper.file_hash}')">
|
||||
<i class="bi bi-chat-dots" style="font-size: 1.2rem;"></i>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card-footer text-muted">
|
||||
Upload Time: ${uploadTime}
|
||||
<span class="ms-2 analysis-status-${paper._id}">Analysis in progress...</span>
|
||||
<span class="ms-2 analysis-status-${paper.file_hash}">Analysis in progress...</span>
|
||||
</div>
|
||||
</div>
|
||||
`;
|
||||
@@ -511,7 +511,7 @@
|
||||
papers.forEach(async (ref) => {
|
||||
try {
|
||||
const checkAnalysisStatus = async () => {
|
||||
const analysisResponse = await fetch(`${API_BASE_URL}/${ref._id}/report`);
|
||||
const analysisResponse = await fetch(`${API_BASE_URL}/report/${ref.file_hash}`);
|
||||
|
||||
if (!analysisResponse.ok) {
|
||||
throw new Error('Failed to fetch analysis');
|
||||
@@ -519,10 +519,10 @@
|
||||
|
||||
const analysisResult = await analysisResponse.json();
|
||||
// 存储报告数据
|
||||
paperReports.set(ref._id, analysisResult);
|
||||
paperReports.set(ref.file_hash, analysisResult);
|
||||
|
||||
const contentElement = document.querySelector(`.analysis-content-${ref._id}`);
|
||||
const statusElement = document.querySelector(`.analysis-status-${ref._id}`);
|
||||
const contentElement = document.querySelector(`.analysis-content-${ref.file_hash}`);
|
||||
const statusElement = document.querySelector(`.analysis-status-${ref.file_hash}`);
|
||||
|
||||
if (analysisResult.status === 'processing') {
|
||||
// Processing status
|
||||
@@ -552,7 +552,7 @@
|
||||
journal = basicInfo.journal_publisher || 'N/A';
|
||||
|
||||
// 更新卡片标题为分析结果中的标题
|
||||
const titleElement = document.querySelector(`#paper-${ref._id} .card-title`);
|
||||
const titleElement = document.querySelector(`#paper-${ref.file_hash} .card-title`);
|
||||
if (titleElement && basicInfo.title) {
|
||||
titleElement.textContent = basicInfo.title;
|
||||
}
|
||||
@@ -574,6 +574,10 @@
|
||||
<i class="bi bi-exclamation-triangle me-2"></i>
|
||||
Analysis failed
|
||||
</div>
|
||||
<button class="btn btn-primary btn-sm" onclick="retryAnalysis('${ref.file_hash}', '${ref.paper_title}')">
|
||||
<i class="bi bi-arrow-clockwise me-1"></i>
|
||||
Retry
|
||||
</button>
|
||||
`;
|
||||
statusElement.innerHTML = `<span class="text-danger">Analysis failed: ${analysisResult.message || 'Unknown error'}</span>`;
|
||||
}
|
||||
@@ -582,9 +586,9 @@
|
||||
// 开始首次检查
|
||||
await checkAnalysisStatus();
|
||||
} catch (error) {
|
||||
console.error(`Failed to fetch analysis for paper ${ref._id}:`, error);
|
||||
const contentElement = document.querySelector(`.analysis-content-${ref._id}`);
|
||||
const statusElement = document.querySelector(`.analysis-status-${ref._id}`);
|
||||
console.error(`Failed to fetch analysis for paper ${ref.file_hash}:`, error);
|
||||
const contentElement = document.querySelector(`.analysis-content-${ref.file_hash}`);
|
||||
const statusElement = document.querySelector(`.analysis-status-${ref.file_hash}`);
|
||||
|
||||
contentElement.innerHTML = `
|
||||
<div class="text-danger">
|
||||
@@ -610,7 +614,6 @@
|
||||
}
|
||||
|
||||
async function uploadPaper() {
|
||||
|
||||
const input = document.createElement('input');
|
||||
input.type = 'file';
|
||||
input.accept = '.pdf,.doc,.docx';
|
||||
@@ -663,37 +666,114 @@
|
||||
titleElement.parentNode.parentNode.insertBefore(progressCard, titleElement.parentNode);
|
||||
|
||||
try {
|
||||
const formData = new FormData();
|
||||
files.forEach(file => {
|
||||
formData.append('files', file);
|
||||
});
|
||||
// 处理每个文件
|
||||
for (const file of files) {
|
||||
progressText.textContent = `Processing ${file.name}...`;
|
||||
|
||||
// 使用 XMLHttpRequest 来获取上传进度
|
||||
const xhr = new XMLHttpRequest();
|
||||
// 计算文件的 SHA256 哈希值
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
const hashBuffer = await crypto.subtle.digest('SHA-256', arrayBuffer);
|
||||
const hashArray = Array.from(new Uint8Array(hashBuffer));
|
||||
const hashHex = hashArray.map(b => b.toString(16).padStart(2, '0')).join('');
|
||||
|
||||
xhr.upload.onprogress = function(e) {
|
||||
if (e.lengthComputable) {
|
||||
const percentComplete = (e.loaded / e.total) * 100;
|
||||
progressFill.style.width = percentComplete + '%';
|
||||
progressText.textContent = `Uploading: ${Math.round(percentComplete)}%`;
|
||||
// 检查是否已有报告
|
||||
try {
|
||||
const reportResponse = await fetch(`${API_BASE_URL}/check/${hashHex}`);
|
||||
if (reportResponse.ok) {
|
||||
const reportData = await reportResponse.json();
|
||||
if (reportData.status === 'completed') {
|
||||
progressText.textContent = `Report already exists for ${file.name}, skipping analysis...`;
|
||||
progressFill.style.width = '100%';
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error checking report:', error);
|
||||
// 如果检查失败,继续上传流程
|
||||
}
|
||||
};
|
||||
|
||||
const uploadPromise = new Promise((resolve, reject) => {
|
||||
xhr.onload = function() {
|
||||
if (xhr.status === 200) {
|
||||
resolve(JSON.parse(xhr.response));
|
||||
} else {
|
||||
reject(new Error('Upload failed'));
|
||||
// 检查哈希值是否存在
|
||||
try {
|
||||
const checkResponse = await fetch(`https://files.aiot.ml/checkhash/${hashHex}`);
|
||||
if (checkResponse.ok) {
|
||||
// 哈希值已存在,直接使用
|
||||
progressText.textContent = `File ${file.name} already exists, skipping upload...`;
|
||||
progressFill.style.width = '100%';
|
||||
|
||||
// 将文件信息发送到后端 API
|
||||
const response = await fetch(`${API_BASE_URL}/upload`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
body: JSON.stringify({
|
||||
files: [{
|
||||
filename: file.name,
|
||||
hash: hashHex
|
||||
}]
|
||||
})
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to process file');
|
||||
}
|
||||
continue;
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error checking hash:', error);
|
||||
// 如果检查失败,继续上传流程
|
||||
}
|
||||
|
||||
// 创建 FormData 对象
|
||||
const formData = new FormData();
|
||||
formData.append('file', file);
|
||||
formData.append('client_hash', hashHex);
|
||||
|
||||
// 使用 XMLHttpRequest 来获取上传进度
|
||||
const xhr = new XMLHttpRequest();
|
||||
|
||||
xhr.upload.onprogress = function(e) {
|
||||
if (e.lengthComputable) {
|
||||
const percentComplete = (e.loaded / e.total) * 100;
|
||||
progressFill.style.width = percentComplete + '%';
|
||||
progressText.textContent = `Uploading ${file.name}: ${Math.round(percentComplete)}%`;
|
||||
}
|
||||
};
|
||||
xhr.onerror = () => reject(new Error('Upload failed'));
|
||||
});
|
||||
|
||||
xhr.open('POST', `${API_BASE_URL}/upload`);
|
||||
xhr.send(formData);
|
||||
const uploadPromise = new Promise((resolve, reject) => {
|
||||
xhr.onload = function() {
|
||||
if (xhr.status === 200) {
|
||||
resolve(JSON.parse(xhr.response));
|
||||
} else {
|
||||
reject(new Error('Upload failed'));
|
||||
}
|
||||
};
|
||||
xhr.onerror = () => reject(new Error('Upload failed'));
|
||||
});
|
||||
|
||||
await uploadPromise;
|
||||
// 发送到文件存储服务
|
||||
xhr.open('POST', 'https://files.aiot.ml/upload');
|
||||
xhr.send(formData);
|
||||
|
||||
const uploadResult = await uploadPromise;
|
||||
|
||||
// 将文件信息发送到后端 API
|
||||
const response = await fetch(`${API_BASE_URL}/upload`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
body: JSON.stringify({
|
||||
files: [{
|
||||
filename: file.name,
|
||||
hash: uploadResult.hash
|
||||
}]
|
||||
})
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to process file');
|
||||
}
|
||||
}
|
||||
|
||||
// 上传完成后移除进度条
|
||||
setTimeout(() => {
|
||||
@@ -718,12 +798,12 @@
|
||||
}
|
||||
|
||||
// View paper details function
|
||||
async function viewDetail(paperId) {
|
||||
async function viewDetail(fileHash) {
|
||||
try {
|
||||
const response = await fetch(`${API_BASE_URL}/${paperId}/report`);
|
||||
const response = await fetch(`${API_BASE_URL}/report/${fileHash}`);
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to fetch analysis report');
|
||||
throw new Error('获取分析报告失败');
|
||||
}
|
||||
|
||||
const analysisResult = await response.json();
|
||||
@@ -733,9 +813,9 @@
|
||||
modalContent = `
|
||||
<div class="d-flex align-items-center justify-content-center p-5">
|
||||
<div class="spinner-border text-primary me-3" role="status">
|
||||
<span class="visually-hidden">Loading...</span>
|
||||
<span class="visually-hidden">加载中...</span>
|
||||
</div>
|
||||
<h5 class="mb-0">Analyzing Paper content, please wait...</h5>
|
||||
<h5 class="mb-0">正在分析文献内容,请稍候...</h5>
|
||||
</div>
|
||||
`;
|
||||
} else if (analysisResult.status === 'completed') {
|
||||
@@ -856,30 +936,30 @@
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
console.error('Failed to view details:', error);
|
||||
alert('Failed to get analysis report, please try again later');
|
||||
console.error('查看详情失败:', error);
|
||||
alert('获取分析报告失败,请稍后重试');
|
||||
}
|
||||
}
|
||||
|
||||
// View paper details from card
|
||||
/**
 * Open the detail view for the paper represented by a card element.
 *
 * The card's `id` has the form `paper-<fileHash>`; the `paper-` prefix is
 * removed and the remainder is handed to `viewDetail` exactly once.
 *
 * @param {Element} card Card element whose `id` carries the file hash.
 */
function DetailFromCard(card) {
    const fileHash = card.id.replace('paper-', '');
    viewDetail(fileHash);
}
|
||||
|
||||
// Delete paper function
|
||||
async function deletePaper(paperId) {
|
||||
async function deletePaper(fileHash) {
|
||||
if (!confirm('Are you sure you want to delete this Paper? This action cannot be undone.')) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await fetch(`${API_BASE_URL}/delete/${paperId}`, {
|
||||
const response = await fetch(`${API_BASE_URL}/delete/${fileHash}`, {
|
||||
method: 'DELETE'
|
||||
});
|
||||
|
||||
if (response.ok) {
|
||||
const card = document.getElementById(`paper-${paperId}`);
|
||||
const card = document.getElementById(`paper-${fileHash}`);
|
||||
if (card) {
|
||||
card.remove();
|
||||
}
|
||||
@@ -901,10 +981,10 @@
|
||||
}
|
||||
|
||||
// Add chat dialog function
|
||||
async function openChatDialog(paperId) {
|
||||
async function openChatDialog(fileHash) {
|
||||
try {
|
||||
// 使用已获取的报告数据
|
||||
const report = paperReports.get(paperId);
|
||||
const report = paperReports.get(fileHash);
|
||||
let paperTitle = 'Chat';
|
||||
|
||||
// 从报告中获取标题
|
||||
@@ -936,7 +1016,7 @@
|
||||
aria-label="Question input">
|
||||
<button class="btn"
|
||||
style="background: var(--primary-color); color: white;"
|
||||
onclick="sendQuestion('${paperId}')"
|
||||
onclick="sendQuestion('${fileHash}')"
|
||||
aria-label="Send question">
|
||||
Send
|
||||
</button>
|
||||
@@ -977,13 +1057,13 @@
|
||||
modalInstance.show();
|
||||
|
||||
// 加载聊天历史
|
||||
await loadChatHistory(paperId);
|
||||
await loadChatHistory(fileHash);
|
||||
|
||||
// 添加回车键事件监听器
|
||||
const input = document.getElementById('questionInput');
|
||||
input.addEventListener('keypress', (e) => {
|
||||
if (e.key === 'Enter') {
|
||||
sendQuestion(paperId);
|
||||
sendQuestion(fileHash);
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
@@ -993,10 +1073,10 @@
|
||||
}
|
||||
|
||||
// Load chat history function
|
||||
async function loadChatHistory(paperId) {
|
||||
async function loadChatHistory(fileHash) {
|
||||
const chatHistory = document.getElementById('chatHistory');
|
||||
try {
|
||||
const response = await fetch(`${API_BASE_URL}/${paperId}/qa/history`);
|
||||
const response = await fetch(`${API_BASE_URL}/${fileHash}/qa/history`);
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to load chat history');
|
||||
@@ -1067,7 +1147,7 @@
|
||||
}
|
||||
|
||||
// Send question function
|
||||
async function sendQuestion(paperId) {
|
||||
async function sendQuestion(fileHash) {
|
||||
const input = document.getElementById('questionInput');
|
||||
const question = input.value.trim();
|
||||
|
||||
@@ -1112,7 +1192,7 @@
|
||||
chatHistory.appendChild(loadingDiv);
|
||||
chatHistory.scrollTop = chatHistory.scrollHeight;
|
||||
|
||||
const response = await fetch(`${API_BASE_URL}/${paperId}/qa`, {
|
||||
const response = await fetch(`${API_BASE_URL}/${fileHash}/qa`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
@@ -1224,7 +1304,122 @@
|
||||
alert('Failed to download report');
|
||||
}
|
||||
}
|
||||
// 简化的重试函数,复用现有的上传流程
|
||||
/**
 * Re-run the analysis for a paper whose previous analysis failed.
 *
 * Steps:
 *   1. Switch the card's content and status areas to a "retrying" state.
 *   2. Ask the file storage service whether it holds a file for `fileHash`.
 *   3. If it does not (or the check itself failed), download the file by hash
 *      and POST it back to the storage service.
 *   4. POST `{ files: [{ filename, hash }] }` to `${API_BASE_URL}/upload`
 *      to trigger the analysis.
 *   5. Reload the paper list two seconds later.
 *
 * Any failure is logged and shown on the card together with a retry button.
 * The function never rejects because of a missing card element.
 *
 * @param {string} fileHash   Hash used in the card's CSS class names and in the service URLs.
 * @param {string} paperTitle Sent as the file name to both services.
 * @returns {Promise<void>}
 */
async function retryAnalysis(fileHash, paperTitle) {
    // The card may have been removed (e.g. deleted) while a retry is pending,
    // so both lookups are allowed to come back null.
    const contentElement = document.querySelector(`.analysis-content-${fileHash}`);
    const statusElement = document.querySelector(`.analysis-status-${fileHash}`);

    // `label` is always one of the fixed strings below, never user data.
    const showSpinner = (label) => {
        if (!contentElement) {
            return;
        }
        contentElement.innerHTML = `
            <div class="d-flex align-items-center">
                <div class="spinner-border spinner-border-sm text-primary me-2" role="status">
                    <span class="visually-hidden">Loading...</span>
                </div>
                <span>${label}</span>
            </div>
        `;
    };

    try {
        showSpinner('Retrying analysis...');
        if (statusElement) {
            statusElement.innerHTML = '<span class="text-primary">Retrying analysis...</span>';
        }

        // A failed check is treated the same as "file not present".
        let fileExists = false;
        try {
            const checkResponse = await fetch(`https://files.aiot.ml/checkhash/${fileHash}`);
            fileExists = checkResponse.ok;
        } catch (error) {
            console.error('Error checking hash:', error);
        }

        if (!fileExists) {
            try {
                // NOTE(review): this download targets the same host that just reported
                // the hash as missing - confirm this path can actually succeed.
                const fileResponse = await fetch(`https://files.aiot.ml/pdf/${fileHash}`);
                if (!fileResponse.ok) {
                    throw new Error('Failed to fetch original file');
                }
                const fileBlob = await fileResponse.blob();

                const formData = new FormData();
                formData.append('file', fileBlob, paperTitle);
                formData.append('client_hash', fileHash);

                showSpinner('Re-uploading file...');

                // fetch instead of XMLHttpRequest: no progress reporting is needed here,
                // and a malformed response body can no longer leave a promise pending.
                const uploadResponse = await fetch('https://files.aiot.ml/upload', {
                    method: 'POST',
                    body: formData
                });
                if (uploadResponse.status !== 200) {
                    throw new Error('Upload failed');
                }
            } catch (error) {
                throw new Error(`Failed to re-upload file: ${error.message}`);
            }
        }

        // The file is expected to be available now; ask the backend to analyze it.
        const response = await fetch(`${API_BASE_URL}/upload`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({
                files: [{
                    filename: paperTitle,
                    hash: fileHash
                }]
            })
        });

        if (!response.ok) {
            throw new Error('Failed to process file');
        }

        // Give the backend a moment to register the job before refreshing the list.
        setTimeout(async () => {
            try {
                await loadPapers();
            } catch (reloadError) {
                console.error('Failed to reload papers:', reloadError);
            }
        }, 2000);
    } catch (error) {
        console.error('Retry analysis failed:', error);

        if (contentElement) {
            contentElement.innerHTML = `
                <div class="text-danger mb-2">
                    <i class="bi bi-exclamation-triangle me-2"></i>
                    Retry failed
                </div>
            `;
            // Bind the handler in script rather than through an inline onclick string,
            // so a title containing quotes or markup cannot break out of the attribute.
            const retryButton = document.createElement('button');
            retryButton.className = 'btn btn-primary btn-sm';
            retryButton.innerHTML = '<i class="bi bi-arrow-clockwise me-1"></i> Retry Analysis';
            retryButton.addEventListener('click', () => retryAnalysis(fileHash, paperTitle));
            contentElement.appendChild(retryButton);
        }

        if (statusElement) {
            // textContent keeps the error text from being parsed as HTML.
            const failure = document.createElement('span');
            failure.className = 'text-danger';
            failure.textContent = `Retry failed: ${error.message}`;
            statusElement.replaceChildren(failure);
        }
    }
}
|
||||
// 在页面加载完成后自动显示文献列表
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
showPaperList();
|
||||
|
||||
@@ -16,12 +16,12 @@ import PyPDF2
|
||||
import aiohttp
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from openai import OpenAI
|
||||
import hashlib
|
||||
|
||||
# Database configuration.
# SECURITY(review): the literal URLs below embed live credentials and a public
# host address. They are kept only as backward-compatible defaults. Set the
# MONGODB_URL / REDIS_URL environment variables in every deployment, rotate the
# exposed passwords, and then remove the literals from source control.
import os

MONGODB_URL = os.environ.get(
    "MONGODB_URL",
    "mongodb://paper:SYX7cdJNMRbiytra@222.186.10.253:27017/paper",
)
REDIS_URL = os.environ.get(
    "REDIS_URL",
    "redis://:Obscura@2024@222.186.10.253:6379",
)

# Filesystem location for uploaded papers (unchanged).
upload_path = "/obscura/task/papers"
|
||||
|
||||
# Database connection
|
||||
class Database:
|
||||
@@ -99,19 +99,35 @@ async def delete_paper(
|
||||
db = await get_database()
|
||||
|
||||
try:
|
||||
|
||||
# 获取文献信息
|
||||
paper = await db.papers.find_one({"_id": ObjectId(paper_id)})
|
||||
if not paper:
|
||||
raise HTTPException(status_code=404, detail="paper not found")
|
||||
|
||||
# 删除文件
|
||||
if os.path.exists(paper["paper_link"]):
|
||||
os.remove(paper["paper_link"])
|
||||
|
||||
# 从文件存储服务删除文件
|
||||
file_hash = paper.get("file_hash")
|
||||
# 删除数据库记录
|
||||
await db.papers.delete_one({"_id": ObjectId(paper_id)})
|
||||
|
||||
# 删除Redis中的分析报告(如果存在)
|
||||
try:
|
||||
redis = await get_redis()
|
||||
await redis.select(190)
|
||||
report_key = f"paper_report:{file_hash}"
|
||||
await redis.delete(report_key)
|
||||
|
||||
# 删除聊天历史(如果存在)
|
||||
await redis.select(191)
|
||||
chat_history_key = f"chat_history:{paper_id}"
|
||||
await redis.delete(chat_history_key)
|
||||
except Exception as redis_error:
|
||||
print(f"Warning: Failed to delete Redis keys: {str(redis_error)}")
|
||||
finally:
|
||||
try:
|
||||
await redis.aclose()
|
||||
except Exception as e:
|
||||
print(f"Error closing Redis connection: {e}")
|
||||
|
||||
return {"message": "paper successfully deleted"}
|
||||
|
||||
except Exception as e:
|
||||
@@ -130,7 +146,8 @@ async def get_papers():
|
||||
"_id": str(ref["_id"]),
|
||||
"paper_link": ref["paper_link"],
|
||||
"paper_title": ref["paper_title"],
|
||||
"upload_time": ref["upload_time"]
|
||||
"upload_time": ref["upload_time"],
|
||||
"file_hash": ref.get("file_hash")
|
||||
})
|
||||
return papers
|
||||
|
||||
@@ -138,17 +155,17 @@ async def get_papers():
|
||||
print(f"Error getting papers: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.get("/paper/{paper_id}/report")
|
||||
@app.get("/paper/report/{file_hash}")
|
||||
async def get_report(
|
||||
paper_id: str
|
||||
file_hash: str
|
||||
):
|
||||
"""从 Redis db190 读取已保存的文献报告"""
|
||||
"""从 Redis db190 直接通过文件哈希值读取已保存的文献报告"""
|
||||
redis = await get_redis()
|
||||
|
||||
try:
|
||||
# 选择 db190
|
||||
await redis.select(190)
|
||||
report_key = f"paper_report:{paper_id}"
|
||||
report_key = f"paper_report:{file_hash}"
|
||||
|
||||
# 获取已保存的报告
|
||||
existing_report = await redis.get(report_key)
|
||||
@@ -168,77 +185,103 @@ async def get_report(
|
||||
except Exception as e:
|
||||
print(f"Error closing Redis connection: {e}")
|
||||
|
||||
# Request schema for POST /paper/upload: the client sends file metadata only.
class FileUpload(BaseModel):
    # File name as supplied by the client; stored as the paper title.
    filename: str
    # Hash identifying the file; used in the report key and the storage URL.
    hash: str


class BatchUploadRequest(BaseModel):
    # One entry per file in the batch.
    files: List[FileUpload]
|
||||
|
||||
@app.post("/paper/upload")
|
||||
async def batch_upload(
|
||||
files: List[UploadFile] = File(...)
|
||||
request: BatchUploadRequest
|
||||
):
|
||||
"""批量上传项目相关文献"""
|
||||
db = await get_database()
|
||||
redis = await get_redis()
|
||||
|
||||
try:
|
||||
|
||||
uploaded_papers = []
|
||||
papers_to_analyze = []
|
||||
|
||||
# 批量上传文件
|
||||
for file in files:
|
||||
# 验证文件类型
|
||||
allowed_types = ["application/pdf", "application/msword",
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"]
|
||||
if file.content_type not in allowed_types:
|
||||
continue # 跳过不支持的文件类型
|
||||
|
||||
# 确保上传目录存在
|
||||
os.makedirs(upload_path, exist_ok=True)
|
||||
|
||||
# 使用原始文件名
|
||||
file_path = os.path.join(upload_path, file.filename)
|
||||
|
||||
# 保存文件
|
||||
with open(file_path, "wb") as buffer:
|
||||
content = await file.read()
|
||||
buffer.write(content)
|
||||
|
||||
# 创建记录
|
||||
paper = {
|
||||
"paper_link": file_path,
|
||||
"paper_title": file.filename,
|
||||
"upload_time": datetime.now(timezone.utc)
|
||||
}
|
||||
|
||||
result = await db.papers.insert_one(paper)
|
||||
paper_info = {
|
||||
"paper_id": str(result.inserted_id),
|
||||
"file_path": file_path,
|
||||
"paper_title": paper["paper_title"]
|
||||
}
|
||||
uploaded_papers.append(paper_info)
|
||||
|
||||
# 为每个文献创建初始状态
|
||||
redis = await get_redis()
|
||||
# 批量处理文件
|
||||
for file in request.files:
|
||||
try:
|
||||
# 检查文件是否已存在分析报告
|
||||
await redis.select(190)
|
||||
report_key = f"paper_report:{str(result.inserted_id)}"
|
||||
initial_status = {
|
||||
"status": "processing",
|
||||
"message": "Analysis in progress"
|
||||
report_key = f"paper_report:{file.hash}"
|
||||
existing_report = await redis.get(report_key)
|
||||
print(f"[Redis] existing_report: {existing_report}")
|
||||
print(f"[Redis] file.hash: {file.hash}")
|
||||
if existing_report:
|
||||
report_data = json.loads(existing_report)
|
||||
if report_data.get("status") == "completed":
|
||||
print(f"[Redis] Report already exists for file {file.filename} with hash {file.hash}")
|
||||
# 即使报告存在,也创建新的文献记录
|
||||
paper = {
|
||||
"paper_link": f"https://files.aiot.ml/pdf/content/{file.hash}",
|
||||
"paper_title": file.filename,
|
||||
"upload_time": datetime.now(timezone.utc),
|
||||
"file_hash": file.hash
|
||||
}
|
||||
|
||||
result = await db.papers.insert_one(paper)
|
||||
paper_info = {
|
||||
"paper_id": str(result.inserted_id),
|
||||
"file_hash": file.hash,
|
||||
"paper_title": paper["paper_title"]
|
||||
}
|
||||
uploaded_papers.append(paper_info)
|
||||
continue
|
||||
|
||||
# 创建记录
|
||||
paper = {
|
||||
"paper_link": f"https://files.aiot.ml/pdf/content/{file.hash}",
|
||||
"paper_title": file.filename,
|
||||
"upload_time": datetime.now(timezone.utc),
|
||||
"file_hash": file.hash
|
||||
}
|
||||
await redis.set(report_key, json.dumps(initial_status))
|
||||
finally:
|
||||
await redis.aclose()
|
||||
|
||||
# 在后台启动分析任务
|
||||
if uploaded_papers:
|
||||
asyncio.create_task(batch_analysis(uploaded_papers))
|
||||
result = await db.papers.insert_one(paper)
|
||||
paper_info = {
|
||||
"paper_id": str(result.inserted_id),
|
||||
"file_hash": file.hash,
|
||||
"paper_title": paper["paper_title"]
|
||||
}
|
||||
uploaded_papers.append(paper_info)
|
||||
|
||||
# 如果没有现有报告或报告未完成,创建初始状态并添加到待分析列表
|
||||
if not existing_report or json.loads(existing_report).get("status") != "completed":
|
||||
initial_status = {
|
||||
"status": "processing",
|
||||
"message": "Analysis in progress"
|
||||
}
|
||||
await redis.set(report_key, json.dumps(initial_status))
|
||||
papers_to_analyze.append(paper_info)
|
||||
|
||||
except Exception as file_error:
|
||||
print(f"处理文件 {file.filename} 时出错: {str(file_error)}")
|
||||
continue
|
||||
|
||||
# 只对没有报告的文件启动分析任务
|
||||
if papers_to_analyze:
|
||||
asyncio.create_task(batch_analysis(papers_to_analyze))
|
||||
print(f"[Redis] 报告不存在,开始分析: {papers_to_analyze}")
|
||||
return {
|
||||
"message": f"Successfully uploaded {len(uploaded_papers)} files",
|
||||
"uploaded_files": uploaded_papers
|
||||
"uploaded_files": uploaded_papers,
|
||||
"files_to_analyze": len(papers_to_analyze)
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
print(f"Batch upload error: {str(e)}")
|
||||
print(f"批量上传错误: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
finally:
|
||||
try:
|
||||
await redis.aclose()
|
||||
except Exception as e:
|
||||
print(f"Error closing Redis connection: {e}")
|
||||
|
||||
|
||||
async def batch_analysis(papers: List[dict]):
|
||||
"""批量处理文献分析的后台任务"""
|
||||
@@ -251,24 +294,40 @@ async def batch_analysis(papers: List[dict]):
|
||||
async with semaphore:
|
||||
try:
|
||||
paper_id = ref["paper_id"]
|
||||
file_path = ref["file_path"]
|
||||
file_hash = ref["file_hash"]
|
||||
|
||||
if not os.path.exists(file_path):
|
||||
print(f"paper file not found: {file_path}")
|
||||
return
|
||||
# 再次检查报告是否存在(以防在开始分析前已经被其他进程分析)
|
||||
await redis.select(190)
|
||||
report_key = f"paper_report:{file_hash}"
|
||||
existing_report = await redis.get(report_key)
|
||||
if existing_report:
|
||||
report_data = json.loads(existing_report)
|
||||
if report_data.get("status") == "completed":
|
||||
print(f"Report already exists for file hash: {file_hash}, skipping analysis")
|
||||
return
|
||||
# 从文件存储服务获取PDF内容
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(f'https://files.aiot.ml/pdf/content/{file_hash}') as response:
|
||||
if response.status != 200:
|
||||
print(f"[Redis] 获取PDF内容失败: {response.status}")
|
||||
raise Exception(f"Failed to get PDF content for file hash: {file_hash}")
|
||||
|
||||
pdf_content = await response.json()
|
||||
if not pdf_content.get('content'):
|
||||
raise Exception("No PDF content returned")
|
||||
|
||||
print(f"\n开始处理文献 {ref.get('paper_title', '未知标题')}")
|
||||
print(f"文献ID: {paper_id}")
|
||||
print(f"文件路径: {file_path}")
|
||||
print(f"文件哈希: {file_hash}")
|
||||
|
||||
# 异步读取PDF
|
||||
print("\n=== 步骤1: 读取PDF文件 ===")
|
||||
pdf_content = await read_pdf(file_path)
|
||||
if not pdf_content:
|
||||
raise Exception("Failed to read PDF content")
|
||||
# 使用获取到的PDF内容继续处理
|
||||
content = pdf_content['content']
|
||||
# 如果content是列表,将其合并为单个字符串
|
||||
if isinstance(content, list):
|
||||
content = '\n'.join(content)
|
||||
|
||||
# 打印字符数
|
||||
content_length = len(pdf_content)
|
||||
content_length = len(content)
|
||||
print(f"\n=== 步骤2: 内容长度检查 ===")
|
||||
print(f"PDF内容总字符数: {content_length}")
|
||||
|
||||
@@ -276,34 +335,19 @@ async def batch_analysis(papers: List[dict]):
|
||||
if content_length <= 200000:
|
||||
print(f"\n=== 步骤3A: 使用直接分析方式 ===")
|
||||
print(f"文档长度在处理范围内 ({content_length} <= 200000)")
|
||||
# 直接分析文档内容
|
||||
print("开始分析文档内容...")
|
||||
document_analysis = await analyze_paper(pdf_content[:180000])
|
||||
document_analysis = await analyze_paper(content[:180000])
|
||||
if not document_analysis:
|
||||
raise Exception("Failed to analyze document")
|
||||
print("文档分析完成")
|
||||
else:
|
||||
print(f"\n=== 步骤3B: 使用分段分析方式 ===")
|
||||
print(f"文档超过200000字符 ({content_length} > 200000)")
|
||||
# 分段分析长文档
|
||||
print("\n--- 开始分段分析 ---")
|
||||
print("正在调用 analyze_long_file...")
|
||||
try:
|
||||
analysis_results = await analyze_long_file(pdf_content)
|
||||
except Exception as e:
|
||||
print(f"分段分析过程中出错: {str(e)}")
|
||||
raise
|
||||
|
||||
analysis_results = await analyze_long_file(content)
|
||||
if not analysis_results:
|
||||
raise Exception("Failed to analyze document in segments")
|
||||
print(f"分段分析完成,共分析了 {len(analysis_results)} 个段落")
|
||||
|
||||
# 合并分析结果
|
||||
try:
|
||||
document_analysis = await merge_results(analysis_results)
|
||||
except Exception as e:
|
||||
raise
|
||||
|
||||
document_analysis = await merge_results(analysis_results)
|
||||
if not document_analysis:
|
||||
raise Exception("Failed to merge analysis results")
|
||||
|
||||
@@ -311,12 +355,7 @@ async def batch_analysis(papers: List[dict]):
|
||||
await asyncio.sleep(1)
|
||||
|
||||
# 异步分析文献价值
|
||||
try:
|
||||
value_evaluation = await paper_value(document_analysis)
|
||||
print(f"paper_value 返回结果类型: {type(value_evaluation)}")
|
||||
except Exception as e:
|
||||
raise
|
||||
|
||||
value_evaluation = await paper_value(document_analysis)
|
||||
if not value_evaluation:
|
||||
raise Exception("Failed to evaluate value")
|
||||
print("文献价值分析完成")
|
||||
@@ -331,13 +370,12 @@ async def batch_analysis(papers: List[dict]):
|
||||
|
||||
# 保存结果
|
||||
await redis.select(190)
|
||||
report_key = f"paper_report:{paper_id}"
|
||||
await redis.set(report_key, json.dumps(analysis_result))
|
||||
|
||||
except Exception as e:
|
||||
try:
|
||||
await redis.select(190)
|
||||
report_key = f"paper_report:{ref['paper_id']}"
|
||||
report_key = f"paper_report:{file_hash}"
|
||||
error_status = {
|
||||
"status": "failed",
|
||||
"message": str(e)
|
||||
@@ -901,6 +939,37 @@ async def get_paper_qa_history(
|
||||
except Exception as e:
|
||||
print(f"Error closing Redis connection: {e}")
|
||||
|
||||
# 添加新的路由,通过哈希值获取报告
|
||||
@app.get("/paper/check/{file_hash}")
async def check_report(file_hash: str):
    """Return the report stored in Redis db 190 under ``paper_report:<file_hash>``.

    Args:
        file_hash: Hash taken from the URL path; used verbatim in the Redis key.

    Returns:
        The JSON-decoded stored value, or ``{"status": "not_found"}`` when
        nothing (or an empty value) is stored under the key.

    Raises:
        HTTPException: Status 500 carrying the error text when the lookup or
            the JSON decoding fails.
    """
    redis = await get_redis()

    try:
        # Reports live in logical database 190.
        await redis.select(190)
        stored = await redis.get(f"paper_report:{file_hash}")

        if stored:
            return json.loads(stored)
        return {"status": "not_found"}

    except Exception as e:
        print(f"Error getting paper report by hash: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Always release the connection; a failed close is only logged.
        try:
            await redis.aclose()
        except Exception as e:
            print(f"Error closing Redis connection: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
uvicorn.run(app, host="0.0.0.0", port=9005)
|
||||
|
||||
Reference in New Issue
Block a user