mirror of
https://github.com/zots0127/paper_dataset.git
synced 2026-01-13 23:37:29 +08:00
116 lines
4.3 KiB
Plaintext
116 lines
4.3 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "e03ab29a-7cfb-44e9-a045-34d68f1e94bb",
|
||
"metadata": {},
|
||
"source": [
|
||
"# 假设用户指定请求某篇文章。\n",
|
||
"## 我们通过 arXiv ID 获取该文献的简介与 PDF 文件。"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "81b9f218-85c2-4982-8f19-ee3849a9e31a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"{'title': 'Constraining effective equation of state in $f(Q,T)$ gravity', 'summary': \" New high-precision observations are now possible to constrain different\\ngravity theories. To examine the accelerated expansion of the Universe, we used\\nthe newly proposed $f(Q,T)$ gravity, where $Q$ is the non-metricity, and $T$ is\\nthe trace of the energy-momentum tensor. The investigation is carried out using\\na parameterized effective equation of state with two parameters, $m$ and $n$.\\nWe have also considered the linear form of $f(Q,T)= Q+bT$, where $b$ is\\nconstant. By constraining the model with the recently published 1048 Pantheon\\nsample, we were able to find the best fitting values for the parameters $b$,\\n$m$, and $n$. The model appears to be in good agreement with the observations.\\nFinally, we analyzed the behavior of the deceleration parameter and equation of\\nstate parameter. The results support the feasibility of $f(Q,T)$ as a promising\\ntheory of gravity, illuminating a new direction towards explaining the\\nUniverse's dark sector.\\n\", 'pdf_url': 'http://arxiv.org/pdf/2104.00001v2.pdf'}\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import requests\n",
|
||
"from xml.etree import ElementTree\n",
|
||
"\n",
|
||
"def get_arxiv_paper_info(arxiv_id):\n",
|
||
" # 构建请求URL\n",
|
||
" url = f'http://export.arxiv.org/api/query?id_list={arxiv_id}'\n",
|
||
" \n",
|
||
" # 发送GET请求\n",
|
||
" response = requests.get(url)\n",
|
||
" \n",
|
||
" # 解析返回的XML数据\n",
|
||
" root = ElementTree.fromstring(response.content)\n",
|
||
" \n",
|
||
" # 初始化结果字典\n",
|
||
" paper_info = {'title': '', 'summary': '', 'pdf_url': ''}\n",
|
||
" \n",
|
||
" # 提取文章信息\n",
|
||
" for entry in root.findall('{http://www.w3.org/2005/Atom}entry'):\n",
|
||
" paper_info['title'] = entry.find('{http://www.w3.org/2005/Atom}title').text\n",
|
||
" paper_info['summary'] = entry.find('{http://www.w3.org/2005/Atom}summary').text\n",
|
||
" # 查找并提取PDF链接\n",
|
||
" for link in entry.findall('{http://www.w3.org/2005/Atom}link'):\n",
|
||
" if link.attrib.get('title') == 'pdf':\n",
|
||
" paper_info['pdf_url'] = link.attrib.get('href') + '.pdf'\n",
|
||
" \n",
|
||
" return paper_info\n",
|
||
"\n",
|
||
"# Example: fetch the metadata of one paper by its arXiv ID\n",
|
||
"arxiv_id = '2104.00001' # replace with the arXiv ID you want to look up\n",
|
||
"paper_info = get_arxiv_paper_info(arxiv_id)\n",
|
||
"print(paper_info)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "0a4e3fe2-f965-4030-b175-14ef02a6863b",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import requests\n",
|
||
"import fitz # PyMuPDF\n",
|
||
"\n",
|
||
"def download_pdf(url, filename):\n",
|
||
" response = requests.get(url)\n",
|
||
" with open(filename, 'wb') as f:\n",
|
||
" f.write(response.content)\n",
|
||
"\n",
|
||
"def parse_pdf(filename):\n",
|
||
" # 打开PDF文件\n",
|
||
" doc = fitz.open(filename)\n",
|
||
" \n",
|
||
" # 遍历每一页\n",
|
||
" for page_num in range(len(doc)):\n",
|
||
" page = doc.load_page(page_num)\n",
|
||
" print(page.get_text())\n",
|
||
"\n",
|
||
"# Download the PDF using the PDF URL obtained above\n",
|
||
"pdf_url = paper_info['pdf_url']\n",
|
||
"filename = 'downloaded_paper.pdf'\n",
|
||
"download_pdf(pdf_url, filename)\n",
|
||
"\n",
|
||
"# Parse the PDF and print its contents\n",
|
||
"parse_pdf(filename)\n"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.12.2"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|