{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "e03ab29a-7cfb-44e9-a045-34d68f1e94bb",
   "metadata": {},
   "source": [
    "# 假设用户指定请求某篇文章。\n",
    "## 我们通过 arXiv ID 获取该文献的简介与 PDF 文件。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "81b9f218-85c2-4982-8f19-ee3849a9e31a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'title': 'Constraining effective equation of state in $f(Q,T)$ gravity', 'summary': \"  New high-precision observations are now possible to constrain different\\ngravity theories. To examine the accelerated expansion of the Universe, we used\\nthe newly proposed $f(Q,T)$ gravity, where $Q$ is the non-metricity, and $T$ is\\nthe trace of the energy-momentum tensor. The investigation is carried out using\\na parameterized effective equation of state with two parameters, $m$ and $n$.\\nWe have also considered the linear form of $f(Q,T)= Q+bT$, where $b$ is\\nconstant. By constraining the model with the recently published 1048 Pantheon\\nsample, we were able to find the best fitting values for the parameters $b$,\\n$m$, and $n$. The model appears to be in good agreement with the observations.\\nFinally, we analyzed the behavior of the deceleration parameter and equation of\\nstate parameter. The results support the feasibility of $f(Q,T)$ as a promising\\ntheory of gravity, illuminating a new direction towards explaining the\\nUniverse's dark sector.\\n\", 'pdf_url': 'http://arxiv.org/pdf/2104.00001v2.pdf'}\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "from xml.etree import ElementTree\n",
    "\n",
    "def get_arxiv_paper_info(arxiv_id, timeout=30):\n",
    "    \"\"\"Fetch the title, abstract and PDF link of a paper from the arXiv export API.\n",
    "\n",
    "    Args:\n",
    "        arxiv_id: arXiv identifier to look up, e.g. '2104.00001'.\n",
    "        timeout: Seconds to wait for the API before giving up.\n",
    "\n",
    "    Returns:\n",
    "        dict with the keys 'title', 'summary' and 'pdf_url'. A value is left\n",
    "        as an empty string when the returned feed does not provide it.\n",
    "\n",
    "    Raises:\n",
    "        requests.HTTPError: If the API answers with an error status code.\n",
    "    \"\"\"\n",
    "    ns = {'atom': 'http://www.w3.org/2005/Atom'}\n",
    "\n",
    "    # Pass the ID via params so requests URL-encodes it, and bound the wait\n",
    "    # so a stalled connection cannot hang the notebook forever.\n",
    "    response = requests.get(\n",
    "        'http://export.arxiv.org/api/query',\n",
    "        params={'id_list': arxiv_id},\n",
    "        timeout=timeout,\n",
    "    )\n",
    "    # Fail loudly on HTTP errors instead of parsing an error page as Atom XML.\n",
    "    response.raise_for_status()\n",
    "\n",
    "    root = ElementTree.fromstring(response.content)\n",
    "\n",
    "    paper_info = {'title': '', 'summary': '', 'pdf_url': ''}\n",
    "\n",
    "    for entry in root.findall('atom:entry', ns):\n",
    "        # findtext falls back to '' when the element is missing or empty,\n",
    "        # where find(...).text would raise AttributeError or yield None.\n",
    "        paper_info['title'] = entry.findtext('atom:title', default='', namespaces=ns)\n",
    "        paper_info['summary'] = entry.findtext('atom:summary', default='', namespaces=ns)\n",
    "        for link in entry.findall('atom:link', ns):\n",
    "            href = link.get('href')\n",
    "            if link.get('title') == 'pdf' and href:\n",
    "                # Keep the '.pdf' suffix callers already rely on, but never double it.\n",
    "                paper_info['pdf_url'] = href if href.endswith('.pdf') else href + '.pdf'\n",
    "\n",
    "    return paper_info\n",
    "\n",
    "# Example: look up one paper by its arXiv ID.\n",
    "# Replace the value below with the ID you actually need.\n",
    "arxiv_id = '2104.00001'\n",
    "paper_info = get_arxiv_paper_info(arxiv_id)\n",
    "print(paper_info)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0a4e3fe2-f965-4030-b175-14ef02a6863b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import fitz  # PyMuPDF\n",
    "\n",
    "def download_pdf(url, filename, timeout=60):\n",
    "    \"\"\"Download the file at `url` and write it to `filename`.\n",
    "\n",
    "    Args:\n",
    "        url: Address of the PDF to download.\n",
    "        filename: Local path the bytes are written to (opened in 'wb' mode).\n",
    "        timeout: Seconds to wait for the server before giving up.\n",
    "\n",
    "    Raises:\n",
    "        requests.HTTPError: If the server answers with an error status code.\n",
    "    \"\"\"\n",
    "    # Stream the body so a large PDF is never held in memory as a whole;\n",
    "    # the with-block releases the connection when the download is done.\n",
    "    with requests.get(url, stream=True, timeout=timeout) as response:\n",
    "        # Check the status before opening the file so an error page is\n",
    "        # never saved to disk under a PDF name.\n",
    "        response.raise_for_status()\n",
    "        with open(filename, 'wb') as f:\n",
    "            for chunk in response.iter_content(chunk_size=8192):\n",
    "                f.write(chunk)\n",
    "\n",
    "def parse_pdf(filename):\n",
    "    \"\"\"Print the extracted text of every page of a PDF file.\n",
    "\n",
    "    Args:\n",
    "        filename: Path of the PDF file to open.\n",
    "    \"\"\"\n",
    "    # The with-block closes the document even if text extraction fails,\n",
    "    # so the file handle is not leaked.\n",
    "    with fitz.open(filename) as doc:\n",
    "        for page in doc:\n",
    "            print(page.get_text())\n",
    "\n",
    "# Download the PDF from the URL obtained by the arXiv lookup.\n",
    "pdf_url = paper_info['pdf_url']\n",
    "if not pdf_url:\n",
    "    # An empty URL means the lookup found no PDF link; stop here with a\n",
    "    # clear message instead of letting requests fail on an empty string.\n",
    "    raise ValueError('No PDF URL found in paper_info; check the arXiv ID.')\n",
    "filename = 'downloaded_paper.pdf'\n",
    "download_pdf(pdf_url, filename)\n",
    "\n",
    "# Parse the downloaded PDF and print its text.\n",
    "parse_pdf(filename)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}