{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "e03ab29a-7cfb-44e9-a045-34d68f1e94bb",
   "metadata": {},
   "source": [
    "# 假设用户指定请求某篇文章。\n",
    "## 我们通过ID 获取该文献的简介与PDF文件。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "81b9f218-85c2-4982-8f19-ee3849a9e31a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'title': 'Constraining effective equation of state in $f(Q,T)$ gravity', 'summary': \" New high-precision observations are now possible to constrain different\\ngravity theories. To examine the accelerated expansion of the Universe, we used\\nthe newly proposed $f(Q,T)$ gravity, where $Q$ is the non-metricity, and $T$ is\\nthe trace of the energy-momentum tensor. The investigation is carried out using\\na parameterized effective equation of state with two parameters, $m$ and $n$.\\nWe have also considered the linear form of $f(Q,T)= Q+bT$, where $b$ is\\nconstant. By constraining the model with the recently published 1048 Pantheon\\nsample, we were able to find the best fitting values for the parameters $b$,\\n$m$, and $n$. The model appears to be in good agreement with the observations.\\nFinally, we analyzed the behavior of the deceleration parameter and equation of\\nstate parameter. The results support the feasibility of $f(Q,T)$ as a promising\\ntheory of gravity, illuminating a new direction towards explaining the\\nUniverse's dark sector.\\n\", 'pdf_url': 'http://arxiv.org/pdf/2104.00001v2.pdf'}\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "from xml.etree import ElementTree\n",
    "\n",
    "# XML namespace prefix used by the elements of the arXiv Atom feed.\n",
    "ATOM_NS = '{http://www.w3.org/2005/Atom}'\n",
    "\n",
    "\n",
    "def get_arxiv_paper_info(arxiv_id, timeout=30):\n",
    "    \"\"\"Query the arXiv export API for one paper and extract its metadata.\n",
    "\n",
    "    Args:\n",
    "        arxiv_id: arXiv identifier placed in the `id_list` query parameter,\n",
    "            e.g. '2104.00001'.\n",
    "        timeout: Timeout in seconds passed to `requests.get`.\n",
    "\n",
    "    Returns:\n",
    "        dict with the keys 'title', 'summary' and 'pdf_url'. A value stays ''\n",
    "        when the feed does not provide it. If the feed holds several entries,\n",
    "        the values of the last entry win.\n",
    "\n",
    "    Raises:\n",
    "        requests.HTTPError: The API answered with a 4xx/5xx status code.\n",
    "        requests.exceptions.RequestException: Connection failure or timeout.\n",
    "        xml.etree.ElementTree.ParseError: The body is not well-formed XML.\n",
    "    \"\"\"\n",
    "    # Build the query URL for the arXiv export API.\n",
    "    url = f'http://export.arxiv.org/api/query?id_list={arxiv_id}'\n",
    "\n",
    "    # Fail loudly on HTTP errors instead of parsing an error page as XML.\n",
    "    response = requests.get(url, timeout=timeout)\n",
    "    response.raise_for_status()\n",
    "\n",
    "    # Parse the returned Atom feed.\n",
    "    root = ElementTree.fromstring(response.content)\n",
    "\n",
    "    paper_info = {'title': '', 'summary': '', 'pdf_url': ''}\n",
    "\n",
    "    for entry in root.findall(f'{ATOM_NS}entry'):\n",
    "        # findtext() yields '' instead of raising when an element is absent or empty.\n",
    "        paper_info['title'] = entry.findtext(f'{ATOM_NS}title', default='')\n",
    "        paper_info['summary'] = entry.findtext(f'{ATOM_NS}summary', default='')\n",
    "\n",
    "        # The PDF location is the <link> element whose title attribute is 'pdf'.\n",
    "        for link in entry.findall(f'{ATOM_NS}link'):\n",
    "            href = link.get('href')\n",
    "            if link.get('title') == 'pdf' and href:\n",
    "                # Append the '.pdf' suffix only when it is not already there.\n",
    "                paper_info['pdf_url'] = href if href.endswith('.pdf') else href + '.pdf'\n",
    "\n",
    "    return paper_info\n",
    "\n",
    "\n",
    "# Example: look up one paper by its arXiv ID.\n",
    "arxiv_id = '2104.00001'  # replace with the arXiv ID you need\n",
    "paper_info = get_arxiv_paper_info(arxiv_id)\n",
    "print(paper_info)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0a4e3fe2-f965-4030-b175-14ef02a6863b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import fitz  # PyMuPDF\n",
    "\n",
    "\n",
    "def download_pdf(url, filename, timeout=60):\n",
    "    \"\"\"Download `url` and write the response body to `filename`.\n",
    "\n",
    "    Args:\n",
    "        url: Address of the file to download.\n",
    "        filename: Path of the local file to create or overwrite.\n",
    "        timeout: Timeout in seconds passed to `requests.get`.\n",
    "\n",
    "    Raises:\n",
    "        requests.HTTPError: The server answered with a 4xx/5xx status code.\n",
    "        requests.exceptions.RequestException: Connection failure or timeout.\n",
    "    \"\"\"\n",
    "    response = requests.get(url, timeout=timeout)\n",
    "    # Check the status before opening the file so an error page is never saved as a PDF.\n",
    "    response.raise_for_status()\n",
    "    with open(filename, 'wb') as f:\n",
    "        f.write(response.content)\n",
    "\n",
    "\n",
    "def parse_pdf(filename):\n",
    "    \"\"\"Print the text of every page of the PDF stored at `filename`.\"\"\"\n",
    "    # The context manager closes the document even if text extraction fails.\n",
    "    with fitz.open(filename) as doc:\n",
    "        for page in doc:\n",
    "            print(page.get_text())\n",
    "\n",
    "\n",
    "# Download the PDF using the URL obtained above.\n",
    "pdf_url = paper_info['pdf_url']\n",
    "filename = 'downloaded_paper.pdf'\n",
    "if not pdf_url:\n",
    "    # An empty URL means the feed had no PDF link; stop with a clear message.\n",
    "    raise ValueError(f'No PDF link found for arXiv ID {arxiv_id!r}')\n",
    "download_pdf(pdf_url, filename)\n",
    "\n",
    "# Parse the PDF and print its content.\n",
    "parse_pdf(filename)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}