# 假设用户指定请求某篇文章。
## 我们通过ID 获取该文献的简介与PDF文件。

In [2]:
import requests
from xml.etree import ElementTree

def get_arxiv_paper_info(arxiv_id, timeout=30):
    """Fetch the title, abstract and PDF link of an arXiv paper.

    Queries the arXiv export API for ``arxiv_id`` and reads the Atom feed it
    returns.

    Args:
        arxiv_id: arXiv identifier to look up, e.g. ``'2104.00001'``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict with the keys ``'title'``, ``'summary'`` and ``'pdf_url'``.
        A value is left as an empty string when the feed does not provide
        it. If the feed holds several entries, the last one wins.

    Raises:
        requests.RequestException: On connection failure, timeout or an
            HTTP error status.
        xml.etree.ElementTree.ParseError: If the response is not valid XML.
    """
    atom = '{http://www.w3.org/2005/Atom}'

    # Pass the ID through ``params`` so it is URL-encoded, and bound the
    # wait so a stalled connection cannot hang the caller forever.
    response = requests.get(
        'http://export.arxiv.org/api/query',
        params={'id_list': arxiv_id},
        timeout=timeout,
    )
    # Fail loudly instead of trying to parse an error page as a feed.
    response.raise_for_status()

    root = ElementTree.fromstring(response.content)
    paper_info = {'title': '', 'summary': '', 'pdf_url': ''}

    for entry in root.findall(f'{atom}entry'):
        # findtext() yields '' rather than raising when an element is absent.
        paper_info['title'] = entry.findtext(f'{atom}title', default='')
        paper_info['summary'] = entry.findtext(f'{atom}summary', default='')

        for link in entry.findall(f'{atom}link'):
            href = link.attrib.get('href')
            if link.attrib.get('title') != 'pdf' or not href:
                continue
            # Append the extension only when it is missing, so a link that
            # already ends in '.pdf' does not become '.pdf.pdf'.
            if href.endswith('.pdf'):
                paper_info['pdf_url'] = href
            else:
                paper_info['pdf_url'] = href + '.pdf'

    return paper_info

# Example: fetch a paper's metadata by its arXiv ID and print the result.
arxiv_id = '2104.00001'  # replace with the arXiv ID you want to look up
paper_info = get_arxiv_paper_info(arxiv_id)
print(paper_info)


{'title': 'Constraining effective equation of state in $f(Q,T)$ gravity', 'summary': "  New high-precision observations are now possible to constrain different\ngravity theories. To examine the accelerated expansion of the Universe, we used\nthe newly proposed $f(Q,T)$ gravity, where $Q$ is the non-metricity, and $T$ is\nthe trace of the energy-momentum tensor. The investigation is carried out using\na parameterized effective equation of state with two parameters, $m$ and $n$.\nWe have also considered the linear form of $f(Q,T)= Q+bT$, where $b$ is\nconstant. By constraining the model with the recently published 1048 Pantheon\nsample, we were able to find the best fitting values for the parameters $b$,\n$m$, and $n$. The model appears to be in good agreement with the observations.\nFinally, we analyzed the behavior of the deceleration parameter and equation of\nstate parameter. The results support the feasibility of $f(Q,T)$ as a promising\ntheory of gravity, illuminating a new directi

In [None]:
import requests
import fitz  # PyMuPDF

def download_pdf(url, filename, timeout=60):
    """Download ``url`` and save the response body to ``filename``.

    Args:
        url: Address of the file to download.
        filename: Path of the local file to write; an existing file is
            overwritten.
        timeout: Seconds to wait for the HTTP request before giving up.

    Raises:
        requests.RequestException: On connection failure, timeout or an
            HTTP error status. Nothing is written to disk in that case.
    """
    response = requests.get(url, timeout=timeout)
    # Check the status before opening the file so that an HTTP error page is
    # never saved under the PDF's name.
    response.raise_for_status()
    with open(filename, 'wb') as f:
        f.write(response.content)

def parse_pdf(filename):
    """Print the text of every page of a PDF, in page order.

    Args:
        filename: Path of the PDF file to read.
    """
    # Open the document as a context manager so it is closed (and its file
    # handle released) even if text extraction raises part-way through.
    with fitz.open(filename) as doc:
        for page in doc:
            print(page.get_text())

# Download the PDF from the URL stored in paper_info by the lookup above.
pdf_url = paper_info['pdf_url']
filename = 'downloaded_paper.pdf'
download_pdf(pdf_url, filename)

# Parse the downloaded PDF and print its text.
parse_pdf(filename)
