From cc4b15efb50d4e51539253399d712e8d743ade48 Mon Sep 17 00:00:00 2001 From: Subramanya N Date: Mon, 14 Feb 2022 05:02:19 -0500 Subject: [PATCH] updated with github links from paperswithcode While running the bash script, it checks whether a code base has been updated on paperswithcode.com and updates it. --- arxiv_daemon.py | 18 ++++++++++++++++++ aslite/paperswithcode.py | 23 +++++++++++++++++++++++ bash_job.sh | 10 ++++++++++ 3 files changed, 51 insertions(+) create mode 100644 aslite/paperswithcode.py create mode 100755 bash_job.sh diff --git a/arxiv_daemon.py b/arxiv_daemon.py index 6ce68c7..2c93317 100644 --- a/arxiv_daemon.py +++ b/arxiv_daemon.py @@ -12,6 +12,7 @@ import argparse from aslite.arxiv import get_response, parse_response from aslite.db import get_papers_db, get_metas_db +from aslite.paperswithcode import getGithubLink if __name__ == '__main__': @@ -68,7 +69,24 @@ if __name__ == '__main__': nhad, nnew, nreplace = 0, 0, 0 for p in papers: pid = p['_id'] + # add github_links to p + p['github_links'] = getGithubLink(pid) + # get github link from paperswithcode.com if pid in pdb: + # check if old papers have github_links + if 'github_links' in pdb[pid]: + # replace if github has been updated by paperswithcode.com + if p['github_links'] != pdb[pid]['github_links']: + # replace, this one is newer + store(p) + nreplace += 1 + continue + else: + # replace, this one is newer + store(p) + nreplace += 1 + continue + if p['_time'] > pdb[pid]['_time']: # replace, this one is newer store(p) diff --git a/aslite/paperswithcode.py b/aslite/paperswithcode.py new file mode 100644 index 0000000..56b323a --- /dev/null +++ b/aslite/paperswithcode.py @@ -0,0 +1,23 @@ +import requests + +def getGithubLink(arxiv_id): + urls = [] + base_url = "https://paperswithcode.com/api/v1/" + add_paper_id = "papers?arxiv_id=%s" %(arxiv_id) + search_query = base_url + add_paper_id + request = requests.get(search_query) + if request: + for _data in request.json()["results"]: + _id = _data['id'] + 
add_id = "papers/%s/repositories/" %(_id) + search_url = base_url + add_id + request = requests.get(search_url) + if request: + for _urlInfo in request.json()["results"]: + if _urlInfo['is_official']: + urls.append(_urlInfo['url']) + else: + return None + else: + return None + return urls diff --git a/bash_job.sh b/bash_job.sh new file mode 100755 index 0000000..3a60b7b --- /dev/null +++ b/bash_job.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +python3 arxiv_daemon.py --num 100 + +if [ $? -eq 0 ]; then + echo "New papers detected! Running compute.py" + python3 compute.py +else + echo "No new papers were added, skipping feature computation" +fi