updated with github links from paperswithcode

When the bash script runs, it checks whether a paper's code base has been updated on paperswithcode.com and, if so, updates it.
This commit is contained in:
Subramanya N 2022-02-14 05:02:19 -05:00
parent d7a303b410
commit cc4b15efb5
3 changed files with 51 additions and 0 deletions

View File

@ -12,6 +12,7 @@ import argparse
from aslite.arxiv import get_response, parse_response
from aslite.db import get_papers_db, get_metas_db
from aslite.paperswithcode import getGithubLink
if __name__ == '__main__':
@ -68,7 +69,24 @@ if __name__ == '__main__':
nhad, nnew, nreplace = 0, 0, 0
for p in papers:
pid = p['_id']
# add github_links to p
p['github_links'] = getGithubLink(pid)
# get github link from paperswithcode.com
if pid in pdb:
# check if old papers have github_links
if 'github_links' in pdb[pid]:
# replace if github has been updated by paperswithcode.com
if p['github_links'] != pdb[pid]['github_links']:
# replace, this one is newer
store(p)
nreplace += 1
continue
else:
# replace, this one is newer
store(p)
nreplace += 1
continue
if p['_time'] > pdb[pid]['_time']:
# replace, this one is newer
store(p)

23
aslite/paperswithcode.py Normal file
View File

@ -0,0 +1,23 @@
import requests
def getGithubLink(arxiv_id, timeout=10):
    """Return the official GitHub repository URLs for an arXiv paper.

    Queries the paperswithcode.com REST API for papers matching
    ``arxiv_id`` and, for each match, fetches its repositories and keeps
    the URLs of those flagged ``is_official``.

    Args:
        arxiv_id: arXiv identifier sent as the ``arxiv_id`` query parameter.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A list of repository URLs (possibly empty when no paper or no
        official repository is found), or ``None`` when any request fails,
        returns an error status, or yields a body that is not valid JSON.
    """
    base_url = "https://paperswithcode.com/api/v1/"
    urls = []
    try:
        # Let requests build the query string so the id is URL-encoded.
        papers = requests.get(
            base_url + "papers",
            params={"arxiv_id": arxiv_id},
            timeout=timeout,
        )
        if not papers.ok:
            return None
        for paper in papers.json()["results"]:
            repos = requests.get(
                base_url + "papers/%s/repositories/" % paper['id'],
                timeout=timeout,
            )
            if not repos.ok:
                # A partial answer would be indistinguishable from a
                # complete one, so report the whole lookup as failed.
                return None
            urls.extend(
                repo['url']
                for repo in repos.json()["results"]
                if repo['is_official']
            )
    except (requests.RequestException, ValueError):
        # Connection errors, timeouts and undecodable JSON are reported the
        # same way as an HTTP error status: no answer available.
        return None
    return urls

10
bash_job.sh Executable file
View File

@ -0,0 +1,10 @@
#!/bin/bash
# Fetch the latest papers, then recompute features only when the daemon
# reports success (exit status 0); any other status skips the computation.
if python3 arxiv_daemon.py --num 100; then
    echo "New papers detected! Running compute.py"
    python3 compute.py
else
    echo "No new papers were added, skipping feature computation"
fi