diff --git a/arxiv_daemon.py b/arxiv_daemon.py index c00bfdc..de71089 100644 --- a/arxiv_daemon.py +++ b/arxiv_daemon.py @@ -15,18 +15,19 @@ from aslite.db import get_papers_db, get_metas_db if __name__ == '__main__': + logging.basicConfig(level=logging.INFO, format='%(name)s %(levelname)s %(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p') + parser = argparse.ArgumentParser(description='Arxiv Daemon') parser.add_argument('-n', '--num', type=int, default=100, help='how many papers to fetch') parser.add_argument('-s', '--start', type=int, default=0, help='start at what index') args = parser.parse_args() print(args) - logging.basicConfig(level=logging.INFO, format='%(name)s %(levelname)s %(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p') # query string of papers to look for q = 'cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.AI+OR+cat:cs.NE+OR+cat:cs.RO' - pdb = get_papers_db(flag='c', autocommit=True) - mdb = get_metas_db(flag='c', autocommit=True) + pdb = get_papers_db(flag='c') + mdb = get_metas_db(flag='c') prevn = len(pdb) def store(p): @@ -47,11 +48,11 @@ if __name__ == '__main__': if len(papers) == 100: break # otherwise we have to try again except Exception as e: - print(e) - print("will try again in a bit...") + logging.warning(e) + logging.warning("will try again in a bit...") ntried += 1 if ntried > 1000: - print("ok we tried 1,000 times, something is srsly wrong. exitting.") + logging.error("ok we tried 1,000 times, something is srsly wrong. exitting.") sys.exit() time.sleep(2 + random.uniform(0, 4)) @@ -73,10 +74,10 @@ if __name__ == '__main__': nnew += 1 prevn = len(pdb) - # print some diagnostic information - print(papers[0]['_time_str']) - print("k=%d, out of %d: had %d, replaced %d, new %d. now have: %d" % + # some diagnostic information on how things are coming along + logging.info(papers[0]['_time_str']) + logging.info("k=%d, out of %d: had %d, replaced %d, new %d. now have: %d" % (k, len(papers), nhad, nreplace, nnew, prevn)) # zzz - time.sleep(2 + random.uniform(0, 4)) + time.sleep(1 + random.uniform(0, 3)) diff --git a/aslite/arxiv.py b/aslite/arxiv.py index 22973f4..1514324 100644 --- a/aslite/arxiv.py +++ b/aslite/arxiv.py @@ -17,7 +17,7 @@ def get_response(search_query, start_index=0): add_url = 'search_query=%s&sortBy=lastUpdatedDate&start=%d&max_results=100' % (search_query, start_index) #add_url = 'search_query=%s&sortBy=submittedDate&start=%d&max_results=100' % (search_query, start_index) search_query = base_url + add_url - logger.info(f"Searching arxiv for {search_query}") + logger.debug(f"Searching arxiv for {search_query}") with urllib.request.urlopen(search_query) as url: response = url.read()