minor logging changes

This commit is contained in:
Andrej Karpathy 2021-11-12 22:49:29 -08:00
parent 158ced647b
commit 7cbb90a480
2 changed files with 12 additions and 11 deletions

View File

@@ -15,18 +15,19 @@ from aslite.db import get_papers_db, get_metas_db
if __name__ == '__main__':
logging.basicConfig(level=logging.INFO, format='%(name)s %(levelname)s %(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
parser = argparse.ArgumentParser(description='Arxiv Daemon')
parser.add_argument('-n', '--num', type=int, default=100, help='how many papers to fetch')
parser.add_argument('-s', '--start', type=int, default=0, help='start at what index')
args = parser.parse_args()
print(args)
logging.basicConfig(level=logging.INFO, format='%(name)s %(levelname)s %(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
# query string of papers to look for
q = 'cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.AI+OR+cat:cs.NE+OR+cat:cs.RO'
pdb = get_papers_db(flag='c', autocommit=True)
mdb = get_metas_db(flag='c', autocommit=True)
pdb = get_papers_db(flag='c')
mdb = get_metas_db(flag='c')
prevn = len(pdb)
def store(p):
@@ -47,11 +48,11 @@ if __name__ == '__main__':
if len(papers) == 100:
break # otherwise we have to try again
except Exception as e:
print(e)
print("will try again in a bit...")
logging.warning(e)
logging.warning("will try again in a bit...")
ntried += 1
if ntried > 1000:
print("ok we tried 1,000 times, something is srsly wrong. exitting.")
logging.error("ok we tried 1,000 times, something is srsly wrong. exitting.")
sys.exit()
time.sleep(2 + random.uniform(0, 4))
@@ -73,10 +74,10 @@ if __name__ == '__main__':
nnew += 1
prevn = len(pdb)
# print some diagnostic information
print(papers[0]['_time_str'])
print("k=%d, out of %d: had %d, replaced %d, new %d. now have: %d" %
# some diagnostic information on how things are coming along
logging.info(papers[0]['_time_str'])
logging.info("k=%d, out of %d: had %d, replaced %d, new %d. now have: %d" %
(k, len(papers), nhad, nreplace, nnew, prevn))
# zzz
time.sleep(2 + random.uniform(0, 4))
time.sleep(1 + random.uniform(0, 3))

View File

@@ -17,7 +17,7 @@ def get_response(search_query, start_index=0):
add_url = 'search_query=%s&sortBy=lastUpdatedDate&start=%d&max_results=100' % (search_query, start_index)
#add_url = 'search_query=%s&sortBy=submittedDate&start=%d&max_results=100' % (search_query, start_index)
search_query = base_url + add_url
logger.info(f"Searching arxiv for {search_query}")
logger.debug(f"Searching arxiv for {search_query}")
with urllib.request.urlopen(search_query) as url:
response = url.read()