OK, now we can sequester all the database files into the data/ folder so everything is nice and clean, yay

2021-11-25 13:47:45 -08:00 · 2021-11-25 13:47:45 -08:00 · 1ed6e3f1b0
commit 1ed6e3f1b0
parent 77279e1777
3 changed files with 13 additions and 3 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,5 @@
 .DS_Store
 .ipynb_checkpoints
 __pycache__
+data
+*.ipynb
--- a/aslite/db.py
+++ b/aslite/db.py
@ -4,9 +4,12 @@ The idea is that none of the individual scripts deal directly with the file syst
 Any of the file system I/O and the associated settings are in this single file.
 """

+import os
 import sqlite3, zlib, pickle
 from sqlitedict import SqliteDict

+DATA_DIR = 'data'
+
 # -----------------------------------------------------------------------------

 class CompressedSqliteDict(SqliteDict):
@ -29,8 +32,10 @@ flag='c': default mode, open for read/write, and creating the db/table if necess
 flag='r': open for read-only
 """

-PAPERS_DB_FILE = 'papers.db' # stores info about papers, and also their lighter-weight metadata
-DICT_DB_FILE = 'dict.db' # stores account-relevant info, like which tags exist for which papers
+# stores info about papers, and also their lighter-weight metadata
+PAPERS_DB_FILE = os.path.join(DATA_DIR, 'papers.db')
+# stores account-relevant info, like which tags exist for which papers
+DICT_DB_FILE = os.path.join(DATA_DIR, 'dict.db')

 def get_papers_db(flag='r', autocommit=True):
    assert flag in ['r', 'c']
@ -52,7 +57,8 @@ def get_tags_db(flag='r', autocommit=True):
 our "feature store" is currently just a pickle file, may want to consider hdf5 in the future
 """

-FEATURES_FILE = 'features.p' # stores tfidf features a bunch of other metadata
+# stores tfidf features and a bunch of other metadata
+FEATURES_FILE = os.path.join(DATA_DIR, 'features.p')

 def save_features(features):
    """ takes the features dict and save it to disk in a simple pickle file """
--- a/data/readme.md
+++ b/data/readme.md
@ -0,0 +1,2 @@
+This directory stores the database, sequestered away from the code and the main project directory.
+E.g. it includes the arxiv paper metadata, the calculated tfidf features, and the user tags data.