ok now we can sequester all the database files into data/ folder so everything is nice and clean yay
This commit is contained in:
parent
77279e1777
commit
1ed6e3f1b0
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,3 +1,5 @@
|
|||||||
.DS_Store
|
.DS_Store
|
||||||
.ipynb_checkpoints
|
.ipynb_checkpoints
|
||||||
__pycache__
|
__pycache__
|
||||||
|
data
|
||||||
|
*.ipynb
|
||||||
|
|||||||
12
aslite/db.py
12
aslite/db.py
@ -4,9 +4,12 @@ The idea is that none of the individual scripts deal directly with the file syst
|
|||||||
Any of the file system I/O and the associated settings are in this single file.
|
Any of the file system I/O and the associated settings are in this single file.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
import sqlite3, zlib, pickle
|
import sqlite3, zlib, pickle
|
||||||
from sqlitedict import SqliteDict
|
from sqlitedict import SqliteDict
|
||||||
|
|
||||||
|
DATA_DIR = 'data'
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
class CompressedSqliteDict(SqliteDict):
|
class CompressedSqliteDict(SqliteDict):
|
||||||
@ -29,8 +32,10 @@ flag='c': default mode, open for read/write, and creating the db/table if necess
|
|||||||
flag='r': open for read-only
|
flag='r': open for read-only
|
||||||
"""
|
"""
|
||||||
|
|
||||||
PAPERS_DB_FILE = 'papers.db' # stores info about papers, and also their lighter-weight metadata
|
# stores info about papers, and also their lighter-weight metadata
|
||||||
DICT_DB_FILE = 'dict.db' # stores account-relevant info, like which tags exist for which papers
|
PAPERS_DB_FILE = os.path.join(DATA_DIR, 'papers.db')
|
||||||
|
# stores account-relevant info, like which tags exist for which papers
|
||||||
|
DICT_DB_FILE = os.path.join(DATA_DIR, 'dict.db')
|
||||||
|
|
||||||
def get_papers_db(flag='r', autocommit=True):
|
def get_papers_db(flag='r', autocommit=True):
|
||||||
assert flag in ['r', 'c']
|
assert flag in ['r', 'c']
|
||||||
@ -52,7 +57,8 @@ def get_tags_db(flag='r', autocommit=True):
|
|||||||
our "feature store" is currently just a pickle file, may want to consider hdf5 in the future
|
our "feature store" is currently just a pickle file, may want to consider hdf5 in the future
|
||||||
"""
|
"""
|
||||||
|
|
||||||
FEATURES_FILE = 'features.p' # stores tfidf features a bunch of other metadata
|
# stores tfidf features and a bunch of other metadata
|
||||||
|
FEATURES_FILE = os.path.join(DATA_DIR, 'features.p')
|
||||||
|
|
||||||
def save_features(features):
|
def save_features(features):
|
||||||
""" takes the features dict and saves it to disk in a simple pickle file """
|
""" takes the features dict and saves it to disk in a simple pickle file """
|
||||||
|
|||||||
2
data/readme.md
Normal file
2
data/readme.md
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
This directory stores the database, sequestered away from the code and the main project directory.
|
||||||
|
E.g., it includes the arxiv paper metadata, the calculated tfidf features, and the user tags data.
|
||||||
Loading…
Reference in New Issue
Block a user