diff --git a/.gitignore b/.gitignore
index 41b77e6..e09ffb8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 .DS_Store
 .ipynb_checkpoints
 __pycache__
+data
+*.ipynb
diff --git a/aslite/db.py b/aslite/db.py
index 14aca0c..ecef5c6 100644
--- a/aslite/db.py
+++ b/aslite/db.py
@@ -4,9 +4,12 @@ The idea is that none of the individual scripts deal directly with the file syst
 Any of the file system I/O and the associated settings are in this single file.
 """
 
+import os
 import sqlite3, zlib, pickle
 from sqlitedict import SqliteDict
 
+DATA_DIR = 'data'
+
 # -----------------------------------------------------------------------------
 
 class CompressedSqliteDict(SqliteDict):
@@ -29,8 +32,10 @@ flag='c': default mode, open for read/write, and creating the db/table if necess
 flag='r': open for read-only
 """
 
-PAPERS_DB_FILE = 'papers.db' # stores info about papers, and also their lighter-weight metadata
-DICT_DB_FILE = 'dict.db' # stores account-relevant info, like which tags exist for which papers
+# stores info about papers, and also their lighter-weight metadata
+PAPERS_DB_FILE = os.path.join(DATA_DIR, 'papers.db')
+# stores account-relevant info, like which tags exist for which papers
+DICT_DB_FILE = os.path.join(DATA_DIR, 'dict.db')
 
 def get_papers_db(flag='r', autocommit=True):
     assert flag in ['r', 'c']
@@ -52,7 +57,8 @@ def get_tags_db(flag='r', autocommit=True):
 our "feature store" is currently just a pickle file, may want to consider hdf5 in the future
 """
 
-FEATURES_FILE = 'features.p' # stores tfidf features a bunch of other metadata
+# stores tfidf features and a bunch of other metadata
+FEATURES_FILE = os.path.join(DATA_DIR, 'features.p')
 
 def save_features(features):
     """ takes the features dict and save it to disk in a simple pickle file """
diff --git a/data/readme.md b/data/readme.md
new file mode 100644
index 0000000..407a1e5
--- /dev/null
+++ b/data/readme.md
@@ -0,0 +1,2 @@
+This directory stores the database, sequestered away from the code and the main project directory.
+For example, it includes the arxiv paper metadata, the calculated tfidf features, and the user tags data.