diff --git a/database.py b/database.py index d3b8546..ef95c8e 100644 --- a/database.py +++ b/database.py @@ -3,11 +3,9 @@ import datajoint as dj import nixio as nix import os import glob -from util import read_info_file, find_key_recursive, deep_get +from util import read_info_file, find_key_recursive, deep_get, read_dataset_info from IPython import embed -import datetime as dt -data_dir = 'data' schema = dj.schema("fish_book", locals()) @@ -62,14 +60,13 @@ class Dataset(dj.Manual): return sane - @schema class Subject(dj.Manual): definition = """ # Subject subject_id : varchar(256) ---- - species : varchar(256) + species : varchar(256) """ @staticmethod @@ -112,7 +109,7 @@ class SubjectProperties(dj.Manual): definition = """ # SubjectProperties id : int auto_increment - ---- + ---- -> Subject recording_date : date weight : float @@ -121,7 +118,8 @@ class SubjectProperties(dj.Manual): """ def get_template_tuple(id=None): - tup = dict(id=None, subject_id=None, recording_date=None, weight=0.0, size=0.0, eod_frequency=0.0) + tup = dict(id=None, subject_id=None, recording_date=None, weight=0.0, size=0.0, + eod_frequency=0.0) if id is not None: return dict((SubjectProperties() & {"id": id}).fetch1()) return tup @@ -131,11 +129,9 @@ class SubjectProperties(dj.Manual): class Cell(dj.Manual): definition = """ # Table that stores information about recorded cells. 
- id : int auto_increment + cell_id : varchar(256) ---- -> Subject - -> Dataset - cell_name : varchar(256) cell_type : varchar(256) firing_rate : float structure : varchar(256) @@ -148,41 +144,22 @@ class Cell(dj.Manual): @staticmethod def get_template_tuple(cell_id=None): - tup = dict(id=None, dataset_id=None, subject_id=None, cell_name="", cell_type="", firing_rate=0.0, depth=0.0, - region="", subregion="", structure="", lateral_pos=0.0, transversal_section=0.0) + tup = dict(cell_id=None, subject_id=None, cell_type="", firing_rate=0.0, + depth=0.0, region="", subregion="", structure="", + lateral_pos=0.0, transversal_section=0.0) if cell_id is not None: d = dict((Cell() & {"cell_id": cell_id}).fetch1()) return d return tup -def read_info(info_file): - exp = "" - quality = "" - comment = "" - rec_date = None - has_nix = False - if not os.path.exists(info_file): - return exp, rec_date, quality, comment, has_nix - has_nix = len(glob.glob(os.path.sep.join(info_file.split(os.path.sep)[:-1]) + os.path.sep + "*.nix")) > 0 - info = read_info_file(info_file) - p = [] - find_key_recursive(info, "Experimenter", p) - if len(p) > 0: - exp = deep_get(info, p) - p = [] - find_key_recursive(info, "Date", p) - if len(p) > 0: - rec_date = dt.date.fromisoformat(deep_get(info, p)) - p = [] - find_key_recursive(info, "Recording quality", p) - if len(p) > 0: - quality = deep_get(info, p) - find_key_recursive(info, "Comment", p) - if len(p) > 0: - comment = deep_get(info, p, default="") - - return exp, rec_date, quality, comment, has_nix +@schema +class CellDatasetMap(dj.Manual): + definition = """ + # Table that maps recorded cells to datasets + -> Dataset + -> Cell + """ def populate_datasets(data_path): @@ -190,7 +167,7 @@ def populate_datasets(data_path): if not os.path.exists(data_path): return dset_name = os.path.split(data_path)[-1] - experimenter, rec_date, quality, comment, has_nix = read_info(os.path.join(data_path, 'info.dat')) + experimenter, rec_date, quality, comment, 
has_nix = read_dataset_info(os.path.join(data_path, 'info.dat')) if not experimenter: return @@ -243,7 +220,7 @@ def populate_subjects(data_path): def populate_cells(data_path): - print("Importing subject(s) of %s" % data_path) + print("Importing cell(s) of %s" % data_path) dset_name = os.path.split(data_path)[-1] info_file = os.path.join(data_path, 'info.dat') if not os.path.exists(info_file): @@ -266,10 +243,11 @@ def populate_cells(data_path): dataset = dict((Dataset & {"dataset_id": dset_name}).fetch1()) subject = dict((Subject & {"subject_id": subject_info["Identifier"]}).fetch1()) + dataset_id = dataset["dataset_id"] + cell_id = "-".join(dataset_id.split("-")[:4]) if len(dataset_id) > 4 else dataset_id cell_props = Cell.get_template_tuple() cell_props["subject_id"] = subject["subject_id"] - cell_props["dataset_id"] = dataset["dataset_id"] - cell_props["cell_name"] = dataset["dataset_id"] + cell_props["cell_id"] = cell_id cell_props["cell_type"] = cell_info["CellType"] cell_props["firing_rate"] = firing_rate if "Structure" in cell_info.keys(): @@ -287,6 +265,10 @@ def populate_cells(data_path): Cell.insert1(cell_props, skip_duplicates=True) + # multi mach entry + mm = dict(dataset_id=dataset["dataset_id"], cell_id=cell_props["cell_id"]) + CellDatasetMap.insert1(mm, skip_duplicates=True) + def drop_tables(): Dataset.drop() @@ -301,6 +283,7 @@ def populate(datasets): if __name__ == "__main__": - datasets = glob.glob('/data/apteronotus/2018-05-08*') + data_dir = "../../science/high_frequency_chirps/data" + datasets = glob.glob(os.path.join(data_dir, '2018*')) # drop_tables() populate(datasets) diff --git a/util.py b/util.py index 414bb14..137adbf 100644 --- a/util.py +++ b/util.py @@ -1,5 +1,8 @@ -from IPython import embed from functools import reduce +import os +import glob +import datetime as dt + def read_info_file(file_name): """ @@ -43,3 +46,32 @@ def deep_get(dictionary, keys, default=None): assert(isinstance(keys, list)) return reduce(lambda d, key: 
# Continuation of the preceding `deep_get` context line of this hunk, kept verbatim:
#     d.get(key, default) if isinstance(d, dict) else default, keys, dictionary)


def read_dataset_info(info_file):
    """
    Reads the dataset-level metadata from the info file of a dataset.

    :param info_file: path of the dataset's info file (e.g. ``<dataset>/info.dat``).
    :return: tuple ``(experimenter, recording_date, quality, comment, has_nix)``.
             If ``info_file`` does not exist the defaults
             ``("", None, "", "", False)`` are returned. ``recording_date`` is a
             ``datetime.date`` parsed from the ISO formatted "Date" entry, or
             ``None`` if there is no such entry. ``has_nix`` is True if at least
             one ``*.nix`` file is located next to the info file.
    :raises ValueError: if a "Date" entry is found but is not ISO formatted
                        (raised by ``datetime.date.fromisoformat``).
    """
    exp = ""
    quality = ""
    comment = ""
    rec_date = None
    has_nix = False
    if not os.path.exists(info_file):
        return exp, rec_date, quality, comment, has_nix

    # Look for nix files in the folder of the info file. Using dirname/join
    # (instead of splitting on the path separator) keeps a bare file name such
    # as "info.dat" pointing at the current directory rather than at "/".
    nix_pattern = os.path.join(os.path.dirname(info_file), "*.nix")
    has_nix = len(glob.glob(nix_pattern)) > 0

    info = read_info_file(info_file)

    # NOTE(review): find_key_recursive appears to append the path of the found
    # key to the list it is handed (to be confirmed against its definition),
    # hence every lookup must start with a fresh, empty path list.
    p = []
    find_key_recursive(info, "Experimenter", p)
    if p:
        exp = deep_get(info, p)

    p = []
    find_key_recursive(info, "Date", p)
    if p:
        rec_date = dt.date.fromisoformat(deep_get(info, p))

    p = []
    find_key_recursive(info, "Recording quality", p)
    if p:
        quality = deep_get(info, p)

    # The path list was previously not reset here, so the comment was looked up
    # with the stale "Recording quality" path (or both paths concatenated).
    p = []
    find_key_recursive(info, "Comment", p)
    if p:
        comment = deep_get(info, p, default="")

    return exp, rec_date, quality, comment, has_nix