import numpy as np import datajoint as dj import nixio as nix import os import glob from util import read_info_file, find_key_recursive, deep_get, read_dataset_info from IPython import embed schema = dj.schema("fish_book", locals()) @schema class Dataset(dj.Manual): definition = """ # Dataset dataset_id : varchar(256) ---- data_source : varchar(512) # path to the dataset experimenter : varchar(512) recording_date : date quality : varchar(512) comment : varchar(1024) has_nix : bool """ @staticmethod def get_template_tuple(id=None): if id is not None: d = dict((Dataset() & {"dataset_id": id}).fetch1()) return d return dict(dataset_id=None, data_source="", experimenter="", recording_date=None, quality="", comment="", has_nix=False) @staticmethod def get_nix_file(key): dset = (Dataset() & key).fetch1() if dset["ignore"]: return None file_path = os.path.join(dset["data_source"], dset["dataset_id"] + ".nix") if not (os.path.exists(file_path)): print("\t No nix file found for path: %s" % dset["data_source"]) return None if not Dataset.check_file_integrity(file_path): return None return file_path @staticmethod def check_file_integrity(nix_file): sane = True try: f = nix.File.open(nix_file, nix.FileMode.ReadOnly) b = f.blocks[0] m = b.metadata if "Recording" not in m.sections: Warning("\t Could not find Recording section in dataset: %s" % nix_file) sane = False f.close() except (): print("file: %s is NOT SANE!") sane = False return sane @schema class Subject(dj.Manual): definition = """ # Subject subject_id : varchar(256) ---- species : varchar(256) """ @staticmethod def get_template_tuple(subject_id=None): tup = dict(subject_id=None, species="") if subject_id is not None: d = dict((Subject() & {"subject_id": subject_id}).fetch1()) return d return tup def make(self, key): file_path = Dataset.get_nix_file(key) if file_path is None: return nix_file = nix.File.open(file_path, nix.FileMode.ReadOnly) m = nix_file.blocks[0].metadata inserts = Subject.get_template_tuple() subj_info = m["Recording"]["Subject"] inserts["subject_id"] = subj_info["Identifier"] inserts["species"] = subj_info["Species"][0] inserts["weight"] = subj_info["Weight"] inserts["size"] = subj_info["Size"] inserts["eod_frequency"] = np.round(subj_info["EOD Frequency"] * 10) / 10 inserts.update(key) self.insert1(inserts, skip_duplicates=True) nix_file.close() @schema class SubjectDatasetMap(dj.Manual): definition = """ # SubjectDatasetMap -> Subject -> Dataset """ @schema class SubjectProperties(dj.Manual): definition = """ # SubjectProperties id : int auto_increment ---- -> Subject recording_date : date weight : float size : float eod_frequency : float """ def get_template_tuple(id=None): tup = dict(id=None, subject_id=None, recording_date=None, weight=0.0, size=0.0, eod_frequency=0.0) if id is not None: return dict((SubjectProperties() & {"id": id}).fetch1()) return tup @schema class Cell(dj.Manual): definition = """ # Table that stores information about recorded cells. cell_id : varchar(256) ---- -> Subject cell_type : varchar(256) firing_rate : float structure : varchar(256) region : varchar(256) subregion : varchar(256) depth : float lateral_pos : float transversal_section : float """ @staticmethod def get_template_tuple(cell_id=None): tup = dict(cell_id=None, subject_id=None, cell_type="", firing_rate=0.0, depth=0.0, region="", subregion="", structure="", lateral_pos=0.0, transversal_section=0.0) if cell_id is not None: d = dict((Cell() & {"cell_id": cell_id}).fetch1()) return d return tup @schema class CellDatasetMap(dj.Manual): definition = """ # Table that maps recorded cells to datasets -> Dataset -> Cell """ def populate_datasets(data_path): print("Importing dataset %s" % data_path) if not os.path.exists(data_path): return dset_name = os.path.split(data_path)[-1] experimenter, rec_date, quality, comment, has_nix = read_dataset_info(os.path.join(data_path, 'info.dat')) if not experimenter: return inserts = Dataset.get_template_tuple() inserts["dataset_id"] = dset_name inserts["data_source"] = data_path inserts["experimenter"] = experimenter inserts["recording_date"] = rec_date inserts["quality"] = quality inserts["comment"] = comment inserts["has_nix"] = has_nix Dataset().insert1(inserts, skip_duplicates=True) def populate_subjects(data_path): print("Importing subject(s) of %s" % data_path) dset_name = os.path.split(data_path)[-1] info_file = os.path.join(data_path, 'info.dat') if not os.path.exists(info_file): return None, None, False info = read_info_file(info_file) p = [] find_key_recursive(info, "Subject", p) if len(p) > 0: subj = deep_get(info, p) inserts = Subject.get_template_tuple() inserts["subject_id"] = subj["Identifier"] inserts["species"] = subj["Species"] Subject().insert1(inserts, skip_duplicates=True) # multi mach entry dataset = dict((Dataset() & {"dataset_id": dset_name}).fetch1()) mm = dict(dataset_id=dataset["dataset_id"], subject_id=subj["Identifier"]) SubjectDatasetMap.insert1(mm, skip_duplicates=True) # subject properties props = SubjectProperties.get_template_tuple() props["subject_id"] = subj["Identifier"] props["recording_date"] = dataset["recording_date"] if "Weight" in subj.keys(): props["weight"] = np.round(float(subj["Weight"][:-1]), 1) if "Size" in subj.keys(): props["size"] = np.round(float(subj["Size"][:-2]), 1) if "EOD Frequency" in subj.keys(): props["eod_frequency"] = np.round(float(subj["EOD Frequency"][:-2])) p = props.copy() p.pop("id") if len(SubjectProperties & p) == 0: SubjectProperties.insert1(props, skip_duplicates=True) def populate_cells(data_path): print("Importing cell(s) of %s" % data_path) dset_name = os.path.split(data_path)[-1] info_file = os.path.join(data_path, 'info.dat') if not os.path.exists(info_file): return None, None, False info = read_info_file(info_file) p = [] find_key_recursive(info, "Subject", p) subject_info = deep_get(info, p) p = [] find_key_recursive(info, "Cell", p) cell_info = deep_get(info, p) p = [] find_key_recursive(info, "Firing Rate1", p) firing_rate = deep_get(info, p, default=0.0) if isinstance(firing_rate, str): firing_rate = float(firing_rate[:-2]) dataset = dict((Dataset & {"dataset_id": dset_name}).fetch1()) subject = dict((Subject & {"subject_id": subject_info["Identifier"]}).fetch1()) dataset_id = dataset["dataset_id"] cell_id = "-".join(dataset_id.split("-")[:4]) if len(dataset_id) > 4 else dataset_id cell_props = Cell.get_template_tuple() cell_props["subject_id"] = subject["subject_id"] cell_props["cell_id"] = cell_id cell_props["cell_type"] = cell_info["CellType"] cell_props["firing_rate"] = firing_rate if "Structure" in cell_info.keys(): cell_props["structure"] = cell_info["Structure"] if "BrainRegion" in cell_info.keys(): cell_props["region"] = cell_info["BrainRegion"] if "BrainSubRegion" in cell_info.keys(): cell_props["subregion"] = cell_info["BrainSubRegion"] if "Depth" in cell_info.keys(): cell_props["depth"] = float(cell_info["Depth"][:-2]) if "Lateral position" in cell_info.keys(): cell_props["lateral_pos"] = float(cell_info["Lateral position"][:-2]) if "Transverse section" in cell_info.keys(): cell_props["transversal_section"] = float(cell_info["Transverse section"]) Cell.insert1(cell_props, skip_duplicates=True) # multi mach entry mm = dict(dataset_id=dataset["dataset_id"], cell_id=cell_props["cell_id"]) CellDatasetMap.insert1(mm, skip_duplicates=True) def drop_tables(): Dataset.drop() Subject.drop() def populate(datasets): for d in datasets: populate_datasets(d) populate_subjects(d) populate_cells(d) if __name__ == "__main__": data_dir = "../../science/high_frequency_chirps/data" datasets = glob.glob(os.path.join(data_dir, '2018*')) # drop_tables() populate(datasets)