import numpy as np
import datajoint as dj
import nixio as nix
import os
import glob
import socket
import uuid
import yaml
from fishbook.backend.util import read_info_file, read_dataset_info, read_stimuli_file
from fishbook.backend.util import find_key_recursive, deep_get, find_mtags_for_tag
from fishbook.backend.util import mtag_settings_to_yaml, nix_metadata_to_yaml, mtag_features_to_yaml, progress

dj.config["enable_python_native_blobs"] = True
schema = dj.schema("fish_book", locals())


@schema
class Datasets(dj.Manual):
    definition = """
    # _Dataset
    dataset_id : varchar(256)
    ---
    data_source : varchar(512)   # path to the dataset
    data_host : varchar(512)     # fully qualified domain name
    experimenter : varchar(512)
    setup : varchar(128)
    recording_date : date
    quality : varchar(512)
    comment : varchar(1024)
    duration : float
    has_nix : bool
    """

    @staticmethod
    def get_template_tuple(id=None):
        if id is not None:
            return dict((Datasets() & {"dataset_id": id}).fetch1())
        return dict(dataset_id=None, data_source="", data_host="", experimenter="", setup="",
                    recording_date=None, quality="", comment="", duration=0.0, has_nix=False)

    @staticmethod
    def get_nix_file(key):
        dset = (Datasets() & key).fetch1()
        # "ignore" is not part of the table definition; treat a missing flag as False
        if dset.get("ignore", False):
            return None
        file_path = os.path.join(dset["data_source"], dset["dataset_id"] + ".nix")
        if not os.path.exists(file_path):
            print("\t No nix file found for path: %s" % dset["data_source"])
            return None
        if not Datasets.check_file_integrity(file_path):
            return None
        return file_path

    @staticmethod
    def check_file_integrity(nix_file):
        sane = True
        try:
            f = nix.File.open(nix_file, nix.FileMode.ReadOnly)
            b = f.blocks[0]
            m = b.metadata
            if "Recording" not in m.sections:
                print("\t Warning: could not find Recording section in dataset: %s" % nix_file)
                sane = False
            f.close()
        except Exception:
            print("file: %s is NOT SANE!" % nix_file)
            sane = False
        return sane


@schema
class Subjects(dj.Manual):
    definition = """
    # Subjects
    subject_id : varchar(256)
    ---
    species : varchar(256)
    """

    @staticmethod
    def get_template_tuple(subject_id=None):
        if subject_id is not None:
            return dict((Subjects() & {"subject_id": subject_id}).fetch1())
        return dict(subject_id=None, species="")

    def make(self, key):
        file_path = Datasets.get_nix_file(key)
        if file_path is None:
            return
        nix_file = nix.File.open(file_path, nix.FileMode.ReadOnly)
        m = nix_file.blocks[0].metadata
        inserts = Subjects.get_template_tuple()
        subj_info = m["Recording"]["Subject"]
        inserts["subject_id"] = subj_info["Identifier"]
        inserts["species"] = subj_info["Species"][0]
        # weight, size, and EOD frequency are not attributes of this table;
        # they are stored per recording in SubjectProperties
        inserts.update(key)
        self.insert1(inserts, skip_duplicates=True)
        nix_file.close()


@schema
class SubjectDatasetMap(dj.Manual):
    definition = """
    # SubjectDatasetMap
    -> Subjects
    -> Datasets
    """


@schema
class SubjectProperties(dj.Manual):
    definition = """
    # _SubjectProperties
    id : int auto_increment
    ---
    -> Subjects
    recording_date : date
    weight : float
    size : float
    eod_frequency : float
    """

    @staticmethod
    def get_template_tuple(id=None):
        if id is not None:
            return dict((SubjectProperties() & {"id": id}).fetch1())
        return dict(id=None, subject_id=None, recording_date=None,
                    weight=0.0, size=0.0, eod_frequency=0.0)
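
# Illustrative usage sketch, not used by the import pipeline below: the tables
# defined above are queried with standard DataJoint operators. The experimenter
# name is a made-up placeholder.
def example_datasets_of_experimenter(name="John Doe"):
    """Fetch all dataset tuples recorded by the given experimenter as dicts."""
    return (Datasets & {"experimenter": name}).fetch(as_dict=True)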

@schema
class Cells(dj.Manual):
    definition = """
    # Table that stores information about recorded cells.
    cell_id : varchar(256)
    ---
    -> Subjects
    cell_type : varchar(256)
    firing_rate : float
    structure : varchar(256)
    region : varchar(256)
    subregion : varchar(256)
    depth : float
    lateral_pos : float
    transversal_section : float
    """

    @staticmethod
    def get_template_tuple(cell_id=None):
        if cell_id is not None:
            return dict((Cells() & {"cell_id": cell_id}).fetch1())
        return dict(cell_id=None, subject_id=None, cell_type="", firing_rate=0.0,
                    depth=0.0, region="", subregion="", structure="",
                    lateral_pos=0.0, transversal_section=0.0)


@schema
class CellDatasetMap(dj.Manual):
    definition = """
    # Table that maps recorded cells to datasets
    -> Datasets
    -> Cells
    """


@schema
class Repros(dj.Manual):
    definition = """
    repro_id : varchar(512)   # The name that relacs gave to the RePro run
    run : smallint            # A counter of the runs of this RePro in the dataset
    -> Cells
    ---
    repro_name : varchar(512) # The original name of the RePro itself, not the name given by the user or relacs
    settings : varchar(3000)  # YAML-formatted string containing the repro settings (tag.metadata in case of a nix file)
    start : float             # The start time of the repro
    duration : float          # The duration of the repro
    """

    @staticmethod
    def get_template_tuple(repro_id=None):
        if repro_id is not None:
            return dict((Repros() & {"repro_id": repro_id}).fetch1())
        return dict(repro_id=None, cell_id=None, run=0, repro_name="",
                    settings=None, start=None, duration=None)


@schema
class Stimuli(dj.Manual):
    definition = """
    stimulus_id : varchar(50)
    -> Repros
    ---
    stimulus_index : int
    stimulus_name : varchar(512)
    mtag_id : varchar(50)
    start_time : float
    start_index : int
    duration : float
    settings : varchar(3000)
    """

    @staticmethod
    def get_template_tuple(stimulus_id=None):
        if stimulus_id is not None:
            return dict((Stimuli & {"stimulus_id": stimulus_id}).fetch1())
        return dict(stimulus_id=None, stimulus_index=None, stimulus_name="", mtag_id="",
                    start_index=0, start_time=0.0, duration=0.0, settings=None)


def populate_datasets(data_path, update=False):
    if not os.path.exists(data_path):
        return False
    dset_name = os.path.split(data_path)[-1]
    experimenter, rec_date, quality, comment, has_nix, rec_duration, setup = \
        read_dataset_info(os.path.join(data_path, 'info.dat'))
    if not experimenter:
        return False

    inserts = Datasets.get_template_tuple()
    inserts["dataset_id"] = dset_name
    inserts["data_source"] = os.path.abspath(data_path)
    inserts["data_host"] = socket.getfqdn()
    inserts["experimenter"] = experimenter
    inserts["recording_date"] = rec_date
    inserts["quality"] = quality if not isinstance(quality, dict) else ""
    inserts["comment"] = comment if not isinstance(comment, dict) else ""
    inserts["duration"] = rec_duration
    inserts["setup"] = setup
    inserts["has_nix"] = has_nix
    if len(Datasets & "dataset_id like '%s'" % inserts["dataset_id"]) > 0 and not update:
        print('\t\t %s is already in database!' % dset_name)
        return False
    Datasets().insert1(inserts, skip_duplicates=True)
    return True
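
# Illustrative sketch, not called anywhere: Repros.settings holds a YAML string,
# so it can be recovered as a dict. This assumes the stored string parses with
# yaml.safe_load; repro_id and cell_id are placeholders for real key values.
def example_repro_settings(repro_id, cell_id):
    """Return the stored settings of one repro run as a Python dict."""
    settings = (Repros & {"repro_id": repro_id, "cell_id": cell_id}).fetch1("settings")
    return yaml.safe_load(settings) if settings else {}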

def populate_subjects(data_path):
    print("\tImporting subject(s) of %s" % data_path)
    dset_name = os.path.split(data_path)[-1]
    info_file = os.path.join(data_path, 'info.dat')
    if not os.path.exists(info_file):
        return None, None, False
    info = read_info_file(info_file)
    p = []
    find_key_recursive(info, "Subject", p)
    subj = {}
    if len(p) > 0:
        subj = deep_get(info, p)

    inserts = Subjects.get_template_tuple()
    if "Identifier" in subj.keys():
        if isinstance(subj["Identifier"], dict):
            subj_id = "unspecified_" + dset_name
        else:
            subj_id = subj["Identifier"]
    elif "Identifier" in info.keys():
        if isinstance(info["Identifier"], dict):
            subj_id = "unspecified_" + dset_name
        else:
            subj_id = info["Identifier"]
    else:
        subj_id = "unspecified_" + dset_name
    inserts["subject_id"] = subj_id
    inserts["species"] = subj.get("Species", "")
    Subjects().insert1(inserts, skip_duplicates=True)

    # entry in the subject-dataset many-to-many map
    dataset = dict((Datasets() & {"dataset_id": dset_name}).fetch1())
    mm = dict(dataset_id=dataset["dataset_id"], subject_id=inserts["subject_id"])
    SubjectDatasetMap.insert1(mm, skip_duplicates=True)

    # subject properties
    props = SubjectProperties.get_template_tuple()
    props["subject_id"] = inserts["subject_id"]
    props["recording_date"] = dataset["recording_date"]
    if "Weight" in subj.keys():
        props["weight"] = np.round(float(subj["Weight"][:-1]), 1)
    if "Size" in subj.keys():
        props["size"] = np.round(float(subj["Size"][:-2]), 1)
    if "EOD Frequency" in subj.keys():
        props["eod_frequency"] = np.round(float(subj["EOD Frequency"][:-2]))
    p = props.copy()
    p.pop("id")
    if len(SubjectProperties & p) == 0:
        SubjectProperties.insert1(props, skip_duplicates=True)


def populate_cells(data_path):
    print("\tImporting cell(s) of %s" % data_path)
    dset_name = os.path.split(data_path)[-1]
    info_file = os.path.join(data_path, 'info.dat')
    if not os.path.exists(info_file):
        return None, None, False
    info = read_info_file(info_file)
    p = []
    find_key_recursive(info, "Subject", p)
    subject_info = deep_get(info, p)
    p = []
    find_key_recursive(info, "Cell", p)
    cell_info = deep_get(info, p)
    p = []
    res = find_key_recursive(info, "Firing Rate1", p)
    if res:
        firing_rate = deep_get(info, p, default=0.0)
    else:
        firing_rate = 0.0
    if isinstance(firing_rate, str):
        firing_rate = float(firing_rate[:-2])

    if "Identifier" in subject_info.keys():
        if isinstance(subject_info["Identifier"], dict):
            subj_id = "unspecified_" + dset_name
        else:
            subj_id = subject_info["Identifier"]
    elif "Identifier" in info.keys():
        if isinstance(info["Identifier"], dict):
            subj_id = "unspecified_" + dset_name
        else:
            subj_id = info["Identifier"]
    else:
        subj_id = "unspecified_" + dset_name

    dataset = dict((Datasets & {"dataset_id": dset_name}).fetch1())
    subject = dict((Subjects & {"subject_id": subj_id}).fetch1())
    dataset_id = dataset["dataset_id"]
    cell_id = "-".join(dataset_id.split("-")[:4]) if len(dataset_id) > 4 else dataset_id

    cell_props = Cells.get_template_tuple()
    cell_props["subject_id"] = subject["subject_id"]
    cell_props["cell_id"] = cell_id
    cell_props["cell_type"] = cell_info["CellType"]
    cell_props["firing_rate"] = firing_rate
    if "Structure" in cell_info.keys():
        cell_props["structure"] = cell_info["Structure"]
    if "BrainRegion" in cell_info.keys():
        cell_props["region"] = cell_info["BrainRegion"]
    if "BrainSubRegion" in cell_info.keys():
        cell_props["subregion"] = cell_info["BrainSubRegion"]
    if "Depth" in cell_info.keys():
        cell_props["depth"] = float(cell_info["Depth"][:-2])
    if "Lateral position" in cell_info.keys():
        cell_props["lateral_pos"] = float(cell_info["Lateral position"][:-2])
    if "Transverse section" in cell_info.keys():
        cell_props["transversal_section"] = float(cell_info["Transverse section"])
    Cells.insert1(cell_props, skip_duplicates=True)

    # entry in the cell-dataset many-to-many map
    mm = dict(dataset_id=dataset["dataset_id"], cell_id=cell_props["cell_id"])
    CellDatasetMap.insert1(mm, skip_duplicates=True)
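
# The unit handling above relies on fixed-width slicing, e.g.
# float(subj["Weight"][:-1]) assumes a one-character unit such as "g" and
# float(cell_info["Depth"][:-2]) a two-character unit such as "um". A minimal,
# more tolerant sketch (not wired into the import functions) extracts the
# leading number instead:
import re

def parse_quantity(value, default=0.0):
    """Return the leading numeric part of strings like '123.4um' as a float."""
    match = re.match(r"\s*([-+]?\d+(?:\.\d+)?)", str(value))
    return float(match.group(1)) if match else default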

def scan_nix_file_for_repros(dataset):
    print("\t\tscanning nix file")
    cell_id = (Cells * CellDatasetMap * (Datasets & "dataset_id = '%s'" % dataset["dataset_id"])).fetch("cell_id", limit=1)[0]
    nix_files = glob.glob(os.path.join(dataset["data_source"], "*.nix"))
    for nf in nix_files:
        if not Datasets.check_file_integrity(nf):
            print("\t\tfile is not sane!!!")
            continue
        f = nix.File.open(nf, nix.FileMode.ReadOnly)
        b = f.blocks[0]
        repro_runs = [t for t in b.tags if "relacs.repro_run" in t.type]
        total = len(repro_runs)
        for i, t in enumerate(repro_runs):
            rs = t.metadata.find_sections(lambda x: "Run" in x.props)
            if len(rs) == 0:
                continue
            rs = rs[0]
            progress(i + 1, total, "Scanning repro run %s" % rs["RePro"])
            rp = Repros.get_template_tuple()
            rp["run"] = rs["Run"]
            rp["repro_name"] = rs["RePro"]
            rp["cell_id"] = cell_id
            rp["repro_id"] = t.name
            settings = t.metadata.find_sections(lambda x: "settings" in x.type)
            if len(settings) > 0:
                rp["settings"] = nix_metadata_to_yaml(settings[0])
            else:
                rp["settings"] = nix_metadata_to_yaml(t.metadata)
            rp["start"] = t.position[0]
            rp["duration"] = t.extent[0]
            Repros.insert1(rp, skip_duplicates=True)

            # import stimuli; the repro key (without its secondary attributes)
            # is merged into each stimulus tuple below
            repro = dict((Repros & dict(repro_id=rp["repro_id"], cell_id=cell_id)).fetch1())
            repro.pop("settings")
            repro.pop("repro_name")
            repro.pop("start")
            repro.pop("duration")
            mtags, positions = find_mtags_for_tag(b, t)
            mt_settings_dict = {}
            positions_dict = {}
            extents_dict = {}
            for j, mt in enumerate(mtags):
                if mt.id in positions_dict.keys():
                    mt_positions = positions_dict[mt.id]
                    mt_extents = extents_dict[mt.id]
                    mdata_yaml = mt_settings_dict[mt.id]
                else:
                    mdata_yaml = nix_metadata_to_yaml(mt.metadata)
                    mt_positions = np.atleast_2d(mt.positions[:])
                    mt_extents = np.atleast_2d(mt.extents[:])
                    if mt.positions.shape[0] != mt_positions.shape[0]:
                        mt_positions = mt_positions.T
                        mt_extents = mt_extents.T
                    # cache per multi-tag so repeated mtags are not re-read
                    mt_settings_dict[mt.id] = mdata_yaml
                    positions_dict[mt.id] = mt_positions
                    extents_dict[mt.id] = mt_extents
                for p in positions[j]:
                    settings = mtag_features_to_yaml(mt, p, mdata_yaml)
                    stim_start = mt_positions[p, 0]
                    stim_duration = mt_extents[p, 0]
                    stim = Stimuli.get_template_tuple()
                    stim["stimulus_id"] = str(uuid.uuid1())
                    stim["stimulus_index"] = p
                    stim["start_time"] = stim_start
                    stim["start_index"] = -1
                    stim["duration"] = stim_duration
                    stim["settings"] = settings
                    stim["mtag_id"] = mt.id
                    stim["stimulus_name"] = mt.name
                    stim.update(repro)
                    Stimuli.insert1(stim, skip_duplicates=True)
        print(" " * 120, end="\r")
        print("\n")
        f.close()
        f = None
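
# Minimal sketch of the tag discovery done in scan_nix_file_for_repros: open a
# NIX file read-only and list the names of all relacs repro-run tags. It uses
# only nixio calls that already appear in this module; the path argument is a
# placeholder.
def example_list_repro_runs(nix_path):
    f = nix.File.open(nix_path, nix.FileMode.ReadOnly)
    try:
        return [t.name for t in f.blocks[0].tags if "relacs.repro_run" in t.type]
    finally:
        f.close()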

def scan_folder_for_repros(dataset):
    print("\t\tNo nix-file, scanning directory!")
    repro_settings, stim_indices = read_stimuli_file(dataset["data_source"])
    repro_counts = {}
    cell_id = (Cells * CellDatasetMap * (Datasets & "dataset_id = '%s'" % dataset["dataset_id"])).fetch("cell_id", limit=1)[0]
    for rs, si in zip(repro_settings, stim_indices):
        rp = Repros.get_template_tuple()
        path = []
        if not find_key_recursive(rs, "repro", path):
            find_key_recursive(rs, "RePro", path)
        rp["repro_name"] = deep_get(rs, path, "None")
        if rp["repro_name"] in repro_counts.keys():
            repro_counts[rp["repro_name"]] += 1
        else:
            repro_counts[rp["repro_name"]] = 0
        path = []
        if not find_key_recursive(rs, "run", path):
            find_key_recursive(rs, "Run", path)
        if len(path) > 0:
            rp["run"] = deep_get(rs, path, 0)
        else:
            # the run information is missing and is reconstructed from the counter
            rp["run"] = repro_counts[rp["repro_name"]]
        rp["cell_id"] = cell_id
        rp["repro_id"] = rp["repro_name"] + str(repro_counts[rp["repro_name"]])
        rp["start"] = 0.
        rp["duration"] = 0.
        rp["settings"] = yaml.dump(rs).replace("'", "")
        Repros.insert1(rp, skip_duplicates=True)

        # import stimuli; the repro key (without its secondary attributes)
        # is merged into each stimulus tuple below
        repro = dict((Repros & dict(repro_id=rp["repro_id"], cell_id=cell_id)).fetch1())
        repro.pop("settings")
        repro.pop("repro_name")
        repro.pop("start")
        repro.pop("duration")
        total = len(si.keys())
        for j, k in enumerate(si.keys()):
            progress(j + 1, total, "scanning repro %s" % rp["repro_name"])
            s = int(si[k])
            stim_start = 0.
            path = []
            if not find_key_recursive(rs, "duration", path):
                find_key_recursive(rs, "Duration", path)
            if len(path) > 0:
                stim_duration = deep_get(rs, path, None)
                if "ms" in stim_duration:
                    stim_duration = float(stim_duration[:stim_duration.index("ms")])
                else:
                    stim_duration = float(stim_duration[:stim_duration.index("s")])
            else:
                stim_duration = 0.0

            stim = Stimuli.get_template_tuple()
            stim["stimulus_id"] = str(uuid.uuid1())
            stim["stimulus_index"] = j
            stim["start_time"] = stim_start
            stim["start_index"] = s
            stim["duration"] = stim_duration
            stim["settings"] = yaml.dump(rs).replace("'", "")
            stim["mtag_id"] = ""
            stim["stimulus_name"] = ""
            stim.update(repro)
            Stimuli.insert1(stim, skip_duplicates=True)
        print(" " * 120, end='\r')


def populate_repros(data_path):
    print("\tImporting RePro(s) of %s" % data_path)
    dset_name = os.path.split(data_path)[-1]
    if len(Datasets & {"dataset_id": dset_name}) != 1:
        return False
    dataset = dict((Datasets & {"dataset_id": dset_name}).fetch1())
    if dataset["has_nix"]:
        scan_nix_file_for_repros(dataset)
    else:
        scan_folder_for_repros(dataset)
    return True


def drop_tables():
    Datasets.drop()
    Subjects.drop()


def populate(datasets, update=False):
    for i, d in enumerate(datasets):
        print("Importing %i of %i: %s" % (i + 1, len(datasets), d))
        if not populate_datasets(d, update):
            continue
        populate_subjects(d)
        populate_cells(d)
        try:
            populate_repros(d)
        except Exception:
            print("\t\tsomething went wrong! %s" % d)


if __name__ == "__main__":
    data_dir = "/data/apteronotus"
    # data_dir = "../high_freq_chirps/data"
    # drop_tables()
    # datasets = glob.glob("/Users/jan/zwischenlager/2012-*")
    datasets = glob.glob(os.path.join(data_dir, '2010-06-18*'))
    populate(datasets, update=False)
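
# Illustrative query sketch (define-only, never called): count the stimulus
# presentations recorded for one cell by restricting Stimuli with the cell's
# repro runs. cell_id is a placeholder for a real primary key value.
def example_stimulus_count(cell_id):
    """Number of stimuli presented during all repro runs of the given cell."""
    return len(Stimuli & (Repros & {"cell_id": cell_id}))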