From 444d6f75c9798f82edeb7cdc16be166ad0eb4984 Mon Sep 17 00:00:00 2001 From: Jan Grewe Date: Tue, 17 Sep 2019 16:50:53 +0200 Subject: [PATCH] renaming almost reverted, add setup an recording duration --- baseline_data.py | 10 ++-- database.py | 143 ++++++++++++++++++++++++----------------------- 2 files changed, 79 insertions(+), 74 deletions(-) diff --git a/baseline_data.py b/baseline_data.py index 51833ac..b41cd05 100644 --- a/baseline_data.py +++ b/baseline_data.py @@ -3,13 +3,13 @@ import nixio as nix import os import numpy as np from IPython import embed -from database import _Dataset, _Repro +from database import Datasets, Repros schema = dj.schema("fish_book", locals()) class BaselineData(object): - def __init__(self, dataset:_Dataset): + def __init__(self, dataset:Datasets): self.__data = [] self.__dataset = dataset self._get_data() @@ -20,11 +20,11 @@ class BaselineData(object): self.__data = [] self.__data = [] - repros = (_Repro & self.__dataset & "repro_name like 'BaselineActivity%'") + repros = (Repros & self.__dataset & "repro_name like 'BaselineActivity%'") for r in repros: self.__data.append(self.__read_data(r)) - def __read_data(self, r:_Repro): + def __read_data(self, r:Repros): if self.__dataset["has_nix"]: return self.__read_data_from_nix(r) else: @@ -89,6 +89,6 @@ class BaselineData(object): if __name__ == "__main__": print("Test") - dataset = _Dataset & "dataset_id like '2018-11-09-aa-%' " + dataset = Datasets & "dataset_id like '2018-11-09-aa-%' " baseline = BaselineData(dataset.fetch1()) embed() \ No newline at end of file diff --git a/database.py b/database.py index bf185fb..30296e9 100644 --- a/database.py +++ b/database.py @@ -13,7 +13,7 @@ schema = dj.schema("fish_book_new", locals()) @schema -class _Dataset(dj.Manual): +class Datasets(dj.Manual): definition = """ # _Dataset dataset_id : varchar(256) ---- @@ -30,21 +30,21 @@ class _Dataset(dj.Manual): @staticmethod def get_template_tuple(id=None): if id is not None: - d = dict((_Dataset() & {"dataset_id": id}).fetch1()) + d = dict((Datasets() & {"dataset_id": id}).fetch1()) return d return dict(dataset_id=None, data_source="", experimenter="", setup="", recording_date=None, quality="", comment="", duration=0.0, has_nix=False) @staticmethod def get_nix_file(key): - dset = (_Dataset() & key).fetch1() + dset = (Datasets() & key).fetch1() if dset["ignore"]: return None file_path = os.path.join(dset["data_source"], dset["dataset_id"] + ".nix") if not (os.path.exists(file_path)): print("\t No nix file found for path: %s" % dset["data_source"]) return None - if not _Dataset.check_file_integrity(file_path): + if not Datasets.check_file_integrity(file_path): return None return file_path @@ -68,19 +68,19 @@ class _Dataset(dj.Manual): @property def cells(self, restrictions:dict=None): - cs = (_Cell & (CellDatasetMap & self) & restrictions).fetch() + cs = (Cells & (CellDatasetMap & self) & restrictions).fetch() return cs @property def subjects(self, restrictions:dict=None): - subjs = (_Subject & (SubjectDatasetMap & self) & restrictions).fetch() + subjs = (Subjects & (SubjectDatasetMap & self) & restrictions).fetch() return subjs @schema -class _Subject(dj.Manual): +class Subjects(dj.Manual): definition = """ - # _Subject + # Subjects subject_id : varchar(256) ---- species : varchar(256) @@ -90,17 +90,17 @@ class _Subject(dj.Manual): def get_template_tuple(subject_id=None): tup = dict(subject_id=None, species="") if subject_id is not None: - d = dict((_Subject() & {"subject_id": subject_id}).fetch1()) + d = dict((Subjects() & {"subject_id": subject_id}).fetch1()) return d return tup def make(self, key): - file_path = _Dataset.get_nix_file(key) + file_path = Datasets.get_nix_file(key) if file_path is None: return nix_file = nix.File.open(file_path, nix.FileMode.ReadOnly) m = nix_file.blocks[0].metadata - inserts = _Subject.get_template_tuple() + inserts = Subjects.get_template_tuple() subj_info = m["Recording"]["Subject"] inserts["subject_id"] = subj_info["Identifier"] inserts["species"] = subj_info["Species"][0] @@ -115,45 +115,46 @@ class _Subject(dj.Manual): def subjects(species=None): subjs = [] if species: - subjs = (_Subject & "species like '%{0:s}%'".format(species)).fetch() + subjs = (Subjects & "species like '%{0:s}%'".format(species)).fetch() else: - subjs = (_Subject & True).fetch() + subjs = (Subjects & True).fetch() return subjs @staticmethod def unique_species(): - all_species = (_Subject & True).fetch("species") + all_species = (Subjects & True).fetch("species") return np.unique(all_species) @property def cells(self): - cs = _Cell & self + cs = Cells & self return cs @property def properties(self): - return (_SubjectProperties & self).fetch1() + return (SubjectProperties & self).fetch1() #@property #def datasets(self): # retrun + @schema class SubjectDatasetMap(dj.Manual): definition = """ # SubjectDatasetMap - -> _Subject - -> _Dataset + -> Subjects + -> Datasets """ @schema -class _SubjectProperties(dj.Manual): +class SubjectProperties(dj.Manual): definition = """ # _SubjectProperties id : int auto_increment ---- - -> _Subject + -> Subjects recording_date : date weight : float size : float @@ -164,17 +165,17 @@ class _SubjectProperties(dj.Manual): tup = dict(id=None, subject_id=None, recording_date=None, weight=0.0, size=0.0, eod_frequency=0.0) if id is not None: - return dict((_SubjectProperties() & {"id": id}).fetch1()) + return dict((SubjectProperties() & {"id": id}).fetch1()) return tup @schema -class _Cell(dj.Manual): +class Cells(dj.Manual): definition = """ # Table that stores information about recorded cells. cell_id : varchar(256) ---- - -> _Subject + -> Subjects cell_type : varchar(256) firing_rate : float structure : varchar(256) @@ -191,21 +192,21 @@ class _Cell(dj.Manual): depth=0.0, region="", subregion="", structure="", lateral_pos=0.0, transversal_section=0.0) if cell_id is not None: - d = dict((_Cell() & {"cell_id": cell_id}).fetch1()) + d = dict((Cells() & {"cell_id": cell_id}).fetch1()) return d return tup @property def subject(self): - return _Subject & self + return Subjects & self @staticmethod def celltypes(): - return np.unique(_Cell.fetch("cell_type")) + return np.unique(Cells.fetch("cell_type")) @staticmethod def cells(celltype=None, species=None, quality="good"): - cs = _Cell * CellDatasetMap * _Dataset * _Subject + cs = Cells * CellDatasetMap * Datasets * Subjects if celltype: cs = cs & "cell_type like '{0:s}'".format(celltype) if species: @@ -219,17 +220,17 @@ class _Cell(dj.Manual): class CellDatasetMap(dj.Manual): definition = """ # Table that maps recorded cells to datasets - -> _Dataset - -> _Cell + -> Datasets + -> Cells """ @schema -class _Repro(dj.Manual): +class Repros(dj.Manual): definition = """ repro_id : varchar(512) # The name that was given to the RePro run by relacs run : smallint # A counter counting the runs of the ReProp in this dataset - -> _Cell # + -> Cells # ---- repro_name : varchar(512) # The original name of the RePro itself, not any given name by user or relacs settings : varchar(3000) # Yaml formatted string containing the repro settings (tag.metadata in case of a nix file) @@ -239,18 +240,18 @@ class _Repro(dj.Manual): @staticmethod def get_template_tuple(repro_id=None): - tup = dict(repro_id=None, dataset_id=None, run=0, repro_name="", settings=None, start=None, duration=None) + tup = dict(repro_id=None, cell_id=None, run=0, repro_name="", settings=None, start=None, duration=None) if repro_id is not None: - d = dict((_Repro() & {"repro_id": repro_id}).fetch1()) + d = dict((Repros() & {"repro_id": repro_id}).fetch1()) return d return tup @schema -class _Stimulus(dj.Manual): +class Stimuli(dj.Manual): definition = """ stimulus_id : varchar(50) - -> _Repro + -> Repros --- stimulus_index : int stimulus_name : varchar(512) @@ -264,7 +265,7 @@ class _Stimulus(dj.Manual): @staticmethod def get_template_tuple(stimulus_id=None): if stimulus_id is not None: - tup = dict((_Stimulus & {"stimulus_id": stimulus_id}).fetch1()) + tup = dict((Stimuli & {"stimulus_id": stimulus_id}).fetch1()) else: tup = dict(stimulus_id=None, stimulus_index=None, stimulus_name="", start_index=0, start_time=0.0, duration=0.0, settings=None) @@ -275,22 +276,24 @@ def populate_datasets(data_path, update=False): if not os.path.exists(data_path): return dset_name = os.path.split(data_path)[-1] - experimenter, rec_date, quality, comment, has_nix = ut.read_dataset_info(os.path.join(data_path, 'info.dat')) + experimenter, rec_date, quality, comment, has_nix, rec_duration, setup = ut.read_dataset_info(os.path.join(data_path, 'info.dat')) if not experimenter: return False - inserts = _Dataset.get_template_tuple() + inserts = Datasets.get_template_tuple() inserts["dataset_id"] = dset_name inserts["data_source"] = data_path inserts["experimenter"] = experimenter inserts["recording_date"] = rec_date inserts["quality"] = quality if not isinstance(quality, dict) else "" inserts["comment"] = comment if not isinstance(comment, dict) else "" + inserts["duration"] = rec_duration + inserts["setup"] = setup inserts["has_nix"] = has_nix - if len(_Dataset & inserts) > 0 and not update: + if len(Datasets & inserts) > 0 and not update: print('\t\t %s is already in database!' % dset_name) return False - _Dataset().insert1(inserts, skip_duplicates=True) + Datasets().insert1(inserts, skip_duplicates=True) return True @@ -308,7 +311,7 @@ def populate_subjects(data_path): if len(p) > 0: subj = ut.deep_get(info, p) - inserts = _Subject.get_template_tuple() + inserts = Subjects.get_template_tuple() subj_id = None if "Identifier" in subj.keys(): if isinstance(subj["Identifier"], dict): @@ -324,15 +327,15 @@ def populate_subjects(data_path): subj_id = "unspecified_" + dset_name inserts["subject_id"] = subj_id inserts["species"] = subj["Species"] - _Subject().insert1(inserts, skip_duplicates=True) + Subjects().insert1(inserts, skip_duplicates=True) # multi match entry - dataset = dict((_Dataset() & {"dataset_id": dset_name}).fetch1()) + dataset = dict((Datasets() & {"dataset_id": dset_name}).fetch1()) mm = dict(dataset_id=dataset["dataset_id"], subject_id=inserts["subject_id"]) SubjectDatasetMap.insert1(mm, skip_duplicates=True) # subject properties - props = _SubjectProperties.get_template_tuple() + props = SubjectProperties.get_template_tuple() props["subject_id"] = inserts["subject_id"] props["recording_date"] = dataset["recording_date"] if "Weight" in subj.keys(): @@ -343,8 +346,8 @@ def populate_subjects(data_path): props["eod_frequency"] = np.round(float(subj["EOD Frequency"][:-2])) p = props.copy() p.pop("id") - if len(_SubjectProperties & p) == 0: - _SubjectProperties.insert1(props, skip_duplicates=True) + if len(SubjectProperties & p) == 0: + SubjectProperties.insert1(props, skip_duplicates=True) def populate_cells(data_path): @@ -381,12 +384,12 @@ def populate_cells(data_path): subj_id = info["Identifier"] else: subj_id = "unspecified_" + dset_name - dataset = dict((_Dataset & {"dataset_id": dset_name}).fetch1()) - subject = dict((_Subject & {"subject_id": subj_id}).fetch1()) + dataset = dict((Datasets & {"dataset_id": dset_name}).fetch1()) + subject = dict((Subjects & {"subject_id": subj_id}).fetch1()) dataset_id = dataset["dataset_id"] cell_id = "-".join(dataset_id.split("-")[:4]) if len(dataset_id) > 4 else dataset_id - cell_props = _Cell.get_template_tuple() + cell_props = Cells.get_template_tuple() cell_props["subject_id"] = subject["subject_id"] cell_props["cell_id"] = cell_id cell_props["cell_type"] = cell_info["CellType"] @@ -404,7 +407,7 @@ def populate_cells(data_path): if "Transverse section" in cell_info.keys(): cell_props["transversal_section"] = float(cell_info["Transverse section"]) - _Cell.insert1(cell_props, skip_duplicates=True) + Cells.insert1(cell_props, skip_duplicates=True) # multi mach entry mm = dict(dataset_id=dataset["dataset_id"], cell_id=cell_props["cell_id"]) @@ -413,9 +416,10 @@ def populate_cells(data_path): def scan_nix_file_for_repros(dataset): print("\t\tscanning nix file") + cell_id = (Cells * CellDatasetMap * (Datasets & "dataset_id = '%s'" % dataset["dataset_id"])).fetch("cell_id", limit=1)[0] nix_files = glob.glob(os.path.join(dataset["data_source"], "*.nix")) for nf in nix_files: - if not _Dataset.check_file_integrity(nf): + if not Datasets.check_file_integrity(nf): print("\t\tfile is not sane!!!") continue f = nix.File.open(nf, nix.FileMode.ReadOnly) @@ -428,10 +432,10 @@ def scan_nix_file_for_repros(dataset): rs = rs[0] print("\t\t%s" % rs["RePro"]) - rp = _Repro.get_template_tuple() + rp = Repros.get_template_tuple() rp["run"] = rs["Run"] rp["repro_name"] = rs["RePro"] - rp["dataset_id"] = dataset["dataset_id"] + rp["cell_id"] = cell_id rp["repro_id"] = t.name settings = t.metadata.find_sections(lambda x: "settings" in x.type) if len(settings) > 0: @@ -440,10 +444,10 @@ def scan_nix_file_for_repros(dataset): rp["settings"] = ut.nix_metadata_to_yaml(t.metadata) rp["start"] = t.position[0] rp["duration"] = t.extent[0] - _Repro.insert1(rp, skip_duplicates=True) + Repros.insert1(rp, skip_duplicates=True) # import Stimuli - repro = dict((_Repro & dict(repro_id=rp["repro_id"], dataset_id=rp["dataset_id"])).fetch1()) + repro = dict((Repros & dict(repro_id=rp["repro_id"], cell_id=cell_id)).fetch1()) repro.pop("settings") repro.pop("repro_name") repro.pop("start") @@ -458,7 +462,7 @@ def scan_nix_file_for_repros(dataset): stim_start = mt_positions[p, 0] stim_duration = mt_extents[p, 0] - stim = _Stimulus.get_template_tuple() + stim = Stimuli.get_template_tuple() stim["stimulus_id"] = str(uuid.uuid1()) stim["stimulus_index"] = p stim["start_time"] = stim_start @@ -468,7 +472,7 @@ def scan_nix_file_for_repros(dataset): stim["mtag_id"] = mt.id stim["stimulus_name"] = mt.name stim.update(repro) - _Stimulus.insert1(stim, skip_duplicates=True) + Stimuli.insert1(stim, skip_duplicates=True) f.close() f = None @@ -477,8 +481,9 @@ def scan_folder_for_repros(dataset): print("\t\tNo nix-file, scanning directory!") repro_settings, stim_indices = ut.read_stimuli_file(dataset["data_source"]) repro_counts = {} + cell_id = (Cells * CellDatasetMap * (Datasets & "dataset_id = '%s'" % dataset["dataset_id"])).fetch("cell_id", limit=1)[0] for i, (rs, si) in enumerate(zip(repro_settings, stim_indices)): - rp = _Repro.get_template_tuple() + rp = Repros.get_template_tuple() path = [] if not ut.find_key_recursive(rs, "run", path): ut.find_key_recursive(rs, "Run", path) @@ -498,15 +503,15 @@ def scan_folder_for_repros(dataset): repro_counts[rp["repro_name"]] += 1 else: repro_counts[rp["repro_name"]] = 1 - rp["dataset_id"] = dataset["dataset_id"] + rp["cell_id"] = cell_id rp["repro_id"] = rp["repro_name"] + str(repro_counts[rp["repro_name"]]) rp["start"] = 0. rp["duration"] = 0. rp["settings"] = yaml.dump(rs) - _Repro.insert1(rp, skip_duplicates=True) + Repros.insert1(rp, skip_duplicates=True) # import stimuli - repro = dict((_Repro & dict(repro_id=rp["repro_id"], dataset_id=rp["dataset_id"])).fetch1()) + repro = dict((Repros & dict(repro_id=rp["repro_id"], cell_id=cell_id)).fetch1()) repro.pop("settings") repro.pop("repro_name") repro.pop("start") @@ -526,7 +531,7 @@ def scan_folder_for_repros(dataset): else: stim_duration = 0.0 - stim = _Stimulus.get_template_tuple() + stim = Stimuli.get_template_tuple() stim["stimulus_id"] = str(uuid.uuid1()) stim["stimulus_index"] = j stim["start_time"] = stim_start @@ -536,15 +541,15 @@ def scan_folder_for_repros(dataset): stim["mtag_id"] = "" stim["stimulus_name"] = "" stim.update(repro) - _Stimulus.insert1(stim, skip_duplicates=True) + Stimuli.insert1(stim, skip_duplicates=True) def populate_repros(data_path): print("\tImporting RePro(s) of %s" % data_path) dset_name = os.path.split(data_path)[-1] - if len(_Dataset & {"dataset_id": dset_name}) != 1: + if len(Datasets & {"dataset_id": dset_name}) != 1: return False - dataset = dict((_Dataset & {"dataset_id": dset_name}).fetch1()) + dataset = dict((Datasets & {"dataset_id": dset_name}).fetch1()) if dataset["has_nix"]: scan_nix_file_for_repros(dataset) @@ -554,8 +559,8 @@ def populate_repros(data_path): def drop_tables(): - _Dataset.drop() - _Subject.drop() + Datasets.drop() + Subjects.drop() def populate(datasets, update=False): @@ -575,7 +580,7 @@ if __name__ == "__main__": data_dir = "/data/apteronotus" # data_dir = "../high_freq_chirps/data" # drop_tables() - datasets = glob.glob("/Users/jan/zwischenlager/2012-*") - # datasets = glob.glob(os.path.join(data_dir, '/data/eigenmannia/201*')) + # datasets = glob.glob("/Users/jan/zwischenlager/2012-*")2010-06-21-ac/info.dat + datasets = glob.glob(os.path.join(data_dir, '/data/apteronotus/2018-*')) populate(datasets, update=False)