diff --git a/fishbook/__init__.py b/fishbook/__init__.py new file mode 100644 index 0000000..fcb6241 --- /dev/null +++ b/fishbook/__init__.py @@ -0,0 +1,3 @@ +from fishbook.fishbook import * +import fishbook.database as database +__all__ = ['fishbook', 'database'] \ No newline at end of file diff --git a/baseline_data.py b/fishbook/baseline_data.py similarity index 86% rename from baseline_data.py rename to fishbook/baseline_data.py index 0e2e33b..41abae7 100644 --- a/baseline_data.py +++ b/fishbook/baseline_data.py @@ -1,11 +1,12 @@ -import datajoint as dj -import nixio as nix -import os -import numpy as np +print(__name__) +print(__package__) +__package__ = "fishbook" +from fishbook.fishbook import Dataset +#from .database.database import * +#from .fishbook import Cell, Dataset + +#schema = dj.schema("fish_book", locals()) from IPython import embed -from database import * -schema = dj.schema("fish_book", locals()) - class BaselineData(object): @@ -87,11 +88,11 @@ class BaselineData(object): return np.asarray(data) -if __name__ == "__main__": - print("Test") - embed() - exit() +#if __name__ == "__main__": +print("Test") +embed() +exit() - dataset = Dataset(tuple=(Datasets & "dataset_id like '2018-11-09-aa-%'").fetch(limit=1, as_dict=True)) - baseline = BaselineData(dataset) - embed() \ No newline at end of file +dataset = Dataset(tuple=(Datasets & "dataset_id like '2018-11-09-aa-%'").fetch(limit=1, as_dict=True)) +baseline = BaselineData(dataset) +embed() \ No newline at end of file diff --git a/fishbook/database/__init__.py b/fishbook/database/__init__.py new file mode 100644 index 0000000..ee8efb1 --- /dev/null +++ b/fishbook/database/__init__.py @@ -0,0 +1,3 @@ +from .database import * + +__all__ = ['database'] \ No newline at end of file diff --git a/database.py b/fishbook/database/database.py similarity index 71% rename from database.py rename to fishbook/database/database.py index 3d83667..a75adf9 100644 --- a/database.py +++ b/fishbook/database/database.py 
@@ -3,12 +3,13 @@ import datajoint as dj import nixio as nix import os import glob -import util as ut +from .util import read_info_file, read_dataset_info, read_stimuli_file +from .util import find_key_recursive, deep_get, find_mtags_for_tag +from .util import mtag_settings_to_yaml, nix_metadata_to_yaml import uuid import yaml -from IPython import embed -schema = dj.schema("fish_book_new", locals()) +schema = dj.schema("fish_book", locals()) @schema @@ -65,76 +66,6 @@ class Datasets(dj.Manual): return sane -class Dataset: - def __init__(self, dataset_id=None, tuple=None): - if tuple: - self.__tuple = tuple - elif dataset_id: - pattern = "dataset_id like '{0:s}'".format(dataset_id) - dsets = (Datasets & pattern) - assert(len(dsets) == 1), "Dataset name is not unique!" - self.__tuple = dsets.fetch(limit=1)[0] - else: - print("Empty dataset, not linked to any database entry!") - - @property - def dataset_id(self): - return self.__tuple["dataset_id"] - - @property - def experimenter(self): - return self.__tuple["experimenter"] - - @property - def recording_date(self): - return self.__tuple["recording_date"] - - @property - def recording_duration(self): - return self.__tuple["duration"] - - @property - def quality(self): - return self.__tuple["quality"] - - @property - def has_nix(self): - return self.__tuple["has_nix"] - - @property - def comment(self): - return self.__tuple["comment"] - - @property - def data_source(self): - return self.__tuple["data_source"] - - @property - def setup(self): - return self.__tuple["setup"] - - @property - def cells(self): - cs = (Cells * (CellDatasetMap & self.__tuple)) - return [Cell(tuple=c) for c in cs] - - @property - def subjects(self): - subjs = (Subjects * (SubjectDatasetMap & self.__tuple)) - return [Subject(tuple=s) for s in subjs] - - @staticmethod - def find_datasets(min_duration=None, experimenter=None, quality=None): - dsets = Datasets - if min_duration: - dsets = dsets & "duration > %.2f" % min_duration - if experimenter: 
- dsets = dsets & "experimenter like '%{0:s}%'".format(experimenter) - if quality: - dsets = dsets & "quality like '{0:s}'".format(quality) - return [Dataset(tuple=d) for d in dsets] - - @schema class Subjects(dj.Manual): definition = """ @@ -174,47 +105,6 @@ class Subjects(dj.Manual): # retrun -class Subject: - __tuple = {} - - def __init__(self, subject_id=None, tuple=None): - if tuple: - self.__tuple = tuple - elif subject_id: - self.__tuple = Subjects & "subject_id like '{0:s}'".format(subject_id).fetch()[0] - else: - print("Empty Subject, not linked to any database entry!") - - @property - def subject_id(self): - return self.__tuple["subject_id"] - - @property - def species(self): - return self.__tuple["species"] - - @property - def cells(self): - cs = Cells & self.__tuple - return [Cell(tuple=c) for c in cs] - - @property - def properties(self): - return (SubjectProperties & self.__tuple).fetch(as_dict=True) - - @staticmethod - def find_subjects(species=None): - subjs = Subjects & True - if species: - subjs = (Subjects & "species like '%{0:s}%'".format(species)) - return [Subject(tuple=s) for s in subjs] - - @staticmethod - def unique_species(): - all_species = (Subjects & True).fetch("species") - return np.unique(all_species) - - @schema class SubjectDatasetMap(dj.Manual): definition = """ @@ -273,65 +163,6 @@ class Cells(dj.Manual): return tup -class Cell: - def __init__(self, cell_id=None, tuple=None): - if tuple: - self.__tuple = tuple - elif cell_id: - pattern = "cell_id like '{0:s}'".format(cell_id) - cells = (Cells & pattern) - assert (len(cells) == 1), "Cell id is not unique!" 
- self.__tuple = cells.fetch(as_dict=True)[0] - else: - print("Empty Cell, not linked to any database entry!") - - @property - def cell_id(self): - return self.__tuple["cell_id"] if "cell_id" in self.__tuple.keys() else "" - - @property - def cell_type(self): - return self.__tuple["cell_type"] if "cell_type" in self.__tuple.keys() else "" - - @property - def firing_rate(self): - return self.__tuple["firing_rate"] if "firing_rate" in self.__tuple.keys() else 0.0 - - @property - def location(self): - keys = ["structure", "region", "subregion", "depth", "lateral_pos", "transversal_section"] - loc = {} - for k in keys: - if k in self.__tuple.keys(): - loc[k] = self.__tuple[k] - else: - loc[k] = "" - return loc - - @property - def subject(self): - return Subject(tuple=(Subjects & {"subject_id": self.__tuple["subject_id"]}).fetch(limit=1, as_dict=True)[0]) - - @staticmethod - def celltypes(): - return np.unique(Cells.fetch("cell_type")) - - @staticmethod - def find_cells(cell_type=None, species=None, quality="good"): - cs = Cells * CellDatasetMap * Datasets * Subjects - if cell_type: - cs = cs & "cell_type like '{0:s}'".format(cell_type) - if species: - cs = cs & "species like '%{0:s}%'".format(species) - if quality: - cs = cs & "quality like '{0:s}'".format(quality) - return [Cell(tuple=c) for c in cs] - - def __str__(self): - str = "" - str += "Cell: %s \t type: %s\n"%(self.cell_id, self.cell_type) - return str - @schema class CellDatasetMap(dj.Manual): definition = """ @@ -390,9 +221,9 @@ class Stimuli(dj.Manual): def populate_datasets(data_path, update=False): if not os.path.exists(data_path): - return + return False dset_name = os.path.split(data_path)[-1] - experimenter, rec_date, quality, comment, has_nix, rec_duration, setup = ut.read_dataset_info(os.path.join(data_path, 'info.dat')) + experimenter, rec_date, quality, comment, has_nix, rec_duration, setup = read_dataset_info(os.path.join(data_path, 'info.dat')) if not experimenter: return False @@ -406,7 +237,7 
@@ def populate_datasets(data_path, update=False): inserts["duration"] = rec_duration inserts["setup"] = setup inserts["has_nix"] = has_nix - if len(Datasets & inserts) > 0 and not update: + if len(Datasets & "dataset_id like '%s'" % inserts["dataset_id"]) > 0 and not update: print('\t\t %s is already in database!' % dset_name) return False Datasets().insert1(inserts, skip_duplicates=True) @@ -419,13 +250,13 @@ def populate_subjects(data_path): info_file = os.path.join(data_path, 'info.dat') if not os.path.exists(info_file): return None, None, False - info = ut.read_info_file(info_file) + info = read_info_file(info_file) p = [] - ut.find_key_recursive(info, "Subject", p) + find_key_recursive(info, "Subject", p) subj = {} if len(p) > 0: - subj = ut.deep_get(info, p) + subj = deep_get(info, p) inserts = Subjects.get_template_tuple() subj_id = None @@ -472,18 +303,18 @@ def populate_cells(data_path): info_file = os.path.join(data_path, 'info.dat') if not os.path.exists(info_file): return None, None, False - info = ut.read_info_file(info_file) + info = read_info_file(info_file) p = [] - ut.find_key_recursive(info, "Subject", p) - subject_info = ut.deep_get(info, p) + find_key_recursive(info, "Subject", p) + subject_info = deep_get(info, p) p = [] - ut.find_key_recursive(info, "Cell", p) - cell_info = ut.deep_get(info, p) + find_key_recursive(info, "Cell", p) + cell_info = deep_get(info, p) p = [] - ut.find_key_recursive(info, "Firing Rate1", p) - firing_rate = ut.deep_get(info, p, default=0.0) + find_key_recursive(info, "Firing Rate1", p) + firing_rate = deep_get(info, p, default=0.0) if isinstance(firing_rate, str): firing_rate = float(firing_rate[:-2]) @@ -555,9 +386,9 @@ def scan_nix_file_for_repros(dataset): rp["repro_id"] = t.name settings = t.metadata.find_sections(lambda x: "settings" in x.type) if len(settings) > 0: - rp["settings"] = ut.nix_metadata_to_yaml(settings[0]) + rp["settings"] = nix_metadata_to_yaml(settings[0]) else: - rp["settings"] = 
ut.nix_metadata_to_yaml(t.metadata) + rp["settings"] = nix_metadata_to_yaml(t.metadata) rp["start"] = t.position[0] rp["duration"] = t.extent[0] Repros.insert1(rp, skip_duplicates=True) @@ -569,12 +400,12 @@ def scan_nix_file_for_repros(dataset): repro.pop("start") repro.pop("duration") - mtags, positions = ut.find_mtags_for_tag(b, t) + mtags, positions = find_mtags_for_tag(b, t) for i, mt in enumerate(mtags): mt_positions = np.atleast_2d(mt.positions[:]).T mt_extents = np.atleast_2d(mt.extents[:]).T for p in positions[i]: - settings = ut.mtag_settings_to_yaml(mt, p) + settings = mtag_settings_to_yaml(mt, p) stim_start = mt_positions[p, 0] stim_duration = mt_extents[p, 0] @@ -595,24 +426,24 @@ def scan_nix_file_for_repros(dataset): def scan_folder_for_repros(dataset): print("\t\tNo nix-file, scanning directory!") - repro_settings, stim_indices = ut.read_stimuli_file(dataset["data_source"]) + repro_settings, stim_indices = read_stimuli_file(dataset["data_source"]) repro_counts = {} cell_id = (Cells * CellDatasetMap * (Datasets & "dataset_id = '%s'" % dataset["dataset_id"])).fetch("cell_id", limit=1)[0] for i, (rs, si) in enumerate(zip(repro_settings, stim_indices)): rp = Repros.get_template_tuple() path = [] - if not ut.find_key_recursive(rs, "run", path): - ut.find_key_recursive(rs, "Run", path) + if not find_key_recursive(rs, "run", path): + find_key_recursive(rs, "Run", path) if len(path) > 0: - rp["run"] = ut.deep_get(rs, path, 0) + rp["run"] = deep_get(rs, path, 0) else: rp["run"] = -1 path = [] - if not ut.find_key_recursive(rs, "repro", path): - ut.find_key_recursive(rs, "RePro", path) - print("\t\t %s" % ut.deep_get(rs, path, "None")) - rp["repro_name"] = ut.deep_get(rs, path, "None") + if not find_key_recursive(rs, "repro", path): + find_key_recursive(rs, "RePro", path) + print("\t\t %s" % deep_get(rs, path, "None")) + rp["repro_name"] = deep_get(rs, path, "None") path = [] if rp["repro_name"] in repro_counts.keys(): @@ -636,10 +467,10 @@ def 
scan_folder_for_repros(dataset): s = int(si[k]) stim_start = 0. path = [] - if not ut.find_key_recursive(rs, "duration", path): - ut.find_key_recursive(rs, "Duration", path) + if not find_key_recursive(rs, "duration", path): + find_key_recursive(rs, "Duration", path) if len(path) > 0 : - stim_duration = ut.deep_get(rs, path, None) + stim_duration = deep_get(rs, path, None) if "ms" in stim_duration: stim_duration = float(stim_duration[:stim_duration.index("ms")]) else: @@ -697,6 +528,6 @@ if __name__ == "__main__": # data_dir = "../high_freq_chirps/data" # drop_tables() # datasets = glob.glob("/Users/jan/zwischenlager/2012-*")2010-06-21-ac/info.dat - datasets = glob.glob(os.path.join(data_dir, '/data/apteronotus/2010-*')) + datasets = glob.glob(os.path.join(data_dir, '/data/apteronotus/2011-*')) populate(datasets, update=False) diff --git a/util.py b/fishbook/database/util.py similarity index 98% rename from util.py rename to fishbook/database/util.py index 95b0368..e821d19 100644 --- a/util.py +++ b/fishbook/database/util.py @@ -166,7 +166,7 @@ def _get_string(dictionary: dict, key:str, alt_key=None, default=None): elif alt_key: find_key_recursive(dictionary, alt_key, p) value = deep_get(dictionary, p, default) - if value != default and isinstance(value, dict): + if default and value != default and isinstance(value, dict): value = default return value @@ -203,8 +203,11 @@ def read_dataset_info(info_file): quality = _get_string(info, "Recording quality") comment = _get_string(info, "Comment", default="") rec_duration = _get_string(info, "Recording duration", "Recording duratio", default=0.0) + if rec_duration != 0.0 and isinstance(rec_duration, str) and "min" in rec_duration: rec_duration = rec_duration[:-3] + elif isinstance(rec_duration, dict): + rec_duration = 0.0 setup_info = _get_string(info, "Setup", default=None) if setup_info and isinstance(setup_info, dict): setup = _get_string(setup_info, "Identifier") diff --git a/fishbook/fishbook.py 
"""Object-style, read-only access to the fish_book database tables.

Each class in this module wraps a single row (kept as a mapping of column
name to value) of one of the DataJoint tables imported from
``.database.database`` and exposes the columns as properties:

* ``Cell``     -> ``Cells``
* ``Dataset``  -> ``Datasets``
* ``RePro``    -> ``Repros``
* ``Stimulus`` -> ``Stimuli``
* ``Subject``  -> ``Subjects``

Every wrapper can be built from an id (the row is looked up in the
database) or from an already fetched row via ``tuple=``.  Built without
arguments a wrapper is "empty": it prints a notice and its properties
return defaults.
"""
from .database.database import (Cells, Datasets, CellDatasetMap, Subjects,
                                SubjectProperties, SubjectDatasetMap, Stimuli,
                                Repros)
import numpy as np


def _safe_get_val(dictionary: dict, key, default=None):
    """Return ``dictionary[key]``, or ``default`` when the key is missing.

    Uses plain indexing (EAFP) rather than ``dict.get`` so that rows which
    support ``row[key]`` but are not dicts are handled as well.
    NOTE(review): ``IndexError``/``ValueError`` are caught on the assumption
    that such rows may be numpy records - confirm against the callers.
    """
    try:
        return dictionary[key]
    except (KeyError, IndexError, ValueError):
        return default


class Cell:
    """Wrapper around one row of the ``Cells`` table."""

    def __init__(self, cell_id=None, tuple=None):
        """Create a cell from a fetched row or by looking up ``cell_id``.

        :param cell_id: id of the cell; used in an SQL ``like`` restriction
            and must match exactly one row.
        :param tuple: an already fetched row; takes precedence over
            ``cell_id``.
        :raises AssertionError: if ``cell_id`` matches no or several rows.
        """
        if tuple:
            self.__tuple = tuple
        elif cell_id:
            pattern = "cell_id like '{0:s}'".format(cell_id)
            cells = (Cells & pattern)
            assert (len(cells) == 1), "Cell id does not exist or is not unique!"
            self.__tuple = cells.fetch(as_dict=True)[0]
        else:
            # Keep the properties usable on an unlinked object.
            self.__tuple = {}
            print("Empty Cell, not linked to any database entry!")

    @property
    def cell_id(self):
        """The cell id, or ``""`` if unknown."""
        return _safe_get_val(self.__tuple, "cell_id", "")

    @property
    def cell_type(self):
        """The cell type, or ``""`` if unknown."""
        return _safe_get_val(self.__tuple, "cell_type", "")

    @property
    def firing_rate(self):
        """The stored firing rate, or ``0.0`` if unknown."""
        return _safe_get_val(self.__tuple, "firing_rate", 0.0)

    @property
    def location(self):
        """Dict with the recording-location columns; missing ones are ``""``."""
        keys = ["structure", "region", "subregion", "depth", "lateral_pos", "transversal_section"]
        return {k: _safe_get_val(self.__tuple, k, "") for k in keys}

    @property
    def subject(self):
        """The ``Subject`` referenced by this cell's ``subject_id``."""
        rows = (Subjects & {"subject_id": self.__tuple["subject_id"]}).fetch(limit=1, as_dict=True)
        return Subject(tuple=rows[0])

    @property
    def repro_runs(self):
        """List of ``RePro`` objects stored for this cell."""
        repros = (Repros & ("cell_id = '%s'" % self.cell_id))
        return [RePro(tuple=r) for r in repros]

    @staticmethod
    def celltypes():
        """Return the unique values of the ``cell_type`` column."""
        return np.unique(Cells.fetch("cell_type"))

    @staticmethod
    def find_cells(cell_type=None, species=None, quality="good"):
        """Find cells in the join of cells, datasets and subjects.

        :param cell_type: ``like`` pattern for the cell type.
        :param species: substring of the species name.
        :param quality: ``like`` pattern for the dataset quality; pass a
            falsy value to disable the filter.
        :return: list of ``Cell`` objects.
        """
        cs = Cells * CellDatasetMap * Datasets * Subjects
        if cell_type:
            cs = cs & "cell_type like '{0:s}'".format(cell_type)
        if species:
            cs = cs & "species like '%{0:s}%'".format(species)
        if quality:
            cs = cs & "quality like '{0:s}'".format(quality)
        return [Cell(tuple=c) for c in cs]

    def __str__(self):
        return "Cell: %s \t type: %s\n" % (self.cell_id, self.cell_type)


class Dataset:
    """Wrapper around one row of the ``Datasets`` table."""

    def __init__(self, dataset_id=None, tuple=None):
        """Create a dataset from a fetched row or by looking up ``dataset_id``.

        :param dataset_id: id of the dataset; used in an SQL ``like``
            restriction and must match exactly one row.
        :param tuple: an already fetched row; takes precedence over
            ``dataset_id``.
        :raises AssertionError: if ``dataset_id`` does not match exactly one
            row.
        """
        if tuple:
            self.__tuple = tuple
        elif dataset_id:
            pattern = "dataset_id like '{0:s}'".format(dataset_id)
            dsets = (Datasets & pattern)
            assert(len(dsets) == 1), "Dataset name is not unique!"
            # Fetch as dict, like the other wrapper classes do.
            self.__tuple = dsets.fetch(limit=1, as_dict=True)[0]
        else:
            # Keep the properties usable on an unlinked object.
            self.__tuple = {}
            print("Empty dataset, not linked to any database entry!")

    @property
    def dataset_id(self):
        """Value of the ``dataset_id`` column (``None`` if unlinked)."""
        return _safe_get_val(self.__tuple, "dataset_id")

    @property
    def experimenter(self):
        """Value of the ``experimenter`` column (``None`` if unlinked)."""
        return _safe_get_val(self.__tuple, "experimenter")

    @property
    def recording_date(self):
        """Value of the ``recording_date`` column (``None`` if unlinked)."""
        return _safe_get_val(self.__tuple, "recording_date")

    @property
    def recording_duration(self):
        """Value of the ``duration`` column (``None`` if unlinked)."""
        return _safe_get_val(self.__tuple, "duration")

    @property
    def quality(self):
        """Value of the ``quality`` column (``None`` if unlinked)."""
        return _safe_get_val(self.__tuple, "quality")

    @property
    def has_nix(self):
        """Value of the ``has_nix`` column (``None`` if unlinked)."""
        return _safe_get_val(self.__tuple, "has_nix")

    @property
    def comment(self):
        """Value of the ``comment`` column (``None`` if unlinked)."""
        return _safe_get_val(self.__tuple, "comment")

    @property
    def data_source(self):
        """Value of the ``data_source`` column (``None`` if unlinked)."""
        return _safe_get_val(self.__tuple, "data_source")

    @property
    def setup(self):
        """Value of the ``setup`` column (``None`` if unlinked)."""
        return _safe_get_val(self.__tuple, "setup")

    @property
    def cells(self):
        """List of ``Cell`` objects mapped to this dataset."""
        if not self.__tuple:
            # An empty restriction would match every row of the map.
            return []
        cs = (Cells * (CellDatasetMap & self.__tuple))
        return [Cell(tuple=c) for c in cs]

    @property
    def subjects(self):
        """List of ``Subject`` objects mapped to this dataset."""
        if not self.__tuple:
            # An empty restriction would match every row of the map.
            return []
        subjs = (Subjects * (SubjectDatasetMap & self.__tuple))
        return [Subject(tuple=s) for s in subjs]

    @staticmethod
    def find_datasets(min_duration=None, experimenter=None, quality=None):
        """Find datasets matching all given criteria.

        :param min_duration: keep datasets whose ``duration`` is larger.
        :param experimenter: substring of the experimenter name.
        :param quality: ``like`` pattern for the quality column.
        :return: list of ``Dataset`` objects.
        """
        # Start from a query expression, as find_subjects/find_repros do.
        dsets = Datasets & True
        if min_duration:
            dsets = dsets & "duration > %.2f" % min_duration
        if experimenter:
            dsets = dsets & "experimenter like '%{0:s}%'".format(experimenter)
        if quality:
            dsets = dsets & "quality like '{0:s}'".format(quality)
        return [Dataset(tuple=d) for d in dsets]


class RePro:
    """Wrapper around one row of the ``Repros`` table."""

    def __init__(self, repro_id=None, tuple=None):
        """Create a repro run from a fetched row or by looking up ``repro_id``.

        :param repro_id: id of the repro run; used in an SQL ``like``
            restriction and must match exactly one row.
        :param tuple: an already fetched row; takes precedence over
            ``repro_id``.
        :raises AssertionError: if ``repro_id`` matches no or several rows.
        """
        if tuple:
            self.__tuple = tuple
        elif repro_id:
            # Restrict the Repros *table*; RePro is this wrapper class.
            repros = (Repros & "repro_id like '{0:s}'".format(repro_id))
            assert (len(repros) == 1), "Repro id does not exist or is not unique!"
            self.__tuple = repros.fetch(limit=1, as_dict=True)[0]
        else:
            self.__tuple = {}
            print("Empty RePro, not linked to any database entry!")

    @property
    def repro_id(self):
        """The repro id, or ``""`` if unknown."""
        return _safe_get_val(self.__tuple, "repro_id", "")

    @property
    def run(self):
        """The run number, or ``-1`` if unknown."""
        return _safe_get_val(self.__tuple, "run", -1)

    @property
    def cell_id(self):
        """Id of the cell this run belongs to, or ``""`` if unknown."""
        return _safe_get_val(self.__tuple, "cell_id", "")

    @property
    def cell(self):
        """The ``Cell`` looked up from ``cell_id``."""
        return Cell(self.cell_id)

    @property
    def name(self):
        """Value of the ``repro_name`` column, or ``""`` if unknown."""
        return _safe_get_val(self.__tuple, "repro_name", "")

    @property
    def settings(self):
        """Value of the ``settings`` column, or ``""`` if unknown."""
        return _safe_get_val(self.__tuple, "settings", "")

    @property
    def start(self):
        """Value of the ``start`` column, or ``0.0`` if unknown."""
        return _safe_get_val(self.__tuple, "start", 0.0)

    @property
    def duration(self):
        """Value of the ``duration`` column, or ``0.0`` if unknown."""
        return _safe_get_val(self.__tuple, "duration", 0.0)

    @property
    def stimuli(self):
        """List of ``Stimulus`` objects of this run (same repro and cell id)."""
        stims = (Stimuli & ("repro_id = '%s'" % self.repro_id)) & ("cell_id = '%s'" % self.cell_id)
        return [Stimulus(tuple=s) for s in stims]

    @staticmethod
    def find_repros(repro_name=None, cell_id=None, settings=None):
        """Find repro runs matching all given criteria.

        :param repro_name: substring of the repro name.
        :param cell_id: exact cell id.
        :param settings: substring of the settings column.
        :return: list of ``RePro`` objects.
        """
        repros = Repros & True
        if repro_name:
            repros = repros & "repro_name like '%{0:s}%'".format(repro_name)
        if cell_id:
            repros = repros & "cell_id = '%s'" % cell_id
        if settings:
            repros = repros & "settings like '%{0:s}%'".format(settings)
        return [RePro(tuple=r) for r in repros]


class Stimulus:
    """Wrapper around one row of the ``Stimuli`` table."""

    def __init__(self, stimulus_id=None, tuple=None):
        """Create a stimulus from a fetched row or by looking up ``stimulus_id``.

        :param stimulus_id: exact id of the stimulus; must match one row.
        :param tuple: an already fetched row; takes precedence over
            ``stimulus_id``.
        :raises AssertionError: if ``stimulus_id`` matches no or several rows.
        """
        if tuple:
            self.__tuple = tuple
        elif stimulus_id:
            stims = Stimuli & ("stimulus_id = '%s'" % stimulus_id)
            assert(len(stims) == 1), "Stimulus_id does not exist or is not unique!"
            self.__tuple = stims.fetch(limit=1, as_dict=True)[0]
        else:
            # Keep __str__ and settings usable on an unlinked object.
            self.__tuple = {}
            print("Empty Stimulus, not linked to any database entry!")

    def __str__(self):
        header = "Stimulus %s: " % _safe_get_val(self.__tuple, "stimulus_id", "")
        timing = "\nStart time/index: %0.4f/%i, duration: %.3f" % (
            _safe_get_val(self.__tuple, "start_time", 0.0),
            _safe_get_val(self.__tuple, "start_index", -1),
            _safe_get_val(self.__tuple, "duration", 0.0))
        return header + timing

    @property
    def settings(self):
        """Value of the ``settings`` column, or ``""`` if unknown."""
        return _safe_get_val(self.__tuple, "settings", "")


class Subject:
    """Wrapper around one row of the ``Subjects`` table."""

    def __init__(self, subject_id=None, tuple=None):
        """Create a subject from a fetched row or by looking up ``subject_id``.

        :param subject_id: id of the subject; used in an SQL ``like``
            restriction and must match exactly one row.
        :param tuple: an already fetched row; takes precedence over
            ``subject_id``.
        :raises AssertionError: if ``subject_id`` matches no or several rows.
        """
        if tuple:
            self.__tuple = tuple
        elif subject_id:
            # Build the restriction first: a method call binds tighter than
            # ``&``, so ``Subjects & "...".format(...).fetch()`` would call
            # fetch() on the string.
            subjs = (Subjects & "subject_id like '{0:s}'".format(subject_id))
            assert (len(subjs) == 1), "Subject id does not exist or is not unique!"
            self.__tuple = subjs.fetch(limit=1, as_dict=True)[0]
        else:
            self.__tuple = {}
            print("Empty Subject, not linked to any database entry!")

    @property
    def subject_id(self):
        """Value of the ``subject_id`` column (``None`` if unlinked)."""
        return _safe_get_val(self.__tuple, "subject_id")

    @property
    def species(self):
        """Value of the ``species`` column (``None`` if unlinked)."""
        return _safe_get_val(self.__tuple, "species")

    @property
    def cells(self):
        """List of ``Cell`` objects recorded in this subject."""
        if not self.__tuple:
            # An empty restriction would match every cell.
            return []
        cs = Cells & self.__tuple
        return [Cell(tuple=c) for c in cs]

    @property
    def properties(self):
        """Rows of ``SubjectProperties`` for this subject, as dicts."""
        return (SubjectProperties & self.__tuple).fetch(as_dict=True)

    @staticmethod
    def find_subjects(species=None):
        """Return all subjects, optionally only those whose species contains ``species``."""
        subjs = Subjects & True
        if species:
            subjs = (Subjects & "species like '%{0:s}%'".format(species))
        return [Subject(tuple=s) for s in subjs]

    @staticmethod
    def unique_species():
        """Return the unique values of the ``species`` column."""
        all_species = (Subjects & True).fetch("species")
        return np.unique(all_species)


if __name__ == "__main__":
    from IPython import embed
    cell = Cell("2010-04-16-ak")
    embed()

# NOTE(review): the patch this module was recovered from also adds a scratch
# driver, test.py, whose text shares the last source line with this module.
# It belongs in its own file and is reproduced verbatim so nothing is lost:
#
#     import os
#     import glob
#     import fishbook as fb
#     from IPython import embed
#     embed()
#
#     data_dir = "/data/apteronotus"
#     datasets = glob.glob(os.path.join(data_dir, '/data/apteronotus/2010-06-21-ac'))
#     fb.database.populate(datasets, True)