make it a package

2019-09-18 17:13:49 +02:00
parent f6c170525b
commit c74e54e1ee
7 changed files with 338 additions and 217 deletions
--- a/fishbook/init.py
+++ b/fishbook/init.py
@@ -0,0 +1,3 @@
+from fishbook.fishbook import *
+import fishbook.database as database
+__all__ = ['fishbook', 'database']
--- a/fishbook/baseline_data.py
+++ b/fishbook/baseline_data.py
@@ -0,0 +1,98 @@
+# Debug output left in: prints the module and package name at import time.
+print(__name__)
+print(__package__)
+# NOTE(review): presumably forces the package name so that the absolute
+# ``fishbook.*`` imports below resolve when this file is run directly -- confirm.
+__package__ = "fishbook"
+import os
+
+import nixio as nix
+import numpy as np
+from IPython import embed
+
+from fishbook.database.database import Datasets, Repros
+from fishbook.fishbook import Dataset
+#from .database.database import *
+#from .fishbook import Cell, Dataset
+
+#schema = dj.schema("fish_book", locals())
+
+class BaselineData(object):
+    """Data of the baseline-activity RePro runs of one dataset.
+
+    On construction all entries of the ``Repros`` table that belong to the
+    dataset and whose ``repro_name`` starts with 'BaselineActivity' are looked
+    up. For each of them the data are read either from the dataset's nix file
+    (tag data "Spikes-1") or, as a fallback, from ``basespikes1.dat``.
+    """
+
+    def __init__(self, dataset:"Dataset"):
+        self.__data = []
+        self.__dataset = dataset
+        self.__cell = dataset.cells[0]
+        self._get_data()
+
+    def _get_data(self):
+        """(Re-)read the data of all baseline runs into the internal list."""
+        self.__data = []
+        if not self.__dataset:
+            # Without a dataset there is nothing to query.
+            return
+
+        repros = (Repros & self.__dataset & "repro_name like 'BaselineActivity%'")
+        for r in repros:
+            self.__data.append(self.__read_data(r))
+
+    def __read_data(self, r):
+        """Read the data of repro run ``r`` from nix file or directory."""
+        if self.__dataset["has_nix"]:
+            return self.__read_data_from_nix(r)
+        return self.__read_data_from_directory(r)
+
+    @property
+    def dataset(self):
+        """The dataset this object was created for."""
+        return self.__dataset
+
+    @property
+    def data(self, index:int=0):
+        """Data of the first baseline run, ``None`` if no run was found.
+
+        Attribute access always uses ``index=0``; the parameter is only kept
+        for backward compatibility of the signature.
+        """
+        return self.__data[index] if 0 <= index < len(self.__data) else None
+
+    @property
+    def size(self):
+        """Number of baseline runs that were read."""
+        return len(self.__data)
+
+    def __str__(self):
+        return "Baseline data of %s " % self.__dataset
+
+    def __read_data_from_nix(self, r)->np.ndarray:
+        """Read the "Spikes-1" data of the tag named ``r["repro_id"]``.
+
+        Falls back to the directory reader when the nix file does not exist
+        and returns an empty array when the tag cannot be found.
+        """
+        data_source = os.path.join(self.__dataset["data_source"], self.__dataset["dataset_id"] + ".nix")
+        if not os.path.exists(data_source):
+            print("Data not found! Trying from folder")
+            return self.__read_data_from_directory(r)
+        f = nix.File.open(data_source, nix.FileMode.ReadOnly)
+        try:
+            b = f.blocks[0]
+            try:
+                t = b.tags[r["repro_id"]]
+            except (KeyError, IndexError):
+                t = None
+            if t is None:
+                print("Tag not found!")
+                return np.asarray([])
+            # [:] copies the data into memory before the file is closed
+            return t.retrieve_data("Spikes-1")[:]
+        finally:
+            f.close()
+
+    def __read_data_from_directory(self, r)->np.ndarray:
+        """Read the data of run ``r["run"]`` from ``basespikes1.dat``.
+
+        Returns an empty array if the file or the requested run is missing.
+        """
+        data_source = os.path.join(self.__dataset["data_source"], "basespikes1.dat")
+        if not os.path.exists(data_source):
+            return np.asarray([])
+        found_run = False
+        with open(data_source, 'r') as f:
+            # readline-based iteration, because __do_read continues reading
+            # from the same file object
+            for l in iter(f.readline, ""):
+                if "index" in l:
+                    index = int(l.strip("#").strip().split(":")[-1])
+                    found_run = index == r["run"]
+                if l.startswith("#Key") and found_run:
+                    return self.__do_read(f)
+        return np.asarray([])
+
+    def __do_read(self, f)->np.ndarray:
+        """Read one float per line from ``f`` until a blank or '#' line.
+
+        The first two lines are skipped before the values are read.
+        """
+        data = []
+        f.readline()
+        f.readline()
+        l = f.readline()
+        while l and "#" not in l and len(l.strip()) > 0:
+            data.append(float(l.strip()))
+            l = f.readline()
+        return np.asarray(data)
+
+
+if __name__ == "__main__":
+    # Manual test session; guarded so that importing the module neither opens
+    # an interactive shell nor terminates the interpreter.
+    print("Test")
+    # NOTE(review): fetch(..., as_dict=True) returns a list of dicts -- confirm
+    # that Dataset accepts it (other call sites pass the first element only).
+    dataset = Dataset(tuple=(Datasets & "dataset_id like '2018-11-09-aa-%'").fetch(limit=1, as_dict=True))
+    baseline = BaselineData(dataset)
+    embed()
--- a/fishbook/database/init.py
+++ b/fishbook/database/init.py
@@ -0,0 +1,3 @@
+from .database import *
+
+__all__ = ['database']
--- a/fishbook/database/database.py
+++ b/fishbook/database/database.py
@@ -0,0 +1,533 @@
+import numpy as np
+import datajoint as dj
+import nixio as nix
+import os
+import glob
+from .util import read_info_file, read_dataset_info, read_stimuli_file
+from .util import find_key_recursive, deep_get, find_mtags_for_tag
+from .util import mtag_settings_to_yaml, nix_metadata_to_yaml
+import uuid
+import yaml
+
+# Schema object used as ``@schema`` decorator on the table classes below.
+schema = dj.schema("fish_book", locals())
+
+
+@schema
+class Datasets(dj.Manual):
+    """Table of recorded datasets, keyed by ``dataset_id``."""
+    definition = """ # _Dataset
+       dataset_id : varchar(256)
+       ----
+       data_source : varchar(512) # path to the dataset
+       experimenter : varchar(512)
+       setup : varchar(128) 
+       recording_date : date
+       quality : varchar(512)
+       comment : varchar(1024)
+       duration : float
+       has_nix : bool
+       """
+
+    @staticmethod
+    def get_template_tuple(id=None):
+        """Return a dict with all columns of the table.
+
+        If ``id`` is given the stored entry with that ``dataset_id`` is
+        returned, otherwise a template filled with default values.
+        """
+        if id is not None:
+            return dict((Datasets() & {"dataset_id": id}).fetch1())
+        return dict(dataset_id=None, data_source="", experimenter="", setup="", recording_date=None,
+                    quality="", comment="", duration=0.0, has_nix=False)
+
+    @staticmethod
+    def get_nix_file(key):
+        """Return the path of the nix file of the dataset matching ``key``.
+
+        Returns ``None`` if the dataset is flagged to be ignored, the file
+        does not exist, or it fails the integrity check.
+        """
+        dset = (Datasets() & key).fetch1()
+        # "ignore" is not a column of this table; .get() avoids a KeyError
+        if dset.get("ignore"):
+            return None
+        file_path = os.path.join(dset["data_source"], dset["dataset_id"] + ".nix")
+        if not (os.path.exists(file_path)):
+            print("\t No nix file found for path: %s" % dset["data_source"])
+            return None
+        if not Datasets.check_file_integrity(file_path):
+            return None
+        return file_path
+
+    @staticmethod
+    def check_file_integrity(nix_file):
+        """Return True if ``nix_file`` opens and has a "Recording" section.
+
+        Any error while opening or reading the file marks it as not sane.
+        """
+        sane = True
+        f = None
+        try:
+            f = nix.File.open(nix_file, nix.FileMode.ReadOnly)
+            b = f.blocks[0]
+            m = b.metadata
+            if "Recording" not in m.sections:
+                print("\t Could not find Recording section in dataset: %s" % nix_file)
+                sane = False
+        except Exception:
+            print("file: %s is NOT SANE!" % nix_file)
+            sane = False
+        finally:
+            # close the file on every path, also after a failed check
+            if f is not None:
+                f.close()
+        return sane
+
+
+@schema
+class Subjects(dj.Manual):
+    """Table of subjects, keyed by ``subject_id``."""
+    definition = """
+    # Subjects
+    subject_id : varchar(256)
+    ----
+    species : varchar(256)
+    """
+
+    @staticmethod
+    def get_template_tuple(subject_id=None):
+        """Return the stored entry for ``subject_id`` or an empty template."""
+        if subject_id is not None:
+            return dict((Subjects() & {"subject_id": subject_id}).fetch1())
+        return dict(subject_id=None, species="")
+
+    def make(self, key):
+        """Insert the subject stored in the nix file of the dataset ``key``.
+
+        Does nothing if no usable nix file is found for ``key``.
+        """
+        file_path = Datasets.get_nix_file(key)
+        if file_path is None:
+            return
+        nix_file = nix.File.open(file_path, nix.FileMode.ReadOnly)
+        try:
+            m = nix_file.blocks[0].metadata
+            inserts = Subjects.get_template_tuple()
+            subj_info = m["Recording"]["Subject"]
+            inserts["subject_id"] = subj_info["Identifier"]
+            inserts["species"] = subj_info["Species"][0]
+            # NOTE(review): weight, size and eod_frequency are not columns of
+            # this table's definition (they are declared on SubjectProperties)
+            # -- confirm whether they should still be inserted here.
+            inserts["weight"] = subj_info["Weight"]
+            inserts["size"] = subj_info["Size"]
+            inserts["eod_frequency"] = np.round(subj_info["EOD Frequency"] * 10) / 10
+            inserts.update(key)
+            self.insert1(inserts, skip_duplicates=True)
+        finally:
+            # release the file handle even if reading or inserting fails
+            nix_file.close()
+
+
+# Association table: each row references one Subjects and one Datasets entry.
+@schema
+class SubjectDatasetMap(dj.Manual):
+    definition = """
+    # SubjectDatasetMap
+    -> Subjects
+    -> Datasets
+    """
+
+
+@schema
+class SubjectProperties(dj.Manual):
+    """Table of per-recording subject properties (weight, size, EOD frequency)."""
+    definition = """
+    # _SubjectProperties
+    id : int auto_increment
+    ----
+    -> Subjects
+    recording_date : date
+    weight : float
+    size : float
+    eod_frequency : float
+    """
+
+    # staticmethod like the template helpers of the other tables; without it a
+    # call on an instance would bind the instance to ``id``
+    @staticmethod
+    def get_template_tuple(id=None):
+        """Return the stored entry with the given ``id`` or an empty template."""
+        if id is not None:
+            return dict((SubjectProperties() & {"id": id}).fetch1())
+        return dict(id=None, subject_id=None, recording_date=None, weight=0.0, size=0.0,
+                    eod_frequency=0.0)
+
+
+@schema
+class Cells(dj.Manual):
+    """Table of recorded cells, keyed by ``cell_id``."""
+    definition = """
+    # Table that stores information about recorded cells.
+    cell_id : varchar(256)
+    ----
+    -> Subjects
+    cell_type : varchar(256)
+    firing_rate : float
+    structure : varchar(256)
+    region : varchar(256)
+    subregion : varchar(256)
+    depth : float
+    lateral_pos : float
+    transversal_section : float
+    """
+
+    @staticmethod
+    def get_template_tuple(cell_id=None):
+        """Return the stored entry for ``cell_id`` or a template with defaults."""
+        if cell_id is None:
+            return dict(cell_id=None, subject_id=None, cell_type="", firing_rate=0.0,
+                        depth=0.0, region="", subregion="", structure="",
+                        lateral_pos=0.0, transversal_section=0.0)
+        stored = (Cells() & {"cell_id": cell_id}).fetch1()
+        return dict(stored)
+
+
+# Association table: each row references one Datasets and one Cells entry.
+@schema
+class CellDatasetMap(dj.Manual):
+    definition = """
+    # Table that maps recorded cells to datasets
+    -> Datasets
+    -> Cells
+    """
+
+
+@schema
+class Repros(dj.Manual):
+    """Table of RePro runs, keyed by ``repro_id``, ``run`` and the cell."""
+    definition = """
+    repro_id : varchar(512)     # The name that was given to the RePro run by relacs
+    run : smallint              # A counter counting the runs of the ReProp in this dataset
+    -> Cells                    # 
+    ----
+    repro_name : varchar(512)   # The original name of the RePro itself, not any given name by user or relacs
+    settings : varchar(3000)    # Yaml formatted string containing the repro settings (tag.metadata in case of a nix file)
+    start : float               # The start time of the repro
+    duration : float            # The duration of the repro
+    """
+
+    @staticmethod
+    def get_template_tuple(repro_id=None):
+        """Return the stored entry for ``repro_id`` or a template with defaults."""
+        if repro_id is None:
+            return dict(repro_id=None, cell_id=None, run=0, repro_name="", settings=None, start=None, duration=None)
+        stored = (Repros() & {"repro_id": repro_id}).fetch1()
+        return dict(stored)
+
+
+@schema
+class Stimuli(dj.Manual):
+    """Table of stimuli presented within a RePro run."""
+    definition = """
+    stimulus_id : varchar(50)
+    -> Repros
+    ---
+    stimulus_index : int
+    stimulus_name : varchar(512)
+    mtag_id : varchar(50)
+    start_time : float
+    start_index : int
+    duration : float
+    settings : varchar(3000)
+    """
+
+    @staticmethod
+    def get_template_tuple(stimulus_id=None):
+        """Return the stored entry for ``stimulus_id`` or a template with defaults."""
+        if stimulus_id is None:
+            return dict(stimulus_id=None, stimulus_index=None, stimulus_name="", start_index=0, start_time=0.0,
+                        duration=0.0, settings=None)
+        return dict((Stimuli & {"stimulus_id": stimulus_id}).fetch1())
+
+
+def populate_datasets(data_path, update=False):
+    """Insert the dataset stored in folder ``data_path`` into ``Datasets``.
+
+    Returns False if the folder does not exist, no experimenter could be read
+    from its ``info.dat``, or the dataset is already stored and ``update`` is
+    not set; True after the insert call otherwise.
+    """
+    if not os.path.exists(data_path):
+        return False
+    dset_name = os.path.split(data_path)[-1]
+    info = read_dataset_info(os.path.join(data_path, 'info.dat'))
+    experimenter, rec_date, quality, comment, has_nix, rec_duration, setup = info
+    if not experimenter:
+        return False
+
+    inserts = Datasets.get_template_tuple()
+    inserts.update(
+        dataset_id=dset_name,
+        data_source=data_path,
+        experimenter=experimenter,
+        recording_date=rec_date,
+        # sections that were parsed as dicts are stored as empty strings
+        quality="" if isinstance(quality, dict) else quality,
+        comment="" if isinstance(comment, dict) else comment,
+        duration=rec_duration,
+        setup=setup,
+        has_nix=has_nix,
+    )
+    already_stored = len(Datasets & "dataset_id like '%s'" % inserts["dataset_id"]) > 0
+    if already_stored and not update:
+        print('\t\t %s is already in database!' % dset_name)
+        return False
+    Datasets().insert1(inserts, skip_duplicates=True)
+    return True
+
+
+def _subject_identifier(subj, info, dset_name):
+    """Return the subject identifier found in ``subj`` or ``info``.
+
+    Falls back to 'unspecified_<dset_name>' if no identifier is given or the
+    entry is a dict instead of a value.
+    """
+    for source in (subj, info):
+        if "Identifier" in source:
+            identifier = source["Identifier"]
+            if isinstance(identifier, dict):
+                break
+            return identifier
+    return "unspecified_" + dset_name
+
+
+def populate_subjects(data_path):
+    """Insert subject, subject-dataset link and subject properties.
+
+    The information is read from ``info.dat`` in ``data_path``. Returns
+    ``(None, None, False)`` if that file does not exist, otherwise ``None``.
+    """
+    print("\tImporting subject(s) of %s" % data_path)
+    dset_name = os.path.split(data_path)[-1]
+    info_file = os.path.join(data_path, 'info.dat')
+    if not os.path.exists(info_file):
+        return None, None, False
+    info = read_info_file(info_file)
+
+    p = []
+    find_key_recursive(info, "Subject", p)
+    subj = {}
+    if len(p) > 0:
+        subj = deep_get(info, p)
+
+    inserts = Subjects.get_template_tuple()
+    inserts["subject_id"] = _subject_identifier(subj, info, dset_name)
+    # a missing Subject section must not abort the import: keep the template default
+    inserts["species"] = subj.get("Species", inserts["species"])
+    Subjects().insert1(inserts, skip_duplicates=True)
+
+    # multi match entry
+    dataset = dict((Datasets() & {"dataset_id": dset_name}).fetch1())
+    mm = dict(dataset_id=dataset["dataset_id"], subject_id=inserts["subject_id"])
+    SubjectDatasetMap.insert1(mm, skip_duplicates=True)
+
+    # subject properties; the slices strip the trailing unit characters
+    props = SubjectProperties.get_template_tuple()
+    props["subject_id"] = inserts["subject_id"]
+    props["recording_date"] = dataset["recording_date"]
+    if "Weight" in subj:
+        props["weight"] = np.round(float(subj["Weight"][:-1]), 1)
+    if "Size" in subj:
+        props["size"] = np.round(float(subj["Size"][:-2]), 1)
+    if "EOD Frequency" in subj:
+        props["eod_frequency"] = np.round(float(subj["EOD Frequency"][:-2]))
+    # the auto-increment id must not take part in the duplicate check
+    p = props.copy()
+    p.pop("id")
+    if len(SubjectProperties & p) == 0:
+        SubjectProperties.insert1(props, skip_duplicates=True)
+
+
+def populate_cells(data_path):
+    """Insert the cell of the dataset in ``data_path`` and link it to the dataset.
+
+    The information is read from ``info.dat`` in ``data_path``. Returns
+    ``(None, None, False)`` if that file does not exist, otherwise ``None``.
+    """
+    print("\tImporting cell(s) of %s" % data_path)
+    dset_name = os.path.split(data_path)[-1]
+    info_file = os.path.join(data_path, 'info.dat')
+    if not os.path.exists(info_file):
+        return None, None, False
+    info = read_info_file(info_file)
+
+    subject_path, cell_path, rate_path = [], [], []
+    find_key_recursive(info, "Subject", subject_path)
+    subject_info = deep_get(info, subject_path)
+    find_key_recursive(info, "Cell", cell_path)
+    cell_info = deep_get(info, cell_path)
+    find_key_recursive(info, "Firing Rate1", rate_path)
+    firing_rate = deep_get(info, rate_path, default=0.0)
+    if isinstance(firing_rate, str):
+        firing_rate = float(firing_rate[:-2])
+
+    # first source that has an "Identifier" decides; dict entries count as unspecified
+    subj_id = "unspecified_" + dset_name
+    for source in (subject_info, info):
+        if "Identifier" in source.keys():
+            if not isinstance(source["Identifier"], dict):
+                subj_id = source["Identifier"]
+            break
+    dataset = dict((Datasets & {"dataset_id": dset_name}).fetch1())
+    subject = dict((Subjects & {"subject_id": subj_id}).fetch1())
+
+    dataset_id = dataset["dataset_id"]
+    cell_id = "-".join(dataset_id.split("-")[:4]) if len(dataset_id) > 4 else dataset_id
+    cell_props = Cells.get_template_tuple()
+    cell_props["subject_id"] = subject["subject_id"]
+    cell_props["cell_id"] = cell_id
+    cell_props["cell_type"] = cell_info["CellType"]
+    cell_props["firing_rate"] = firing_rate
+
+    # optional text entries are copied as they are
+    for column, key in (("structure", "Structure"),
+                        ("region", "BrainRegion"),
+                        ("subregion", "BrainSubRegion")):
+        if key in cell_info.keys():
+            cell_props[column] = cell_info[key]
+    # optional numbers; the last two characters of these entries are cut off
+    for column, key in (("depth", "Depth"), ("lateral_pos", "Lateral position")):
+        if key in cell_info.keys():
+            cell_props[column] = float(cell_info[key][:-2])
+    if "Transverse section" in cell_info.keys():
+        cell_props["transversal_section"] = float(cell_info["Transverse section"])
+
+    Cells.insert1(cell_props, skip_duplicates=True)
+
+    # multi match entry
+    link = dict(dataset_id=dataset["dataset_id"], cell_id=cell_props["cell_id"])
+    CellDatasetMap.insert1(link, skip_duplicates=True)
+
+
+def _import_repros_from_block(b, cell_id):
+    """Insert all repro runs of nix block ``b`` and their stimuli for ``cell_id``."""
+    repro_runs = [t for t in b.tags if "relacs.repro_run" in t.type]
+    for t in repro_runs:
+        rs = t.metadata.find_sections(lambda x: "Run" in x.props)
+        if len(rs) == 0:
+            continue
+        rs = rs[0]
+        print("\t\t%s" % rs["RePro"])
+
+        rp = Repros.get_template_tuple()
+        rp["run"] = rs["Run"]
+        rp["repro_name"] = rs["RePro"]
+        rp["cell_id"] = cell_id
+        rp["repro_id"] = t.name
+        settings = t.metadata.find_sections(lambda x: "settings" in x.type)
+        if len(settings) > 0:
+            rp["settings"] = nix_metadata_to_yaml(settings[0])
+        else:
+            rp["settings"] = nix_metadata_to_yaml(t.metadata)
+        rp["start"] = t.position[0]
+        rp["duration"] = t.extent[0]
+        Repros.insert1(rp, skip_duplicates=True)
+
+        # import Stimuli; only the remaining fields of the repro entry are
+        # copied into each stimulus entry
+        repro = dict((Repros & dict(repro_id=rp["repro_id"], cell_id=cell_id)).fetch1())
+        for column in ("settings", "repro_name", "start", "duration"):
+            repro.pop(column)
+
+        mtags, positions = find_mtags_for_tag(b, t)
+        for i, mt in enumerate(mtags):
+            mt_positions = np.atleast_2d(mt.positions[:]).T
+            mt_extents = np.atleast_2d(mt.extents[:]).T
+            for p in positions[i]:
+                stim = Stimuli.get_template_tuple()
+                stim["stimulus_id"] = str(uuid.uuid1())
+                stim["stimulus_index"] = p
+                stim["start_time"] = mt_positions[p, 0]
+                stim["start_index"] = -1
+                stim["duration"] = mt_extents[p, 0]
+                stim["settings"] = mtag_settings_to_yaml(mt, p)
+                stim["mtag_id"] = mt.id
+                stim["stimulus_name"] = mt.name
+                stim.update(repro)
+                Stimuli.insert1(stim, skip_duplicates=True)
+
+
+def scan_nix_file_for_repros(dataset):
+    """Import repro runs and stimuli from all nix files of ``dataset``.
+
+    ``dataset`` is a ``Datasets`` entry as dict. Files that fail the integrity
+    check are skipped.
+    """
+    print("\t\tscanning nix file")
+    cell_id = (Cells * CellDatasetMap * (Datasets & "dataset_id = '%s'" % dataset["dataset_id"])).fetch("cell_id", limit=1)[0]
+    nix_files = glob.glob(os.path.join(dataset["data_source"], "*.nix"))
+    for nf in nix_files:
+        if not Datasets.check_file_integrity(nf):
+            print("\t\tfile is not sane!!!")
+            continue
+        f = nix.File.open(nf, nix.FileMode.ReadOnly)
+        try:
+            _import_repros_from_block(f.blocks[0], cell_id)
+        finally:
+            # close the file even if reading or inserting fails
+            f.close()
+
+
+def scan_folder_for_repros(dataset):
+    """Import repro runs and stimuli from the stimuli file of ``dataset``.
+
+    Used for datasets without a nix file. ``dataset`` is a ``Datasets`` entry
+    as dict; its "data_source" folder is passed to ``read_stimuli_file``.
+    """
+    print("\t\tNo nix-file, scanning directory!")
+    repro_settings, stim_indices = read_stimuli_file(dataset["data_source"])
+    repro_counts = {}
+    cell_id = (Cells * CellDatasetMap * (Datasets & "dataset_id = '%s'" % dataset["dataset_id"])).fetch("cell_id", limit=1)[0]
+    for i, (rs, si) in enumerate(zip(repro_settings, stim_indices)):
+        rp = Repros.get_template_tuple()
+        # run counter: try lower-case key first, then capitalized; -1 if missing
+        path = []
+        if not find_key_recursive(rs, "run", path):
+            find_key_recursive(rs, "Run", path)
+        if len(path) > 0:
+            rp["run"] = deep_get(rs, path, 0)
+        else:
+            rp["run"] = -1
+
+        path = []
+        if not find_key_recursive(rs, "repro", path):
+            find_key_recursive(rs, "RePro", path)
+        print("\t\t %s" % deep_get(rs, path, "None"))
+        rp["repro_name"] = deep_get(rs, path, "None")
+
+        path = []
+        # repro_id is the repro name followed by a per-name counter
+        if rp["repro_name"] in repro_counts.keys():
+            repro_counts[rp["repro_name"]] += 1
+        else:
+            repro_counts[rp["repro_name"]] = 1
+        rp["cell_id"] = cell_id
+        rp["repro_id"] = rp["repro_name"] + str(repro_counts[rp["repro_name"]])
+        rp["start"] = 0.
+        rp["duration"] = 0.
+        rp["settings"] = yaml.dump(rs)
+        Repros.insert1(rp, skip_duplicates=True)
+
+        # import stimuli; only the remaining fields of the repro entry are
+        # copied into each stimulus entry
+        repro = dict((Repros & dict(repro_id=rp["repro_id"], cell_id=cell_id)).fetch1())
+        repro.pop("settings")
+        repro.pop("repro_name")
+        repro.pop("start")
+        repro.pop("duration")
+        for j, k in enumerate(si.keys()):
+            s = int(si[k])
+            stim_start = 0.
+            path = []
+            if not find_key_recursive(rs, "duration", path):
+                find_key_recursive(rs, "Duration", path)
+            if len(path) > 0 :
+                stim_duration = deep_get(rs, path, None)
+                # NOTE(review): raises if the entry is None or has no "s";
+                # "ms" values are stored without conversion to seconds -- confirm
+                if "ms" in stim_duration:
+                    stim_duration = float(stim_duration[:stim_duration.index("ms")])
+                else:
+                    stim_duration = float(stim_duration[:stim_duration.index("s")])
+            else:
+                stim_duration = 0.0
+
+            stim = Stimuli.get_template_tuple()
+            stim["stimulus_id"] = str(uuid.uuid1())
+            stim["stimulus_index"] = j
+            stim["start_time"] = stim_start
+            stim["start_index"] = s
+            stim["duration"] = stim_duration
+            stim["settings"] = yaml.dump(rs)
+            stim["mtag_id"] = ""
+            stim["stimulus_name"] = ""
+            stim.update(repro)
+            Stimuli.insert1(stim, skip_duplicates=True)
+
+
+def populate_repros(data_path):
+    """Import the repro runs of the dataset stored in ``data_path``.
+
+    Returns False unless exactly one dataset with the folder's name is stored,
+    True after the scan otherwise.
+    """
+    print("\tImporting RePro(s) of %s" % data_path)
+    dset_name = os.path.split(data_path)[-1]
+    matches = Datasets & {"dataset_id": dset_name}
+    if len(matches) != 1:
+        return False
+    dataset = dict(matches.fetch1())
+
+    # the nix file is preferred, the plain folder is the fallback
+    scan = scan_nix_file_for_repros if dataset["has_nix"] else scan_folder_for_repros
+    scan(dataset)
+    return True
+
+
+def drop_tables():
+    """Drop the ``Datasets`` and ``Subjects`` tables."""
+    Datasets.drop()
+    Subjects.drop()
+
+
+def populate(datasets, update=False):
+    """Import all dataset folders in ``datasets`` into the database.
+
+    Folders that ``populate_datasets`` rejects are skipped. A failure while
+    importing the repro runs of one dataset is reported and does not stop the
+    import of the remaining ones.
+    """
+    for i, d in enumerate(datasets):
+        print("Importing %i of %i: %s" % (i + 1, len(datasets), d))
+        if not populate_datasets(d, update):
+            continue
+        populate_subjects(d)
+        populate_cells(d)
+        try:
+            populate_repros(d)
+        except Exception as e:
+            # ``except ():`` matched nothing, so errors were never caught
+            print("\t\tsomething went wrong! %s (%s)" % (d, e))
+
+
+if __name__ == "__main__":
+    data_dir = "/data/apteronotus"
+    # data_dir = "../high_freq_chirps/data"
+    # drop_tables()
+    # datasets = glob.glob("/Users/jan/zwischenlager/2012-*")
+    # The pattern is relative to data_dir: an absolute second argument would
+    # make os.path.join discard data_dir.
+    datasets = glob.glob(os.path.join(data_dir, '2011-*'))
+    populate(datasets, update=False)
+
--- a/fishbook/database/util.py
+++ b/fishbook/database/util.py
@@ -0,0 +1,306 @@
+from functools import reduce
+import numpy as np
+import nixio as nix
+import re
+import os
+import glob
+import datetime as dt
+import subprocess
+from IPython import embed
+
+
+def read_info_file(file_name):
+    """
+    Reads the info file and returns the stored metadata in a dictionary.
+    Lines that do not start with '#' are ignored. A line without ': ' opens a
+    new section (a dict stored under the section name), key-value lines are
+    stored in the current section or, before any section, at top level.
+    @param file_name:  The name of the info file.
+    @return: dictionary, the stored information.
+    """
+    try:
+        with open(file_name, 'r') as f:
+            lines = f.readlines()
+    except UnicodeDecodeError:
+        # Fall back to latin-1, which decodes any byte sequence, instead of
+        # rewriting the file on disk through a shell command.
+        with open(file_name, 'r', encoding="latin-1") as f:
+            lines = f.readlines()
+
+    root = {}
+    sec = root
+    for l in lines:
+        if not l.startswith("#"):
+            continue
+        l = l.strip("#").strip()
+        if len(l) == 0:
+            continue
+        if ": " not in l:  # subsection
+            sec = {}
+            root[l[:-1] if l.endswith(":") else l] = sec
+        else:
+            # split only once so that values containing ': ' stay complete
+            key, value = l.split(': ', 1)
+            sec[key.strip()] = value.strip('"').strip()
+    return root
+
+
+def parse_metadata_line(line):
+    """Split a '#' metadata line into name and value.
+
+    :param line: the line to parse
+    :return: (None, None) if the line does not start with '#', (name, None)
+             if there is no value, otherwise (name, value) where value is the
+             text after the last ':'
+    """
+    if not line.startswith("#"):
+        return None, None
+
+    fields = [field.strip() for field in line.strip("#").strip().split(":")]
+    name, value = fields[0], fields[-1]
+    if len(fields) == 1 or not value:
+        return name, None
+    return name, value
+
+
+def has_signal(line, col_names):
+    """
+    Checks whether a signal/stimulus was given in the line.
+    :param line: the current line of the data table
+    :param col_names: The names of the table header columns
+    :return: True if any column named "signal" (case-insensitive) holds a
+             value that does not start with "-"
+    """
+    # zip stops at the shorter sequence, i.e. columns without a value are skipped
+    return any(
+        name.lower() == "signal" and len(cell.strip()) > 0 and cell.strip()[0] != "-"
+        for name, cell in zip(col_names, line.split())
+    )
+
+
+def parse_table(lines, start_index):
+    """
+    Collects the first column of the table rows starting at start_index.
+
+    :param lines: all lines of the file
+    :param start_index: index of the line at which the table starts; the
+                        column names are taken from the line three below it
+    :return: dict mapping a running count to the first entry of each row,
+             starting with the first row that has a signal, and the index of
+             the line after the table
+    """
+    data_indices = {}
+    names = re.split(r'\s{2,}', lines[start_index + 3][1:].strip())
+    cursor = start_index
+    while cursor < len(lines):
+        row = lines[cursor].strip()
+        cursor += 1
+        if len(row) == 0:
+            # an empty line ends the table
+            break
+        if row.startswith("#"):  # ignore
+            continue
+        # once the first row with a signal was seen every row is recorded
+        if len(data_indices) > 0 or has_signal(row, names):
+            data_indices[len(data_indices)] = row.split()[0]
+    return data_indices, cursor
+
+
+def read_stimuli_file(dataset):
+    """Read the repro settings and stimulus indices from ``stimuli.dat``.
+
+    :param dataset: path of the dataset folder that contains ``stimuli.dat``
+    :return: list of settings dicts (one per repro run) and list of dicts with
+             the table entries returned by ``parse_table`` for each run
+    """
+    repro_settings = []
+    stimulus_indices = []
+    settings = {}
+    with open(os.path.join(dataset, 'stimuli.dat'), 'r') as f:
+        lines = f.readlines()
+    index = 0
+    current_section = None
+    current_section_name = ""
+    while index < len(lines):
+        l = lines[index].strip()
+        if len(l) == 0:
+            index += 1
+        elif l.startswith("#") and "key" not in l.lower():
+            # advance first: a bare "#" line yields an empty name and must
+            # not be parsed again and again
+            index += 1
+            name, value = parse_metadata_line(l)
+            if not name:
+                continue
+            if not value:
+                if current_section:
+                    settings[current_section_name] = current_section.copy()
+
+                current_section = {}
+                current_section_name = name
+            else:
+                if current_section is None:
+                    # key-value line before any section header
+                    current_section = {}
+                current_section[name] = value
+        elif l.lower().startswith("#key"):  # table data coming
+            data, index = parse_table(lines, index)
+            # we are done with this repro run
+            stimulus_indices.append(data)
+            if current_section is not None:
+                settings[current_section_name] = current_section.copy()
+            repro_settings.append(settings.copy())
+            current_section = None
+            settings = {}
+        else:
+            # data lines, ignore them here
+            index += 1
+    return repro_settings, stimulus_indices
+
+
+def find_key_recursive(dictionary, key, path=None):
+    """Search a nested dictionary for ``key`` (depth first).
+
+    :param dictionary: the (possibly nested) dictionary to search
+    :param key: the key to look for
+    :param path: list to which the keys leading to ``key`` (outermost first,
+                 ``key`` last) are appended; untouched if nothing is found
+    :return: True if the key was found, False otherwise
+    """
+    assert(isinstance(dictionary, dict))
+    if path is None:
+        # no shared mutable default: every call starts with its own list
+        path = []
+    if key in dictionary:
+        path.append(key)
+        return True
+    for k, value in dictionary.items():
+        if isinstance(value, dict):
+            sub_path = []
+            if find_key_recursive(value, key, sub_path):
+                # parent key first, then the path found below it
+                path.append(k)
+                path.extend(sub_path)
+                return True
+    return False
+
+
+def deep_get(dictionary, keys, default=None):
+    """Follow the list ``keys`` through nested dictionaries.
+
+    Returns ``default`` for a missing key or when a non-dict is reached before
+    all keys are used; an empty key list returns ``dictionary`` itself.
+    """
+    assert(isinstance(dictionary, dict))
+    assert(isinstance(keys, list))
+    current = dictionary
+    for key in keys:
+        if isinstance(current, dict):
+            current = current.get(key, default)
+        else:
+            current = default
+    return current
+
+
+def _get_string(dictionary: dict, key:str, alt_key=None, default=None):
+    """Return the entry stored under ``key`` (or ``alt_key``) anywhere in ``dictionary``.
+
+    ``default`` is returned if ``key`` is not found and no ``alt_key`` is
+    given. A dict entry is replaced by ``default`` only if ``default`` is truthy.
+    """
+    path = []
+    find_key_recursive(dictionary, key, path)
+    if len(path) == 0:
+        if not alt_key:
+            return default
+        find_key_recursive(dictionary, alt_key, path)
+    value = deep_get(dictionary, path, default)
+    if default and value != default and isinstance(value, dict):
+        return default
+    return value
+
+
+def _get_date(dictionary: dict, key: str, alt_key=None, default=None):
+    """Return the ISO formatted date stored under ``key`` (or ``alt_key``).
+
+    ``default`` is returned if neither key is found or the entry is not a
+    string (e.g. a section). A string that is not a valid ISO date raises
+    ValueError.
+    """
+    p = []
+    find_key_recursive(dictionary, key, p)
+    if len(p) == 0 and alt_key:
+        find_key_recursive(dictionary, alt_key, p)
+    if len(p) == 0:
+        return default
+    value = deep_get(dictionary, p, default)
+    if not isinstance(value, str):
+        # only strings can be parsed; dicts and other entries give the default
+        return default
+    return dt.date.fromisoformat(value)
+
+
+def read_dataset_info(info_file):
+    """Read the dataset description from an info file.
+
+    :param info_file: path of the info file
+    :return: tuple (experimenter, recording date, quality, comment, has_nix,
+             recording duration, setup); default values if the file is missing
+    """
+    if not os.path.exists(info_file):
+        return "", None, "", "", False, 0.0, ""
+
+    # a dataset has a nix file if one lies next to the info file
+    folder = os.path.sep.join(info_file.split(os.path.sep)[:-1])
+    has_nix = len(glob.glob(folder + os.path.sep + "*.nix")) > 0
+
+    info = read_info_file(info_file)
+    exp = _get_string(info, "Experimenter")
+    rec_date = _get_date(info, "Date")
+    quality = _get_string(info, "Recording quality")
+    comment = _get_string(info, "Comment", default="")
+    rec_duration = _get_string(info, "Recording duration", "Recording duratio", default=0.0)
+
+    if isinstance(rec_duration, dict):
+        rec_duration = 0.0
+    elif rec_duration != 0.0 and isinstance(rec_duration, str) and "min" in rec_duration:
+        # cut off the trailing three characters of the "min" entry
+        rec_duration = rec_duration[:-3]
+
+    setup = ""
+    setup_info = _get_string(info, "Setup", default=None)
+    if setup_info and isinstance(setup_info, dict):
+        setup = _get_string(setup_info, "Identifier")
+    return exp, rec_date, quality, comment, has_nix, rec_duration, setup
+
+
+def nix_metadata_to_dict(section):
+    """Convert a metadata section into a nested dictionary.
+
+    Properties map their name to the list of their values, subsections map
+    their name to the dictionary of the subsection.
+    """
+    info = {prop.name: [v.value for v in prop.values] for prop in section.props}
+    info.update({sub.name: nix_metadata_to_dict(sub) for sub in section.sections})
+    return info
+
+
+def nix_metadata_to_yaml(section, cur_depth=0, val_count=1):
+    """Convert a metadata section into a tab-indented, yaml-like string.
+
+    Each property becomes a "name: value" line. Properties with several values
+    are written as a list only if ``val_count`` is larger than 1, otherwise
+    their value is left empty. Subsections are appended one level deeper.
+    """
+    assert(isinstance(section, nix.section.SectionMixin))
+    chunks = ["%s%s:\n" % ("\t" * cur_depth, section.name)]
+    for prop in section.props:
+        n_values = len(prop.values)
+        if val_count > 1 and n_values > 1:
+            text = "[" + ', '.join([v.to_string() for v in prop.values]) + "]"
+        elif n_values == 1:
+            text = prop.values[0].to_string()
+        else:
+            text = ""
+        chunks.append("%s%s: %s\n" % ("\t" * (cur_depth+1), prop.name, text))
+    for sub in section.sections:
+        chunks.append(nix_metadata_to_yaml(sub, cur_depth+1))
+    return "".join(chunks)
+
+
+def find_mtags_for_tag(block, tag):
+    """
+        Finds those multi tags and the respective positions within that match to a certain
+        repro run. A position matches if it lies within the tag's position and
+        extent in every dimension of the tag.
+
+        @:returns list of mtags, list of mtag positions
+    """
+    assert(isinstance(block, nix.pycore.block.Block))
+    assert(isinstance(tag, nix.pycore.tag.Tag))
+    mtags = []
+    indices = []
+    tag_start = np.atleast_1d(tag.position)
+    tag_end = tag_start + np.atleast_1d(tag.extent)
+    dims = range(len(tag_start))
+    for mt in block.multi_tags:
+        position_count = mt.positions.shape[0]
+        in_tag_positions = []
+        for i in range(position_count):
+            mt_start = np.atleast_1d(mt.positions[i, :])
+            mt_end = mt_start + np.atleast_1d(mt.extents[i, :])
+
+            # record each position once, and only if all dimensions fit
+            if all(mt_start[j] >= tag_start[j] and mt_end[j] <= tag_end[j] for j in dims):
+                in_tag_positions.append(i)
+        if len(in_tag_positions) > 0:
+            mtags.append(mt)
+            indices.append(in_tag_positions)
+    return mtags, indices
+
+
+def mtag_settings_to_yaml(mtag, pos_index):
+    """Convert metadata and features of one multi-tag position into a yaml-like string.
+
+    The metadata of ``mtag`` (if any) comes first, followed by one line per
+    feature whose data at ``pos_index`` is one-dimensional.
+    """
+    assert(isinstance(mtag, nix.pycore.multi_tag.MultiTag))
+    assert(0 <= pos_index < mtag.positions.shape[0])
+
+    yaml = nix_metadata_to_yaml(mtag.metadata) if mtag.metadata is not None else ""
+    for i in range(len(mtag.features)):
+        feat = mtag.features[i]
+        feat_data = mtag.retrieve_feature_data(pos_index, i)
+        if len(feat_data.shape) != 1:
+            continue
+
+        # the label is preferred as name, the array name is the fallback
+        if feat.data.label and len(feat.data.label) > 0:
+            feat_name = feat.data.label
+        else:
+            feat_name = feat.data.name
+        if feat.data.unit and len(feat.data.unit) > 0:
+            feat_unit = feat.data.unit
+        else:
+            feat_unit = ""
+
+        if feat_data.shape[0] == 1:
+            feat_content = "%s %s" % (feat_data[0], feat_unit)
+        else:
+            feat_content = "[" + ','.join(map(str, feat_data[:])) + "] %s" % feat_unit
+        yaml += "\t%s: %s\n" % (feat_name, feat_content)
+    return yaml
+
+
+if __name__ == "__main__":
+    """
+    nix_file = "../../science/high_freq_chirps/data/2018-11-09-aa-invivo-1/2018-11-09-aa-invivo-1.nix"
+    f = nix.File.open(nix_file, nix.FileMode.ReadOnly)
+    b = f.blocks[0]
+    yml = nix_metadata_to_yaml(b.tags[0].metadata)
+    print(yml)
+    print("-"* 80)
+    print(nix_metadata_to_yaml(b.metadata))
+    embed()
+    f.close()
+    """
+    dataset = "/Users/jan/zwischenlager/2012-03-23-ad"
+    # read_stimuli_file expects the dataset folder and appends "stimuli.dat" itself
+    settings = read_stimuli_file(dataset)
+    embed()
--- a/fishbook/fishbook.py
+++ b/fishbook/fishbook.py
@@ -0,0 +1,271 @@
+from .database.database import Cells, Datasets, CellDatasetMap, Subjects, SubjectProperties, SubjectDatasetMap, Stimuli, Repros
+import numpy as np
+
+
+def _safe_get_val(dictionary:dict, key, default=None):
+    """Look up ``key`` in ``dictionary``, yielding ``default`` if it is absent."""
+    return dictionary.get(key, default)
+
+
+class Cell:
+    """Read-only view on a single entry of the ``Cells`` table.
+
+    An instance wraps one database row (a dict). It is created either from
+    an already fetched row (``tuple``) or by looking up ``cell_id``. Without
+    any argument an unlinked instance is created whose simple properties
+    return neutral defaults.
+    """
+
+    def __init__(self, cell_id=None, tuple=None):
+        """Wrap ``tuple`` or fetch the row matching ``cell_id``.
+
+        :param cell_id: id (SQL ``like`` pattern) that must match exactly one
+            entry of the ``Cells`` table.
+        :param tuple: an already fetched row; takes precedence over
+            ``cell_id``.
+        :raises AssertionError: if ``cell_id`` matches no or several entries.
+        """
+        if tuple:
+            self.__tuple = tuple
+        elif cell_id:
+            pattern = "cell_id like '{0:s}'".format(cell_id)
+            cells = (Cells & pattern)
+            assert (len(cells) == 1), "Cell id does not exist or is not unique!"
+            self.__tuple = cells.fetch(as_dict=True)[0]
+        else:
+            # Always define the row, otherwise every property of an unlinked
+            # cell fails with an AttributeError.
+            self.__tuple = {}
+            print("Empty Cell, not linked to any database entry!")
+
+    @property
+    def cell_id(self):
+        """The cell's id, or an empty string if unknown."""
+        return self.__tuple.get("cell_id", "")
+
+    @property
+    def cell_type(self):
+        """The cell's type, or an empty string if unknown."""
+        return self.__tuple.get("cell_type", "")
+
+    @property
+    def firing_rate(self):
+        """The stored firing rate, or 0.0 if unknown."""
+        return self.__tuple.get("firing_rate", 0.0)
+
+    @property
+    def location(self):
+        """Dict of the recording-location fields; missing ones map to ""."""
+        keys = ["structure", "region", "subregion", "depth", "lateral_pos", "transversal_section"]
+        return {k: self.__tuple.get(k, "") for k in keys}
+
+    @property
+    def subject(self):
+        """The ``Subject`` this cell belongs to.
+
+        Raises ``KeyError`` when the wrapped row carries no ``subject_id``
+        (e.g. for an unlinked cell).
+        """
+        return Subject(tuple=(Subjects & {"subject_id": self.__tuple["subject_id"]}).fetch(limit=1, as_dict=True)[0])
+
+    @property
+    def repro_runs(self):
+        """List of ``RePro`` objects stored for this cell."""
+        repros = (Repros & "cell_id = '%s'" % self.cell_id)
+        return [RePro(tuple=r) for r in repros]
+
+    @staticmethod
+    def celltypes():
+        """Unique cell types found in the ``Cells`` table."""
+        return np.unique(Cells.fetch("cell_type"))
+
+    @staticmethod
+    def find_cells(cell_type=None, species=None, quality="good"):
+        """Find cells by cell type, species and dataset quality.
+
+        Each given argument adds a SQL ``like`` restriction; ``species`` is
+        matched as a substring. Arguments that are ``None`` (or empty) are
+        not used for filtering.
+
+        :return: list of ``Cell`` objects, one per matching row of the join.
+        """
+        cs = Cells * CellDatasetMap * Datasets * Subjects
+        if cell_type:
+            cs = cs & "cell_type like '{0:s}'".format(cell_type)
+        if species:
+            cs = cs & "species like '%{0:s}%'".format(species)
+        if quality:
+            cs = cs & "quality like '{0:s}'".format(quality)
+        return [Cell(tuple=c) for c in cs]
+
+    def __str__(self):
+        return "Cell: %s \t type: %s\n" % (self.cell_id, self.cell_type)
+
+
+class Dataset:
+    """Read-only view on a single entry of the ``Datasets`` table.
+
+    An instance wraps one database row (a dict). It is created either from
+    an already fetched row (``tuple``) or by looking up ``dataset_id``.
+    The field properties index the row directly and therefore raise
+    ``KeyError`` when the row lacks the field (e.g. for an unlinked dataset).
+    """
+
+    def __init__(self, dataset_id=None, tuple=None):
+        """Wrap ``tuple`` or fetch the row matching ``dataset_id``.
+
+        :param dataset_id: id (SQL ``like`` pattern) that must match exactly
+            one entry of the ``Datasets`` table.
+        :param tuple: an already fetched row; takes precedence over
+            ``dataset_id``.
+        :raises AssertionError: if ``dataset_id`` does not match exactly one
+            entry.
+        """
+        if tuple:
+            self.__tuple = tuple
+        elif dataset_id:
+            pattern = "dataset_id like '{0:s}'".format(dataset_id)
+            dsets = (Datasets & pattern)
+            assert(len(dsets) == 1), "Dataset name is not unique!"
+            # fetch as dict so the row has the same type as the rows handed
+            # in via ``tuple`` and as in the sibling classes
+            self.__tuple = dsets.fetch(limit=1, as_dict=True)[0]
+        else:
+            # always define the row; an unlinked dataset has no fields
+            self.__tuple = {}
+            print("Empty dataset, not linked to any database entry!")
+
+    @property
+    def dataset_id(self):
+        """The ``dataset_id`` field."""
+        return self.__tuple["dataset_id"]
+
+    @property
+    def experimenter(self):
+        """The ``experimenter`` field."""
+        return self.__tuple["experimenter"]
+
+    @property
+    def recording_date(self):
+        """The ``recording_date`` field."""
+        return self.__tuple["recording_date"]
+
+    @property
+    def recording_duration(self):
+        """The ``duration`` field."""
+        return self.__tuple["duration"]
+
+    @property
+    def quality(self):
+        """The ``quality`` field."""
+        return self.__tuple["quality"]
+
+    @property
+    def has_nix(self):
+        """The ``has_nix`` field."""
+        return self.__tuple["has_nix"]
+
+    @property
+    def comment(self):
+        """The ``comment`` field."""
+        return self.__tuple["comment"]
+
+    @property
+    def data_source(self):
+        """The ``data_source`` field."""
+        return self.__tuple["data_source"]
+
+    @property
+    def setup(self):
+        """The ``setup`` field."""
+        return self.__tuple["setup"]
+
+    @property
+    def cells(self):
+        """List of ``Cell`` objects mapped to this dataset."""
+        if not self.__tuple:
+            # restricting by an empty row would match every map entry
+            return []
+        cs = (Cells * (CellDatasetMap & self.__tuple))
+        return [Cell(tuple=c) for c in cs]
+
+    @property
+    def subjects(self):
+        """List of ``Subject`` objects mapped to this dataset."""
+        if not self.__tuple:
+            # restricting by an empty row would match every map entry
+            return []
+        subjs = (Subjects * (SubjectDatasetMap & self.__tuple))
+        return [Subject(tuple=s) for s in subjs]
+
+    @staticmethod
+    def find_datasets(min_duration=None, experimenter=None, quality=None):
+        """Find datasets by duration, experimenter and quality.
+
+        Arguments that are ``None`` (or otherwise falsy) are not used for
+        filtering.
+
+        :param min_duration: keep datasets with ``duration`` greater than this.
+        :param experimenter: substring of the experimenter name.
+        :param quality: SQL ``like`` pattern for the quality field.
+        :return: list of ``Dataset`` objects.
+        """
+        # start from a query expression, as the sibling finders do, instead
+        # of the bare table class
+        dsets = Datasets & True
+        if min_duration:
+            dsets = dsets & "duration > %.2f" % min_duration
+        if experimenter:
+            dsets = dsets & "experimenter like '%{0:s}%'".format(experimenter)
+        if quality:
+            dsets = dsets & "quality like '{0:s}'".format(quality)
+        return [Dataset(tuple=d) for d in dsets]
+
+
+class RePro:
+    """Read-only view on a single entry of the ``Repros`` table.
+
+    An instance wraps one database row (a dict). It is created either from
+    an already fetched row (``tuple``) or by looking up ``repro_id``. Fields
+    missing from the row are reported with neutral defaults.
+    """
+
+    def __init__(self, repro_id=None, tuple=None):
+        """Wrap ``tuple`` or fetch the row matching ``repro_id``.
+
+        :param repro_id: id (SQL ``like`` pattern) that must match exactly one
+            entry of the ``Repros`` table.
+        :param tuple: an already fetched row; takes precedence over
+            ``repro_id``.
+        :raises AssertionError: if ``repro_id`` matches no or several entries.
+        """
+        if tuple:
+            self.__tuple = tuple
+        elif repro_id:
+            # restrict the database table ``Repros``, not this wrapper class
+            repros = (Repros & "repro_id like '{0:s}'".format(repro_id))
+            assert (len(repros) == 1), "Repro id does not exist or is not unique!"
+            self.__tuple = repros.fetch(limit=1, as_dict=True)[0]
+        else:
+            self.__tuple = {}
+            print("Empty RePro, not linked to any database entry!")
+
+    @property
+    def repro_id(self):
+        """The repro's id, or "" if unknown."""
+        return self.__tuple.get("repro_id", "")
+
+    @property
+    def run(self):
+        """The run number, or -1 if unknown."""
+        return self.__tuple.get("run", -1)
+
+    @property
+    def cell_id(self):
+        """Id of the cell the repro was recorded in, or "" if unknown."""
+        return self.__tuple.get("cell_id", "")
+
+    @property
+    def cell(self):
+        """The ``Cell`` looked up by this repro's ``cell_id``."""
+        return Cell(self.cell_id)
+
+    @property
+    def name(self):
+        """The repro's name (``repro_name`` field), or "" if unknown."""
+        return self.__tuple.get("repro_name", "")
+
+    @property
+    def settings(self):
+        """The stored settings, or "" if unknown."""
+        return self.__tuple.get("settings", "")
+
+    @property
+    def start(self):
+        """The ``start`` field, or 0.0 if unknown."""
+        return self.__tuple.get("start", 0.0)
+
+    @property
+    def duration(self):
+        """The ``duration`` field, or 0.0 if unknown."""
+        return self.__tuple.get("duration", 0.0)
+
+    @property
+    def stimuli(self):
+        """List of ``Stimulus`` objects stored for this repro run and cell."""
+        stims = Stimuli & ("repro_id = '%s'" % self.repro_id) & ("cell_id = '%s'" % self.cell_id)
+        return [Stimulus(tuple=s) for s in stims]
+
+    @staticmethod
+    def find_repros(repro_name=None, cell_id=None, settings=None):
+        """Find repro runs by name, cell and settings.
+
+        ``repro_name`` and ``settings`` are matched as substrings,
+        ``cell_id`` must match exactly. Arguments that are ``None`` (or
+        empty) are not used for filtering.
+
+        :return: list of ``RePro`` objects.
+        """
+        repros = Repros & True
+        if repro_name:
+            repros = repros & "repro_name like '%{0:s}%'".format(repro_name)
+        if cell_id:
+            repros = repros & "cell_id = '%s'" % cell_id
+        if settings:
+            repros = repros & "settings like '%{0:s}%'".format(settings)
+        return [RePro(tuple=r) for r in repros]
+
+
+class Stimulus:
+    """Read-only view on a single entry of the ``Stimuli`` table.
+
+    An instance wraps one database row (a dict). It is created either from
+    an already fetched row (``tuple``) or by looking up ``stimulus_id``.
+    Fields missing from the row are reported with neutral defaults.
+    """
+
+    def __init__(self, stimulus_id=None, tuple=None):
+        """Wrap ``tuple`` or fetch the row matching ``stimulus_id``.
+
+        :param stimulus_id: id that must match exactly one entry of the
+            ``Stimuli`` table.
+        :param tuple: an already fetched row; takes precedence over
+            ``stimulus_id``.
+        :raises AssertionError: if ``stimulus_id`` matches no or several
+            entries.
+        """
+        if tuple:
+            self.__tuple = tuple
+        elif stimulus_id:
+            stims = Stimuli & "stimulus_id = '%s'" %stimulus_id
+            assert(len(stims) == 1), "Stimulus_id does not exist or is not unique!"
+            self.__tuple = stims.fetch(limit=1, as_dict=True)[0]
+        else:
+            # always define the row, otherwise __str__ and settings fail with
+            # an AttributeError on an unlinked stimulus
+            self.__tuple = {}
+            print("Empty Stimulus, not linked to any database entry!")
+
+    def __str__(self):
+        header = "Stimulus %s: " % self.__tuple.get("stimulus_id", "")
+        details = "\nStart time/index: %0.4f/%i, duration: %.3f" % (
+            self.__tuple.get("start_time", 0.0),
+            self.__tuple.get("start_index", -1),
+            self.__tuple.get("duration", 0.0))
+        return header + details
+
+    @property
+    def settings(self):
+        """The stored settings, or "" if unknown."""
+        return self.__tuple.get("settings", "")
+
+
+class Subject:
+    """Read-only view on a single entry of the ``Subjects`` table.
+
+    An instance wraps one database row (a dict). It is created either from
+    an already fetched row (``tuple``) or by looking up ``subject_id``.
+    ``subject_id`` and ``species`` index the row directly and therefore raise
+    ``KeyError`` when the row lacks the field (e.g. for an unlinked subject).
+    """
+
+    def __init__(self, subject_id=None, tuple=None):
+        """Wrap ``tuple`` or fetch the row matching ``subject_id``.
+
+        :param subject_id: id (SQL ``like`` pattern) that must match exactly
+            one entry of the ``Subjects`` table.
+        :param tuple: an already fetched row; takes precedence over
+            ``subject_id``.
+        :raises AssertionError: if ``subject_id`` matches no or several
+            entries.
+        """
+        if tuple:
+            self.__tuple = tuple
+        elif subject_id:
+            # Build the restriction first: calling .fetch() directly after
+            # .format() would call it on the pattern string, not on the table.
+            pattern = "subject_id like '{0:s}'".format(subject_id)
+            subjects = (Subjects & pattern)
+            assert (len(subjects) == 1), "Subject id does not exist or is not unique!"
+            self.__tuple = subjects.fetch(limit=1, as_dict=True)[0]
+        else:
+            self.__tuple = {}
+            print("Empty Subject, not linked to any database entry!")
+
+    @property
+    def subject_id(self):
+        """The ``subject_id`` field."""
+        return self.__tuple["subject_id"]
+
+    @property
+    def species(self):
+        """The ``species`` field."""
+        return self.__tuple["species"]
+
+    @property
+    def cells(self):
+        """List of ``Cell`` objects recorded in this subject."""
+        if not self.__tuple:
+            # restricting by an empty row would match every cell
+            return []
+        cs = Cells & self.__tuple
+        return [Cell(tuple=c) for c in cs]
+
+    @property
+    def properties(self):
+        """Rows of ``SubjectProperties`` for this subject, as a list of dicts."""
+        if not self.__tuple:
+            # restricting by an empty row would match every property entry
+            return []
+        return (SubjectProperties & self.__tuple).fetch(as_dict=True)
+
+    @staticmethod
+    def find_subjects(species=None):
+        """Find subjects, optionally filtered by a substring of the species.
+
+        :return: list of ``Subject`` objects.
+        """
+        subjs = Subjects & True
+        if species:
+            subjs = subjs & "species like '%{0:s}%'".format(species)
+        return [Subject(tuple=s) for s in subjs]
+
+    @staticmethod
+    def unique_species():
+        """Unique species names found in the ``Subjects`` table."""
+        all_species = (Subjects & True).fetch("species")
+        return np.unique(all_species)
+
+
+if __name__ == "__main__":
+    # Manual check, only run when the module is executed as a script: look up
+    # one cell by its id (this queries the Cells table) and open an
+    # interactive IPython shell in which `cell` can be inspected.
+    from IPython import embed
+    cell = Cell("2010-04-16-ak")
+    embed()