diff --git a/README.md b/README.md
index e2a72d1..298922e 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,11 @@
 Database of data recorded in the group.
 
 
-
 ## Assumptions:
 * there is only a single subject in each dataset
 * each recording contains a single cell
+
+
+## FIXMEs
+* repro must have foreign keys to dataset and subject
+* Dataset finding samplerate in stimuli.dat can be improved
\ No newline at end of file
diff --git a/fishbook/__init__.py b/fishbook/__init__.py
index fcb6241..ee7afae 100644
--- a/fishbook/__init__.py
+++ b/fishbook/__init__.py
@@ -1,3 +1,4 @@
-from fishbook.fishbook import *
+from .fishbook import *
+from .reproclasses import BaselineData
 import fishbook.database as database
 __all__ = ['fishbook', 'database']
\ No newline at end of file
diff --git a/fishbook/baseline_data.py b/fishbook/baseline_data.py
deleted file mode 100644
index da865a5..0000000
--- a/fishbook/baseline_data.py
+++ /dev/null
@@ -1,93 +0,0 @@
-from fishbook.fishbook import Dataset, RePro
-import numpy as np
-import nixio as nix
-import os
-
-from IPython import embed
-
-
-class BaselineData(object):
-
-    def __init__(self, dataset:Dataset):
-        self.__data = []
-        self.__dataset = dataset
-        self.__cell = dataset.cells[0]  # Beware: Assumption that there is only a single cell
-        self._get_data()
-
-    def _get_data(self):
-        if not self.__dataset:
-            self.__data = []
-        self.__data = []
-
-        repros = RePro.find_repros("BaselineActivity", cell_id=self.__cell.cell_id)
-        for r in repros:
-            self.__data.append(self.__read_data(r))
-
-    def __read_data(self, r:RePro):
-        if self.__dataset.has_nix:
-            return self.__read_data_from_nix(r)
-        else:
-            return self.__read_data_from_directory(r)
-
-    @property
-    def dataset(self):
-        return self.__dataset
-
-    def data(self, index:int=0):
-        return self.__data[0] if len(self.__data) >= index else None
-
-    @property
-    def size(self):
-        return len(self.__data)
-
-    def __str__(self):
-        str = "Baseline data of cell %s " % self.__cell.cell_id
-
-    def __read_data_from_nix(self, r:RePro)->np.ndarray:
-        data_source = os.path.join(self.__dataset.data_source, self.__dataset.dataset_id + ".nix")
-        if not os.path.exists(data_source):
-            print("Data not found! Trying from directory")
-            return self.__read_data_from_directory(r)
-        f = nix.File.open(data_source, nix.FileMode.ReadOnly)
-        b = f.blocks[0]
-        t = b.tags[r.repro_id]
-        if not t:
-            print("Tag not found!")
-        data = t.retrieve_data("Spikes-1")[:]
-        f.close()
-        return data
-
-    def __read_data_from_directory(self, r)->np.ndarray:
-        data = []
-        data_source = os.path.join(self.__dataset.data_source, "basespikes1.dat")
-        if os.path.exists(data_source):
-            found_run = False
-            with open(data_source, 'r') as f:
-                l = f.readline()
-                while l:
-                    if "index" in l:
-                        index = int(l.strip("#").strip().split(":")[-1])
-                        found_run = index == r.run
-                    if l.startswith("#Key") and found_run:
-                        data = self.__do_read(f)
-                        break
-                    l = f.readline()
-        embed()
-        return data
-
-    def __do_read(self, f)->np.ndarray:
-        data = []
-        f.readline()
-        f.readline()
-        l = f.readline()
-        while l and "#" not in l and len(l.strip()) > 0:
-            data.append(float(l.strip()))
-            l = f.readline()
-        return np.asarray(data)
-
-
-if __name__ == "__main__":
-    dataset = Dataset(dataset_id='2011-06-14-ag')
-    # dataset = Dataset(dataset_id='2018-11-09-aa-invivo-1')
-    baseline = BaselineData(dataset)
-    embed()
\ No newline at end of file
diff --git a/fishbook/fishbook.py b/fishbook/fishbook.py
index cecf6e3..b376fe2 100644
--- a/fishbook/fishbook.py
+++ b/fishbook/fishbook.py
@@ -1,6 +1,9 @@
 from .database.database import Cells, Datasets, CellDatasetMap, Subjects, SubjectProperties, SubjectDatasetMap, Stimuli, Repros
+import nixio as nix
+import os
 import numpy as np
-from IPython import embed
+# from IPython import embed
+
 
 def _safe_get_val(dictionary:dict, key, default=None):
     return dictionary[key] if key in dictionary.keys() else default
@@ -26,11 +29,11 @@
             print("Empty Cell, not linked to any database entry!")
 
     @property
-    def cell_id(self):
+    def id(self):
         return self.__tuple["cell_id"] if "cell_id" in self.__tuple.keys() else ""
 
     @property
-    def cell_type(self):
+    def type(self):
         return self.__tuple["cell_type"] if "cell_type" in self.__tuple.keys() else ""
 
     @property
@@ -54,7 +57,7 @@
 
     @property
    def repro_runs(self):
-        repros = (Repros & "cell_id = '%s'" % self.cell_id)
+        repros = (Repros & "cell_id = '%s'" % self.id)
         return [RePro(tuple=r) for r in repros]
 
     @staticmethod
@@ -62,7 +65,7 @@
         return np.unique(Cells.fetch("cell_type"))
 
     @staticmethod
-    def find_cells(cell_type=None, species=None, quality="good"):
+    def find(cell_type=None, species=None, quality="good"):
         cs = Cells * CellDatasetMap * Datasets * Subjects
         if cell_type:
             cs = cs & "cell_type like '{0:s}'".format(cell_type)
@@ -74,12 +77,13 @@
 
     def __str__(self):
         str = ""
-        str += "Cell: %s \t type: %s\n"%(self.cell_id, self.cell_type)
+        str += "Cell: %s \t type: %s\n"%(self.id, self.type)
         return str
 
 
 class Dataset:
     def __init__(self, dataset_id=None, tuple=None):
+        self.__samplerate = 0.0
         if tuple:
             self.__tuple = tuple
         elif dataset_id:
@@ -89,9 +93,11 @@
                 self.__tuple = dsets.fetch(limit=1)[0]
         else:
             print("Empty dataset, not linked to any database entry!")
+        if len(self.__tuple.keys()) > 0:
+            self.__find_samplerate()
 
     @property
-    def dataset_id(self):
+    def id(self):
         return self.__tuple["dataset_id"]
 
     @property
@@ -136,8 +142,12 @@
         subjs = (Subjects * (SubjectDatasetMap & self.__tuple))
         return [Subject(tuple=s) for s in subjs]
 
+    @property
+    def samplerate(self):
+        return self.__samplerate
+
     @staticmethod
-    def find_datasets(min_duration=None, experimenter=None, quality=None):
+    def find(min_duration=None, experimenter=None, quality=None):
         dsets = Datasets
         if min_duration:
             dsets = dsets & "duration > %.2f" % min_duration
@@ -147,6 +157,32 @@
             dsets = dsets & "quality like '{0:s}'".format(quality)
         return [Dataset(tuple=d) for d in dsets]
 
+    def __find_samplerate(self, trace_name="V-1"):
+        if self.has_nix and os.path.exists(os.path.join(self.data_source, self.id + '.nix')):
+            f = nix.File.open(os.path.join(self.data_source, self.id + '.nix'), nix.FileMode.ReadOnly)
+            b = f.blocks[0]
+            if trace_name in b.data_arrays:
+                trace = b.data_arrays[trace_name]
+                if trace.dimensions[0].dimension_type == nix.DimensionType.Sample:
+                    self.__samplerate = 1./trace.dimensions[0].sampling_interval
+                else:
+                    print("Requested trace %s has no sampled dimension!" % trace_name)
+            else:
+                print("Requested trace %s was not found!" % trace_name)
+            f.close()
+        else:
+            stim_file = os.path.join(self.data_source, 'stimuli.dat')
+            if not os.path.exists(stim_file):
+                return
+            lines = None
+            with open(stim_file, 'r') as f:
+                lines = f.readlines()
+            for l in lines:
+                if "sample interval1" in l:
+                    si = l.strip().split(":")[-1][:-2]
+                    break
+            self.__samplerate = 1000. / float(si)
+
 
 class RePro:
     def __init__(self, repro_id=None, tuple=None):
@@ -161,7 +197,7 @@
             print("Empty RePro, not linked to any database entry!")
 
     @property
-    def repro_id(self):
+    def id(self):
         return _safe_get_val(self.__tuple, "repro_id", "")
 
     @property
@@ -176,6 +212,14 @@
     def cell(self):
         return Cell(self.cell_id)
 
+    @property
+    def dataset(self):
+        datasets = (Cells & "cell_id = '%s'" % self.cell_id) * CellDatasetMap * Datasets
+        d = datasets.proj('dataset_id', 'data_source', 'experimenter', 'setup', 'recording_date',
+                          'quality', 'comment', 'duration', 'has_nix').fetch(limit=1, as_dict=True)[0]
+        del d["cell_id"]
+        return Dataset(tuple=d)
+
     @property
     def name(self):
         return _safe_get_val(self.__tuple, "repro_name", "")
@@ -194,11 +238,11 @@
 
     @property
     def stimuli(self):
-        stims = Stimuli & "repro_id = '%s'" % self.repro_id & "cell_id = '%s'" % self.cell_id
+        stims = Stimuli & "repro_id = '%s'" % self.id & "cell_id = '%s'" % self.cell_id
         return [Stimulus(tuple=s) for s in stims]
 
     @staticmethod
-    def find_repros(name=None, cell_id=None, cell_type=None, species=None, settings=None, quality=None):
+    def find(name=None, cell_id=None, cell_type=None, species=None, settings=None, quality=None):
         """
         Cell type, quality, and species are ignored, if cell_id is provided
         :param repro_name:
@@ -265,7 +309,7 @@
             print("Empty Subject, not linked to any database entry!")
 
     @property
-    def subject_id(self):
+    def id(self):
         return self.__tuple["subject_id"]
 
     @property
@@ -282,7 +326,7 @@
         return (SubjectProperties & self.__tuple).fetch(as_dict=True)
 
     @staticmethod
-    def find_subjects(species=None):
+    def find(species=None):
         subjs = Subjects & True
         if species:
             subjs = (Subjects & "species like '%{0:s}%'".format(species))
diff --git a/fishbook/reproclasses.py b/fishbook/reproclasses.py
new file mode 100644
index 0000000..f6a685c
--- /dev/null
+++ b/fishbook/reproclasses.py
@@ -0,0 +1,160 @@
+from fishbook.fishbook import Dataset, RePro
+import numpy as np
+import nixio as nix
+import os
+import subprocess
+
+from IPython import embed
+
+
+def _unzip_if_needed(dataset, tracename='trace-1.raw'):
+    file_name = os.path.join(dataset, tracename)
+    if os.path.exists(file_name):
+        return
+    if os.path.exists(file_name + '.gz'):
+        print("\tunzip: %s" % tracename)
+        subprocess.check_call(["gunzip", os.path.join(dataset, tracename + ".gz")])
".gz")]) + + +class BaselineData: + + def __init__(self, dataset:Dataset): + self.__spike_data = [] + self.__eod_data = [] + self.__dataset = dataset + self.__repros = None + self.__cell = dataset.cells[0] # Beware: Assumption that there is only a single cell + self._get_data() + + def _get_data(self): + if not self.__dataset: + return + self.__repros = RePro.find("BaselineActivity", cell_id=self.__cell.id) + for r in self.__repros: + self.__spike_data.append(self.__read_spike_data(r)) + self.__eod_data.append(self.__read_eod_data(r, self.__spike_data[-1][-1])) + + def __read_spike_data(self, r:RePro): + if self.__dataset.has_nix: + return self.__read_spike_data_from_nix(r) + else: + return self.__read_spike_data_from_directory(r) + + def __read_eod_data(self, r:RePro, duration): + if self.__dataset.has_nix: + return self.__read_eod_data_from_nix(r, duration) + else: + return self.__read_eod_data_from_directory(r, duration) + + @property + def dataset(self): + return self.__dataset + + @property + def cell(self): + cells = self.__dataset.cells + return cells if len(cells) > 1 else cells[0] + + @property + def subject(self): + subjects = self.__dataset.subjects + return subjects if len(subjects) > 1 else subjects[0] + + def spike_data(self, index:int=0): + return self.__spike_data[index] if len(self.__spike_data) >= index else None + + def eod_data(self, index:int=0): + eod = self.__eod_data[index] if len(self.__eod_data) >= index else None + time = np.arange(len(eod)) / self.__dataset.samplerate + return eod, time + + @property + def coefficient_of_variation(self): + cvs = [] + for d in self.__spike_data: + isis = np.diff(d) + cvs.append(np.std(isis)/np.mean(d=isis)) + return cvs + + @property + def vector_strength(self): + vss = [] + return vss + + @property + def size(self): + return len(self.__spike_data) + + def __str__(self): + str = "Baseline data of cell %s " % self.__cell.id + + def __read_eod_data_from_nix(self, r:RePro, duration)->np.ndarray: + data_source = os.path.join(self.__dataset.data_source, self.__dataset.id + ".nix") + if not os.path.exists(data_source): + print("Data not found! Trying from directory") + return self.__read_eod_data_from_directory(r, duration) + f = nix.File.open(data_source, nix.FileMode.ReadOnly) + b = f.blocks[0] + t = b.tags[r.id] + if not t: + print("Tag not found!") + data = t.retrieve_data("EOD")[:] + f.close() + return data + + def __read_eod_data_from_directory(self, r:RePro, duration)->np.ndarray: + sr = self.__dataset.samplerate + _unzip_if_needed(self.__dataset.data_source, "trace-2.raw") + eod = np.fromfile(self.__dataset.data_source + "/trace-2.raw", np.float32) + eod = eod[:int(duration * sr)] + return eod + + def __read_spike_data_from_nix(self, r:RePro)->np.ndarray: + data_source = os.path.join(self.__dataset.data_source, self.__dataset.id + ".nix") + if not os.path.exists(data_source): + print("Data not found! 
+            return self.__read_spike_data_from_directory(r)
+        f = nix.File.open(data_source, nix.FileMode.ReadOnly)
+        b = f.blocks[0]
+        t = b.tags[r.id]
+        if not t:
+            print("Tag not found!")
+        data = t.retrieve_data("Spikes-1")[:]
+        f.close()
+        return data
+
+
+    def __read_spike_data_from_directory(self, r)->np.ndarray:
+        data = []
+        data_source = os.path.join(self.__dataset.data_source, "basespikes1.dat")
+        if os.path.exists(data_source):
+            found_run = False
+            with open(data_source, 'r') as f:
+                l = f.readline()
+                while l:
+                    if "index" in l:
+                        index = int(l.strip("#").strip().split(":")[-1])
+                        found_run = index == r.run
+                    if l.startswith("#Key") and found_run:
+                        data = self.__do_read(f)
+                        break
+                    l = f.readline()
+        return data
+
+    def __do_read(self, f)->np.ndarray:
+        data = []
+        f.readline()
+        unit = f.readline().strip("#").strip()
+        scale = 0.001 if unit == "ms" else 1
+        l = f.readline()
+        while l and "#" not in l and len(l.strip()) > 0:
+            data.append(float(l.strip())*scale)
+            l = f.readline()
+        return np.asarray(data)
+
+
+if __name__ == "__main__":
+    dataset = Dataset(dataset_id='2011-06-14-ag')
+    # dataset = Dataset(dataset_id='2018-11-09-aa-invivo-1')
+    baseline = BaselineData(dataset)
+    embed()
\ No newline at end of file
diff --git a/setup.py b/setup.py
index afc0d0d..9037488 100644
--- a/setup.py
+++ b/setup.py
@@ -8,5 +8,5 @@ setup(name='fishbook',
       packages=find_packages(exclude=['contrib', 'doc', 'tests*']),
       description='Database providing an overview of the electrophysiological data recorded in the group.',
       author='Jan Grewe',
-      requires=['datajoint', 'numpy', 'nixio']
+      requires=['datajoint', 'nixio', 'numpy', 'PyYAML']
       )
diff --git a/test.py b/test.py
index efb3897..30d3681 100644
--- a/test.py
+++ b/test.py
@@ -5,5 +5,11 @@
 from IPython import embed
 
 data_dir = "/data/apteronotus"
-datasets = glob.glob(os.path.join(data_dir, '/data/apteronotus/2010-*'))
-fb.database.populate(datasets, False)
\ No newline at end of file
+datasets = sorted(glob.glob(os.path.join(data_dir, '201*')))
+dsets = []
+for d in datasets:
+    if "2010" in d or "2011" in d:
+        continue
+    else:
+        dsets.append(d)
+fb.database.populate(dsets, False)
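
Example usage after this change — a minimal sketch, not part of the diff; it assumes a configured datajoint connection and local data, and only exercises names introduced or renamed in this patch (Dataset.find, Dataset.id, Dataset.samplerate, and the BaselineData class now exported from fishbook/__init__.py). The dataset id is the one used in the __main__ block of reproclasses.py; the duration and quality values are placeholders.

    import fishbook as fb

    # list recordings longer than 30 s with good quality via the renamed Dataset.find
    for ds in fb.Dataset.find(min_duration=30, quality="good"):
        print(ds.id, ds.samplerate)

    # load baseline activity for one dataset and report spike-train variability
    ds = fb.Dataset(dataset_id='2011-06-14-ag')
    baseline = fb.BaselineData(ds)
    print(baseline.size, baseline.coefficient_of_variation)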