This commit is contained in:
Jan Grewe 2019-09-21 11:18:33 +02:00
commit 0160c583d7
7 changed files with 234 additions and 111 deletions

View File

@ -7,4 +7,9 @@ Database of data recorded in the group.
## Assumptions & Caveats: ## Assumptions & Caveats:
* there is only a single subject in each dataset * there is only a single subject in each dataset
* each recording contains a single cell * each recording contains a single cell
* RePro links to Cell and not to Dataset * RePro links to Cell and not to Dataset
## FIXMEs
* RePro must have foreign keys to Dataset and Subject
* Dataset: finding the sample rate in stimuli.dat can be improved

View File

@ -1,3 +1,4 @@
from fishbook.fishbook import * from .fishbook import *
from .reproclasses import BaselineData
import fishbook.database as database import fishbook.database as database
__all__ = ['fishbook', 'database'] __all__ = ['fishbook', 'database']

View File

@ -1,93 +0,0 @@
from fishbook.fishbook import Dataset, RePro
import numpy as np
import nixio as nix
import os
from IPython import embed
class BaselineData(object):
    """Spike data of all "BaselineActivity" RePro runs of a dataset.

    The data is read when the object is created, either from the dataset's
    nix file or, as a fallback, from ``basespikes1.dat`` in the dataset
    directory.
    """

    def __init__(self, dataset: "Dataset"):
        """Collect the baseline data for `dataset`.

        :param dataset: the Dataset whose baseline recordings are read.
        """
        self.__data = []
        self.__dataset = dataset
        self.__cell = dataset.cells[0]  # Beware: Assumption that there is only a single cell
        self._get_data()

    def _get_data(self):
        """(Re-)read the data of every BaselineActivity run of the cell."""
        self.__data = []
        if not self.__dataset:
            # Nothing to read from; leave the data list empty.
            return
        repros = RePro.find_repros("BaselineActivity", cell_id=self.__cell.cell_id)
        for r in repros:
            self.__data.append(self.__read_data(r))

    def __read_data(self, r: "RePro"):
        """Read the data of run `r` from nix if available, else from the directory."""
        if self.__dataset.has_nix:
            return self.__read_data_from_nix(r)
        return self.__read_data_from_directory(r)

    @property
    def dataset(self):
        """The Dataset this object was created from."""
        return self.__dataset

    def data(self, index: int = 0):
        """Return the data of the run at `index`, or None if there is no such run.

        :param index: position of the run in the list of baseline runs.
        """
        try:
            return self.__data[index]
        except IndexError:
            return None

    @property
    def size(self):
        """Number of baseline runs that were read."""
        return len(self.__data)

    def __str__(self):
        return "Baseline data of cell %s " % self.__cell.cell_id

    def __read_data_from_nix(self, r: "RePro") -> np.ndarray:
        """Read the "Spikes-1" data tagged by run `r` from the dataset's nix file.

        Falls back to the directory reader when the nix file does not exist and
        returns an empty array when the tag of the run cannot be found.
        """
        data_source = os.path.join(self.__dataset.data_source, self.__dataset.dataset_id + ".nix")
        if not os.path.exists(data_source):
            print("Data not found! Trying from directory")
            return self.__read_data_from_directory(r)
        f = nix.File.open(data_source, nix.FileMode.ReadOnly)
        try:
            b = f.blocks[0]
            try:
                t = b.tags[r.repro_id]
            except (KeyError, IndexError):
                t = None
            if t is None:
                print("Tag not found!")
                return np.asarray([])
            return t.retrieve_data("Spikes-1")[:]
        finally:
            # Close the file even if reading fails.
            f.close()

    def __read_data_from_directory(self, r) -> np.ndarray:
        """Read the data of run `r` from ``basespikes1.dat``.

        The file is scanned for a line containing "index" whose value equals
        ``r.run``; the data following the next "#Key" line is returned. An
        empty array is returned when the file or the run is not found.
        """
        data = []
        data_source = os.path.join(self.__dataset.data_source, "basespikes1.dat")
        if os.path.exists(data_source):
            found_run = False
            with open(data_source, 'r') as f:
                l = f.readline()
                while l:
                    if "index" in l:
                        index = int(l.strip("#").strip().split(":")[-1])
                        found_run = index == r.run
                    if l.startswith("#Key") and found_run:
                        data = self.__do_read(f)
                        break
                    l = f.readline()
        return np.asarray(data)

    def __do_read(self, f) -> np.ndarray:
        """Read one float per line from `f` until a blank or "#" line is met.

        The two lines following the current position are skipped first.
        """
        data = []
        f.readline()
        f.readline()
        l = f.readline()
        while l and "#" not in l and len(l.strip()) > 0:
            data.append(float(l.strip()))
            l = f.readline()
        return np.asarray(data)
if __name__ == "__main__":
    # Manual check: load the baseline data of one dataset and drop into an
    # interactive IPython shell to inspect `dataset` and `baseline`.
    dataset = Dataset(dataset_id='2011-06-14-ag')
    # dataset = Dataset(dataset_id='2018-11-09-aa-invivo-1')
    baseline = BaselineData(dataset)
    embed()

View File

@ -1,6 +1,9 @@
from .database.database import Cells, Datasets, CellDatasetMap, Subjects, SubjectProperties, SubjectDatasetMap, Stimuli, Repros from .database.database import Cells, Datasets, CellDatasetMap, Subjects, SubjectProperties, SubjectDatasetMap, Stimuli, Repros
import nixio as nix
import os
import numpy as np import numpy as np
from IPython import embed # from IPython import embed
def _safe_get_val(dictionary:dict, key, default=None): def _safe_get_val(dictionary:dict, key, default=None):
return dictionary[key] if key in dictionary.keys() else default return dictionary[key] if key in dictionary.keys() else default
@ -26,11 +29,11 @@ class Cell:
print("Empty Cell, not linked to any database entry!") print("Empty Cell, not linked to any database entry!")
@property @property
def cell_id(self): def id(self):
return self.__tuple["cell_id"] if "cell_id" in self.__tuple.keys() else "" return self.__tuple["cell_id"] if "cell_id" in self.__tuple.keys() else ""
@property @property
def cell_type(self): def type(self):
return self.__tuple["cell_type"] if "cell_type" in self.__tuple.keys() else "" return self.__tuple["cell_type"] if "cell_type" in self.__tuple.keys() else ""
@property @property
@ -54,7 +57,7 @@ class Cell:
@property @property
def repro_runs(self): def repro_runs(self):
repros = (Repros & "cell_id = '%s'" % self.cell_id) repros = (Repros & "cell_id = '%s'" % self.id)
return [RePro(tuple=r) for r in repros] return [RePro(tuple=r) for r in repros]
@staticmethod @staticmethod
@ -62,7 +65,7 @@ class Cell:
return np.unique(Cells.fetch("cell_type")) return np.unique(Cells.fetch("cell_type"))
@staticmethod @staticmethod
def find_cells(cell_type=None, species=None, quality="good"): def find(cell_type=None, species=None, quality="good"):
cs = Cells * CellDatasetMap * Datasets * Subjects cs = Cells * CellDatasetMap * Datasets * Subjects
if cell_type: if cell_type:
cs = cs & "cell_type like '{0:s}'".format(cell_type) cs = cs & "cell_type like '{0:s}'".format(cell_type)
@ -74,12 +77,13 @@ class Cell:
def __str__(self): def __str__(self):
str = "" str = ""
str += "Cell: %s \t type: %s\n"%(self.cell_id, self.cell_type) str += "Cell: %s \t type: %s\n"%(self.id, self.type)
return str return str
class Dataset: class Dataset:
def __init__(self, dataset_id=None, tuple=None): def __init__(self, dataset_id=None, tuple=None):
self.__samplerate = 0.0
if tuple: if tuple:
self.__tuple = tuple self.__tuple = tuple
elif dataset_id: elif dataset_id:
@ -89,9 +93,11 @@ class Dataset:
self.__tuple = dsets.fetch(limit=1)[0] self.__tuple = dsets.fetch(limit=1)[0]
else: else:
print("Empty dataset, not linked to any database entry!") print("Empty dataset, not linked to any database entry!")
if len(self.__tuple.keys()) > 0:
self.__find_samplerate()
@property @property
def dataset_id(self): def id(self):
return self.__tuple["dataset_id"] return self.__tuple["dataset_id"]
@property @property
@ -136,8 +142,12 @@ class Dataset:
subjs = (Subjects * (SubjectDatasetMap & self.__tuple)) subjs = (Subjects * (SubjectDatasetMap & self.__tuple))
return [Subject(tuple=s) for s in subjs] return [Subject(tuple=s) for s in subjs]
@property
def samplerate(self):
    """The sample rate stored for this dataset (presumably in Hz -- confirm)."""
    return self.__samplerate
@staticmethod @staticmethod
def find_datasets(min_duration=None, experimenter=None, quality=None): def find(min_duration=None, experimenter=None, quality=None):
dsets = Datasets dsets = Datasets
if min_duration: if min_duration:
dsets = dsets & "duration > %.2f" % min_duration dsets = dsets & "duration > %.2f" % min_duration
@ -147,6 +157,32 @@ class Dataset:
dsets = dsets & "quality like '{0:s}'".format(quality) dsets = dsets & "quality like '{0:s}'".format(quality)
return [Dataset(tuple=d) for d in dsets] return [Dataset(tuple=d) for d in dsets]
def __find_samplerate(self, trace_name="V-1"):
    """Determine the sample rate of the recording and store it on the dataset.

    The nix file ``<data_source>/<id>.nix`` is consulted first: the sample rate
    is the inverse of the sampling interval of the first dimension of the
    data array `trace_name`. If that does not yield a value, the first
    "sample interval1" entry of ``stimuli.dat`` in the dataset directory is
    used instead. The stored sample rate is left untouched when neither
    source provides one.

    :param trace_name: name of the data array looked up in the nix file.
    """
    found = False
    nix_file = os.path.join(self.data_source, self.id + '.nix')
    if self.has_nix and os.path.exists(nix_file):
        f = nix.File.open(nix_file, nix.FileMode.ReadOnly)
        try:
            b = f.blocks[0]
            if trace_name in b.data_arrays:
                trace = b.data_arrays[trace_name]
                if trace.dimensions[0].dimension_type == nix.DimensionType.Sample:
                    self.__samplerate = 1. / trace.dimensions[0].sampling_interval
                    found = True
                else:
                    print("Requested trace %s has no sampled dimension!" % trace_name)
            else:
                print("Requested trace %s was not found!" % trace_name)
        finally:
            # Close the file even if reading the trace fails.
            f.close()
    if found:
        return

    stim_file = os.path.join(self.data_source, 'stimuli.dat')
    if not os.path.exists(stim_file):
        return
    si = None
    with open(stim_file, 'r') as f:
        for l in f:
            if "sample interval1" in l:
                si = l.strip().split(":")[-1].strip()
                break
    if si is None:
        return
    # The value is assumed to be given in milliseconds (e.g. "0.05ms") -- TODO confirm
    if si.endswith("ms"):
        si = si[:-2]
    try:
        interval = float(si)
    except ValueError:
        print("Could not read the sample interval from %s!" % stim_file)
        return
    if interval > 0:
        self.__samplerate = 1000. / interval
class RePro: class RePro:
def __init__(self, repro_id=None, tuple=None): def __init__(self, repro_id=None, tuple=None):
@ -161,7 +197,7 @@ class RePro:
print("Empty RePro, not linked to any database entry!") print("Empty RePro, not linked to any database entry!")
@property @property
def repro_id(self): def id(self):
return _safe_get_val(self.__tuple, "repro_id", "") return _safe_get_val(self.__tuple, "repro_id", "")
@property @property
@ -176,6 +212,14 @@ class RePro:
def cell(self): def cell(self):
return Cell(self.cell_id) return Cell(self.cell_id)
@property
def dataset(self):
    """The Dataset linked to the cell of this RePro run.

    Only the first dataset found for the cell is used.

    :return: a Dataset, or None if no dataset is linked to the cell.
    """
    datasets = (Cells & "cell_id = '%s'" % self.cell_id) * CellDatasetMap * Datasets
    rows = datasets.proj('dataset_id', 'data_source', 'experimenter', 'setup', 'recording_date',
                         'quality', 'comment', 'duration', 'has_nix').fetch(limit=1, as_dict=True)
    if len(rows) == 0:
        return None
    d = rows[0]
    # Dataset is built without the cell_id entry (presumably because it is
    # not part of the Datasets table -- confirm).
    d.pop("cell_id", None)
    return Dataset(tuple=d)
@property @property
def name(self): def name(self):
return _safe_get_val(self.__tuple, "repro_name", "") return _safe_get_val(self.__tuple, "repro_name", "")
@ -194,11 +238,11 @@ class RePro:
@property @property
def stimuli(self): def stimuli(self):
stims = Stimuli & "repro_id = '%s'" % self.repro_id & "cell_id = '%s'" % self.cell_id stims = Stimuli & "repro_id = '%s'" % self.id & "cell_id = '%s'" % self.cell_id
return [Stimulus(tuple=s) for s in stims] return [Stimulus(tuple=s) for s in stims]
@staticmethod @staticmethod
def find_repros(name=None, cell_id=None, cell_type=None, species=None, settings=None, quality=None): def find(name=None, cell_id=None, cell_type=None, species=None, settings=None, quality=None):
""" """
Cell type, quality, and species are ignored, if cell_id is provided Cell type, quality, and species are ignored, if cell_id is provided
:param repro_name: :param repro_name:
@ -265,7 +309,7 @@ class Subject:
print("Empty Subject, not linked to any database entry!") print("Empty Subject, not linked to any database entry!")
@property @property
def subject_id(self): def id(self):
return self.__tuple["subject_id"] return self.__tuple["subject_id"]
@property @property
@ -282,7 +326,7 @@ class Subject:
return (SubjectProperties & self.__tuple).fetch(as_dict=True) return (SubjectProperties & self.__tuple).fetch(as_dict=True)
@staticmethod @staticmethod
def find_subjects(species=None): def find(species=None):
subjs = Subjects & True subjs = Subjects & True
if species: if species:
subjs = (Subjects & "species like '%{0:s}%'".format(species)) subjs = (Subjects & "species like '%{0:s}%'".format(species))

160
fishbook/reproclasses.py Normal file
View File

@ -0,0 +1,160 @@
from fishbook.fishbook import Dataset, RePro
import numpy as np
import nixio as nix
import os
import subprocess
from IPython import embed
def _unzip_if_needed(dataset, tracename='trace-1.raw'):
    """Make sure the uncompressed trace file exists in a dataset directory.

    Does nothing when ``<dataset>/<tracename>`` already exists or when there is
    no ``<tracename>.gz`` next to it. Otherwise the gzipped file is
    decompressed and removed afterwards, as the ``gunzip`` command would do,
    but without depending on an external program.

    :param dataset: path of the dataset directory.
    :param tracename: file name of the uncompressed trace.
    """
    import gzip
    import shutil

    file_name = os.path.join(dataset, tracename)
    if os.path.exists(file_name):
        return
    zipped_name = file_name + '.gz'
    if not os.path.exists(zipped_name):
        return
    print("\tunzip: %s" % tracename)
    try:
        with gzip.open(zipped_name, 'rb') as source, open(file_name, 'wb') as target:
            shutil.copyfileobj(source, target)
    except Exception:
        # Do not leave a truncated trace behind; it would be taken for valid data.
        if os.path.exists(file_name):
            os.remove(file_name)
        raise
    os.remove(zipped_name)
class BaselineData:
    """Spike and EOD data of all "BaselineActivity" RePro runs of a dataset.

    The data is read when the object is created, from the dataset's nix file
    if there is one and otherwise from the files in the dataset directory.
    """

    def __init__(self, dataset: "Dataset"):
        """Collect the baseline data for `dataset`.

        :param dataset: the Dataset whose baseline recordings are read.
        """
        self.__spike_data = []
        self.__eod_data = []
        self.__dataset = dataset
        self.__repros = None
        self.__cell = dataset.cells[0]  # Beware: Assumption that there is only a single cell
        self._get_data()

    def _get_data(self):
        """Read spike and EOD data of every BaselineActivity run of the cell."""
        if not self.__dataset:
            return
        self.__repros = RePro.find("BaselineActivity", cell_id=self.__cell.id)
        for r in self.__repros:
            spikes = self.__read_spike_data(r)
            self.__spike_data.append(spikes)
            # The last spike entry limits how much of the EOD trace is read;
            # without spikes nothing is requested.
            duration = spikes[-1] if len(spikes) > 0 else 0.0
            self.__eod_data.append(self.__read_eod_data(r, duration))

    def __read_spike_data(self, r: "RePro"):
        """Read the spike data of run `r` from nix if available, else from the directory."""
        if self.__dataset.has_nix:
            return self.__read_spike_data_from_nix(r)
        return self.__read_spike_data_from_directory(r)

    def __read_eod_data(self, r: "RePro", duration):
        """Read the EOD data of run `r` from nix if available, else from the directory."""
        if self.__dataset.has_nix:
            return self.__read_eod_data_from_nix(r, duration)
        return self.__read_eod_data_from_directory(r, duration)

    @property
    def dataset(self):
        """The Dataset this object was created from."""
        return self.__dataset

    @property
    def cell(self):
        """The cell of the dataset, or the list of cells if there are several."""
        cells = self.__dataset.cells
        return cells if len(cells) > 1 else cells[0]

    @property
    def subject(self):
        """The subject of the dataset, or the list of subjects if there are several."""
        subjects = self.__dataset.subjects
        return subjects if len(subjects) > 1 else subjects[0]

    def spike_data(self, index: int = 0):
        """Return the spike data of the run at `index`, or None if there is no such run."""
        try:
            return self.__spike_data[index]
        except IndexError:
            return None

    def eod_data(self, index: int = 0):
        """Return the EOD data of the run at `index` together with its time axis.

        :return: tuple ``(eod, time)``; ``(None, None)`` if there is no such
                 run. ``time`` is the sample index divided by the sample rate
                 of the dataset.
        """
        try:
            eod = self.__eod_data[index]
        except IndexError:
            return None, None
        time = np.arange(len(eod)) / self.__dataset.samplerate
        return eod, time

    @property
    def coefficient_of_variation(self):
        """Coefficient of variation (std / mean) of the interspike intervals of each run.

        Runs with fewer than two spikes yield ``nan``.
        """
        cvs = []
        for d in self.__spike_data:
            isis = np.diff(d)
            if len(isis) == 0:
                cvs.append(np.nan)
                continue
            cvs.append(np.std(isis) / np.mean(isis))
        return cvs

    @property
    def vector_strength(self):
        """Vector strength of each run. Not implemented yet: always an empty list."""
        vss = []
        return vss

    @property
    def size(self):
        """Number of baseline runs that were read."""
        return len(self.__spike_data)

    def __str__(self):
        return "Baseline data of cell %s " % self.__cell.id

    def __read_tagged_data_from_nix(self, r: "RePro", name):
        """Read the data `name` tagged by run `r` from the dataset's nix file.

        :return: the data, an empty array if the tag of the run is not found,
                 or None if the nix file does not exist.
        """
        data_source = os.path.join(self.__dataset.data_source, self.__dataset.id + ".nix")
        if not os.path.exists(data_source):
            return None
        f = nix.File.open(data_source, nix.FileMode.ReadOnly)
        try:
            b = f.blocks[0]
            try:
                t = b.tags[r.id]
            except (KeyError, IndexError):
                t = None
            if t is None:
                print("Tag not found!")
                return np.asarray([])
            return t.retrieve_data(name)[:]
        finally:
            # Close the file even if reading fails.
            f.close()

    def __read_eod_data_from_nix(self, r: "RePro", duration) -> np.ndarray:
        """Read the "EOD" data of run `r` from nix; fall back to the directory if there is no file."""
        data = self.__read_tagged_data_from_nix(r, "EOD")
        if data is None:
            print("Data not found! Trying from directory")
            return self.__read_eod_data_from_directory(r, duration)
        return data

    def __read_eod_data_from_directory(self, r: "RePro", duration) -> np.ndarray:
        """Read the first ``duration * samplerate`` float32 samples of ``trace-2.raw``.

        The trace is unzipped first if only a gzipped version exists. An empty
        array is returned when the trace file is not available.
        """
        sr = self.__dataset.samplerate
        _unzip_if_needed(self.__dataset.data_source, "trace-2.raw")
        trace_file = os.path.join(self.__dataset.data_source, "trace-2.raw")
        if not os.path.exists(trace_file):
            print("Data not found!")
            return np.asarray([], dtype=np.float32)
        eod = np.fromfile(trace_file, np.float32)
        return eod[:int(duration * sr)]

    def __read_spike_data_from_nix(self, r: "RePro") -> np.ndarray:
        """Read the "Spikes-1" data of run `r` from nix; fall back to the directory if there is no file."""
        data = self.__read_tagged_data_from_nix(r, "Spikes-1")
        if data is None:
            print("Data not found! Trying from directory")
            return self.__read_spike_data_from_directory(r)
        return data

    def __read_spike_data_from_directory(self, r) -> np.ndarray:
        """Read the spike data of run `r` from ``basespikes1.dat``.

        The file is scanned for a line containing "index" whose value equals
        ``r.run``; the data following the next "#Key" line is returned. An
        empty array is returned when the file or the run is not found.
        """
        data = []
        data_source = os.path.join(self.__dataset.data_source, "basespikes1.dat")
        if os.path.exists(data_source):
            found_run = False
            with open(data_source, 'r') as f:
                l = f.readline()
                while l:
                    if "index" in l:
                        index = int(l.strip("#").strip().split(":")[-1])
                        found_run = index == r.run
                    if l.startswith("#Key") and found_run:
                        data = self.__do_read(f)
                        break
                    l = f.readline()
        return np.asarray(data)

    def __do_read(self, f) -> np.ndarray:
        """Read one float per line from `f` until a blank or "#" line is met.

        The first following line is skipped, the second one is taken as the
        unit; values given in "ms" are scaled by 0.001.
        """
        data = []
        f.readline()
        unit = f.readline().strip("#").strip()
        scale = 0.001 if unit == "ms" else 1
        l = f.readline()
        while l and "#" not in l and len(l.strip()) > 0:
            data.append(float(l.strip()) * scale)
            l = f.readline()
        return np.asarray(data)
if __name__ == "__main__":
    # Manual check: load the baseline data of one dataset and drop into an
    # interactive IPython shell to inspect `dataset` and `baseline`.
    dataset = Dataset(dataset_id='2011-06-14-ag')
    # dataset = Dataset(dataset_id='2018-11-09-aa-invivo-1')
    baseline = BaselineData(dataset)
    embed()

View File

@ -8,5 +8,5 @@ setup(name='fishbook',
packages=find_packages(exclude=['contrib', 'doc', 'tests*']), packages=find_packages(exclude=['contrib', 'doc', 'tests*']),
description='Database providing an overview of the electrophysiological data recorded in the group.', description='Database providing an overview of the electrophysiological data recorded in the group.',
author='Jan Grewe', author='Jan Grewe',
requires=['datajoint', 'numpy', 'nixio'] requires=['datajoint', 'nixio', 'numpy', 'PyYAML']
) )

10
test.py
View File

@ -5,5 +5,11 @@ from IPython import embed
data_dir = "/data/apteronotus" data_dir = "/data/apteronotus"
datasets = glob.glob(os.path.join(data_dir, '/data/apteronotus/2010-*')) datasets = sorted(glob.glob(os.path.join(data_dir, '201*')))
fb.database.populate(datasets, False) dsets = []
for d in datasets:
if "2010" in d or "2011" in d:
continue
else:
dsets.append(d)
fb.database.populate(dsets, False)