This commit is contained in:
Jan Grewe 2019-09-21 11:18:33 +02:00
commit 0160c583d7
7 changed files with 234 additions and 111 deletions

View File

@ -7,4 +7,9 @@ Database of data recorded in the group.
## Assumptions & Caveats:
* there is only a single subject in each dataset
* each recording contains a single cell
* RePro links to Cell and not to Dataset
* RePro links to Cell and not to Dataset
## FIXMEs
* repro must have foreign keys to dataset and subject
* Dataset finding samplerate in stimuli.dat can be improved

View File

@ -1,3 +1,4 @@
from fishbook.fishbook import *
from .fishbook import *
from .reproclasses import BaselineData
import fishbook.database as database
__all__ = ['fishbook', 'database']

View File

@ -1,93 +0,0 @@
from fishbook.fishbook import Dataset, RePro
import numpy as np
import nixio as nix
import os
from IPython import embed
class BaselineData(object):
def __init__(self, dataset:Dataset):
self.__data = []
self.__dataset = dataset
self.__cell = dataset.cells[0] # Beware: Assumption that there is only a single cell
self._get_data()
def _get_data(self):
if not self.__dataset:
self.__data = []
self.__data = []
repros = RePro.find_repros("BaselineActivity", cell_id=self.__cell.cell_id)
for r in repros:
self.__data.append(self.__read_data(r))
def __read_data(self, r:RePro):
if self.__dataset.has_nix:
return self.__read_data_from_nix(r)
else:
return self.__read_data_from_directory(r)
@property
def dataset(self):
return self.__dataset
def data(self, index:int=0):
return self.__data[0] if len(self.__data) >= index else None
@property
def size(self):
return len(self.__data)
def __str__(self):
str = "Baseline data of cell %s " % self.__cell.cell_id
def __read_data_from_nix(self, r:RePro)->np.ndarray:
data_source = os.path.join(self.__dataset.data_source, self.__dataset.dataset_id + ".nix")
if not os.path.exists(data_source):
print("Data not found! Trying from directory")
return self.__read_data_from_directory(r)
f = nix.File.open(data_source, nix.FileMode.ReadOnly)
b = f.blocks[0]
t = b.tags[r.repro_id]
if not t:
print("Tag not found!")
data = t.retrieve_data("Spikes-1")[:]
f.close()
return data
def __read_data_from_directory(self, r)->np.ndarray:
data = []
data_source = os.path.join(self.__dataset.data_source, "basespikes1.dat")
if os.path.exists(data_source):
found_run = False
with open(data_source, 'r') as f:
l = f.readline()
while l:
if "index" in l:
index = int(l.strip("#").strip().split(":")[-1])
found_run = index == r.run
if l.startswith("#Key") and found_run:
data = self.__do_read(f)
break
l = f.readline()
embed()
return data
def __do_read(self, f)->np.ndarray:
data = []
f.readline()
f.readline()
l = f.readline()
while l and "#" not in l and len(l.strip()) > 0:
data.append(float(l.strip()))
l = f.readline()
return np.asarray(data)
if __name__ == "__main__":
dataset = Dataset(dataset_id='2011-06-14-ag')
# dataset = Dataset(dataset_id='2018-11-09-aa-invivo-1')
baseline = BaselineData(dataset)
embed()

View File

@ -1,6 +1,9 @@
from .database.database import Cells, Datasets, CellDatasetMap, Subjects, SubjectProperties, SubjectDatasetMap, Stimuli, Repros
import nixio as nix
import os
import numpy as np
from IPython import embed
# from IPython import embed
def _safe_get_val(dictionary:dict, key, default=None):
return dictionary[key] if key in dictionary.keys() else default
@ -26,11 +29,11 @@ class Cell:
print("Empty Cell, not linked to any database entry!")
@property
def cell_id(self):
def id(self):
return self.__tuple["cell_id"] if "cell_id" in self.__tuple.keys() else ""
@property
def cell_type(self):
def type(self):
return self.__tuple["cell_type"] if "cell_type" in self.__tuple.keys() else ""
@property
@ -54,7 +57,7 @@ class Cell:
@property
def repro_runs(self):
repros = (Repros & "cell_id = '%s'" % self.cell_id)
repros = (Repros & "cell_id = '%s'" % self.id)
return [RePro(tuple=r) for r in repros]
@staticmethod
@ -62,7 +65,7 @@ class Cell:
return np.unique(Cells.fetch("cell_type"))
@staticmethod
def find_cells(cell_type=None, species=None, quality="good"):
def find(cell_type=None, species=None, quality="good"):
cs = Cells * CellDatasetMap * Datasets * Subjects
if cell_type:
cs = cs & "cell_type like '{0:s}'".format(cell_type)
@ -74,12 +77,13 @@ class Cell:
def __str__(self):
str = ""
str += "Cell: %s \t type: %s\n"%(self.cell_id, self.cell_type)
str += "Cell: %s \t type: %s\n"%(self.id, self.type)
return str
class Dataset:
def __init__(self, dataset_id=None, tuple=None):
self.__samplerate = 0.0
if tuple:
self.__tuple = tuple
elif dataset_id:
@ -89,9 +93,11 @@ class Dataset:
self.__tuple = dsets.fetch(limit=1)[0]
else:
print("Empty dataset, not linked to any database entry!")
if len(self.__tuple.keys()) > 0:
self.__find_samplerate()
@property
def dataset_id(self):
def id(self):
return self.__tuple["dataset_id"]
@property
@ -136,8 +142,12 @@ class Dataset:
subjs = (Subjects * (SubjectDatasetMap & self.__tuple))
return [Subject(tuple=s) for s in subjs]
@property
def samplerate(self):
return self.__samplerate
@staticmethod
def find_datasets(min_duration=None, experimenter=None, quality=None):
def find(min_duration=None, experimenter=None, quality=None):
dsets = Datasets
if min_duration:
dsets = dsets & "duration > %.2f" % min_duration
@ -147,6 +157,32 @@ class Dataset:
dsets = dsets & "quality like '{0:s}'".format(quality)
return [Dataset(tuple=d) for d in dsets]
def __find_samplerate(self, trace_name="V-1"):
if self.has_nix and os.path.exists(os.path.join(self.data_source, self.id, '.nix')):
f = nix.File.open(os.path.join(self.data_source, self.id, '.nix'), nix.FileMode.ReadOnly)
b = f.blocks[0]
if trace_name in b.data_arrays:
trace = b.data_arrays[trace_name]
if trace.dimensions[0].dimension_type == nix.DimensionType.Sample:
self.__samplerate = 1./trace.dimensions[0].sampling_interval
else:
print("Requested trace %s has no sampled dimension!" % s)
else:
print("Requested trace %s was not found!" % s)
f.close()
else:
stim_file = os.path.join(self.data_source , 'stimuli.dat')
if not os.path.exists(stim_file):
return
lines = None
with open(stim_file, 'r') as f:
lines = f.readlines()
for l in lines:
if "sample interval1" in l:
si = l.strip().split(":")[-1][:-2]
break
self.__samplerate = 1000. / float(si)
class RePro:
def __init__(self, repro_id=None, tuple=None):
@ -161,7 +197,7 @@ class RePro:
print("Empty RePro, not linked to any database entry!")
@property
def repro_id(self):
def id(self):
return _safe_get_val(self.__tuple, "repro_id", "")
@property
@ -176,6 +212,14 @@ class RePro:
def cell(self):
return Cell(self.cell_id)
@property
def dataset(self):
datasets = (Cells & "cell_id = '%s'" % self.cell_id) * CellDatasetMap * Datasets
d = datasets.proj('dataset_id', 'data_source', 'experimenter', 'setup', 'recording_date',
'quality', 'comment', 'duration', 'has_nix').fetch(limit=1, as_dict=True)[0]
del d["cell_id"]
return Dataset(tuple=d)
@property
def name(self):
return _safe_get_val(self.__tuple, "repro_name", "")
@ -194,11 +238,11 @@ class RePro:
@property
def stimuli(self):
stims = Stimuli & "repro_id = '%s'" % self.repro_id & "cell_id = '%s'" % self.cell_id
stims = Stimuli & "repro_id = '%s'" % self.id & "cell_id = '%s'" % self.cell_id
return [Stimulus(tuple=s) for s in stims]
@staticmethod
def find_repros(name=None, cell_id=None, cell_type=None, species=None, settings=None, quality=None):
def find(name=None, cell_id=None, cell_type=None, species=None, settings=None, quality=None):
"""
Cell type, quality, and species are ignored, if cell_id is provided
:param repro_name:
@ -265,7 +309,7 @@ class Subject:
print("Empty Subject, not linked to any database entry!")
@property
def subject_id(self):
def id(self):
return self.__tuple["subject_id"]
@property
@ -282,7 +326,7 @@ class Subject:
return (SubjectProperties & self.__tuple).fetch(as_dict=True)
@staticmethod
def find_subjects(species=None):
def find(species=None):
subjs = Subjects & True
if species:
subjs = (Subjects & "species like '%{0:s}%'".format(species))

160
fishbook/reproclasses.py Normal file
View File

@ -0,0 +1,160 @@
from fishbook.fishbook import Dataset, RePro
import numpy as np
import nixio as nix
import os
import subprocess
from IPython import embed
def _unzip_if_needed(dataset, tracename='trace-1.raw'):
file_name = os.path.join(dataset, tracename)
if os.path.exists(file_name):
return
if os.path.exists(file_name + '.gz'):
print("\tunzip: %s" % tracename)
subprocess.check_call(["gunzip", os.path.join(dataset, tracename + ".gz")])
class BaselineData:
def __init__(self, dataset:Dataset):
self.__spike_data = []
self.__eod_data = []
self.__dataset = dataset
self.__repros = None
self.__cell = dataset.cells[0] # Beware: Assumption that there is only a single cell
self._get_data()
def _get_data(self):
if not self.__dataset:
return
self.__repros = RePro.find("BaselineActivity", cell_id=self.__cell.id)
for r in self.__repros:
self.__spike_data.append(self.__read_spike_data(r))
self.__eod_data.append(self.__read_eod_data(r, self.__spike_data[-1][-1]))
def __read_spike_data(self, r:RePro):
if self.__dataset.has_nix:
return self.__read_spike_data_from_nix(r)
else:
return self.__read_spike_data_from_directory(r)
def __read_eod_data(self, r:RePro, duration):
if self.__dataset.has_nix:
return self.__read_eod_data_from_nix(r, duration)
else:
return self.__read_eod_data_from_directory(r, duration)
@property
def dataset(self):
return self.__dataset
@property
def cell(self):
cells = self.__dataset.cells
return cells if len(cells) > 1 else cells[0]
@property
def subject(self):
subjects = self.__dataset.subjects
return subjects if len(subjects) > 1 else subjects[0]
def spike_data(self, index:int=0):
return self.__spike_data[index] if len(self.__spike_data) >= index else None
def eod_data(self, index:int=0):
eod = self.__eod_data[index] if len(self.__eod_data) >= index else None
time = np.arange(len(eod)) / self.__dataset.samplerate
return eod, time
@property
def coefficient_of_variation(self):
cvs = []
for d in self.__spike_data:
isis = np.diff(d)
cvs.append(np.std(isis)/np.mean(d=isis))
return cvs
@property
def vector_strength(self):
vss = []
return vss
@property
def size(self):
return len(self.__spike_data)
def __str__(self):
str = "Baseline data of cell %s " % self.__cell.id
def __read_eod_data_from_nix(self, r:RePro, duration)->np.ndarray:
data_source = os.path.join(self.__dataset.data_source, self.__dataset.id + ".nix")
if not os.path.exists(data_source):
print("Data not found! Trying from directory")
return self.__read_eod_data_from_directory(r, duration)
f = nix.File.open(data_source, nix.FileMode.ReadOnly)
b = f.blocks[0]
t = b.tags[r.id]
if not t:
print("Tag not found!")
data = t.retrieve_data("EOD")[:]
f.close()
return data
def __read_eod_data_from_directory(self, r:RePro, duration)->np.ndarray:
sr = self.__dataset.samplerate
_unzip_if_needed(self.__dataset.data_source, "trace-2.raw")
eod = np.fromfile(self.__dataset.data_source + "/trace-2.raw", np.float32)
eod = eod[:int(duration * sr)]
return eod
def __read_spike_data_from_nix(self, r:RePro)->np.ndarray:
data_source = os.path.join(self.__dataset.data_source, self.__dataset.id + ".nix")
if not os.path.exists(data_source):
print("Data not found! Trying from directory")
return self.__read_spike_data_from_directory(r)
f = nix.File.open(data_source, nix.FileMode.ReadOnly)
b = f.blocks[0]
t = b.tags[r.id]
if not t:
print("Tag not found!")
data = t.retrieve_data("Spikes-1")[:]
f.close()
return data
def __read_spike_data_from_directory(self, r)->np.ndarray:
data = []
data_source = os.path.join(self.__dataset.data_source, "basespikes1.dat")
if os.path.exists(data_source):
found_run = False
with open(data_source, 'r') as f:
l = f.readline()
while l:
if "index" in l:
index = int(l.strip("#").strip().split(":")[-1])
found_run = index == r.run
if l.startswith("#Key") and found_run:
data = self.__do_read(f)
break
l = f.readline()
return data
def __do_read(self, f)->np.ndarray:
data = []
f.readline()
unit = f.readline().strip("#").strip()
scale = 0.001 if unit == "ms" else 1
l = f.readline()
while l and "#" not in l and len(l.strip()) > 0:
data.append(float(l.strip())*scale)
l = f.readline()
return np.asarray(data)
if __name__ == "__main__":
dataset = Dataset(dataset_id='2011-06-14-ag')
# dataset = Dataset(dataset_id='2018-11-09-aa-invivo-1')
baseline = BaselineData(dataset)
embed()

View File

@ -8,5 +8,5 @@ setup(name='fishbook',
packages=find_packages(exclude=['contrib', 'doc', 'tests*']),
description='Database providing an overview of the electrophysiological data recorded in the group.',
author='Jan Grewe',
requires=['datajoint', 'numpy', 'nixio']
requires=['datajoint', 'nixio', 'numpy', 'PyYAML']
)

10
test.py
View File

@ -5,5 +5,11 @@ from IPython import embed
data_dir = "/data/apteronotus"
datasets = glob.glob(os.path.join(data_dir, '/data/apteronotus/2010-*'))
fb.database.populate(datasets, False)
datasets = sorted(glob.glob(os.path.join(data_dir, '201*')))
dsets = []
for d in datasets:
if "2010" in d or "2011" in d:
continue
else:
dsets.append(d)
fb.database.populate(dsets, False)