Merge pull request 'several fixes and improvements for handling old relacs files' (#4) from relacs into master

Reviewed-on: jgrewe/fishBook#4
commit e95dd763a4
Author: csachgau
Date:   2020-08-14 17:04:41 +02:00
5 changed files with 123 additions and 33 deletions


@@ -457,13 +457,6 @@ def scan_folder_for_repros(dataset):
     cell_id = (Cells * CellDatasetMap * (Datasets & "dataset_id = '%s'" % dataset["dataset_id"])).fetch("cell_id", limit=1)[0]
     for rs, si in zip(repro_settings, stim_indices):
         rp = Repros.get_template_tuple()
-        path = []
-        if not find_key_recursive(rs, "run", path):
-            find_key_recursive(rs, "Run", path)
-        if len(path) > 0:
-            rp["run"] = deep_get(rs, path, 0)
-        else:
-            rp["run"] = -1
         path = []
         if not find_key_recursive(rs, "repro", path):
@@ -474,7 +467,16 @@ def scan_folder_for_repros(dataset):
         if rp["repro_name"] in repro_counts.keys():
             repro_counts[rp["repro_name"]] += 1
         else:
-            repro_counts[rp["repro_name"]] = 1
+            repro_counts[rp["repro_name"]] = 0
+        path = []
+        if not find_key_recursive(rs, "run", path):
+            find_key_recursive(rs, "Run", path)
+        if len(path) > 0:
+            rp["run"] = deep_get(rs, path, 0)
+        else:  # the run information is not there and needs to be fixed!
+            rp["run"] = repro_counts[rp["repro_name"]]
         rp["cell_id"] = cell_id
         rp["repro_id"] = rp["repro_name"] + str(repro_counts[rp["repro_name"]])
         rp["start"] = 0.


@@ -84,7 +84,7 @@ def has_signal(line, col_names):
     values = line.split()
     for i, n in enumerate(col_names):
         if n.lower() == "signal" and i < len(values):
-            if len(values[i].strip()) > 0 and values[i].strip()[0] != "-":
+            if len(values[i].strip()) > 0 and (values[i].strip()[0] != "-" and values[i].strip() != "init"):
                 return True
     return False
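
The widened condition also rejects rows whose signal column holds the placeholder "init", which old relacs files write before the first real stimulus. A quick check with made-up rows (only the position of the "signal" column matters):

    col_names = ["index", "time", "signal"]
    print(has_signal("0  0.00  stimfile.dat", col_names))  # True: a real stimulus
    print(has_signal("1  1.25  -", col_names))             # False: no signal
    print(has_signal("2  2.50  init", col_names))          # False: initialization row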


@@ -6,6 +6,7 @@ import numpy as np
 from IPython import embed
 from fishbook.backend.util import progress
 import datetime as dt
+import yaml
 
 
 class Cell:
     """The Cell class represents a recorded cell. It is characterized by *id*, the cell *type*, the *firing_rate*, and the recording *location*.
@@ -162,7 +163,7 @@ class Dataset:
         if tuple:
             self.__tuple = tuple
         elif dataset_id:
-            pattern = "dataset_id like '{0:s}'".format(dataset_id)
+            pattern = "dataset_id like '%{0:s}%'".format(dataset_id)
            dsets = (Datasets & pattern)
             results_check(dsets, dataset_id, "Dataset ID")
             self.__tuple = dsets.fetch(limit=1, as_dict=True)[0]
@@ -312,11 +313,12 @@ class Dataset:
         return len(Datasets())
 
     @staticmethod
-    def find(min_duration=None, experimenter=None, quality=None, min_date=None, max_date=None, test=False):
+    def find(dataset_id=None, min_duration=None, experimenter=None, quality=None, min_date=None, max_date=None, test=False):
         """Find dataset entries in the database. You may restrict the search by providing the following arguments. All restrictions are connected
         with a logical AND.
 
         Args:
+            dataset_id (str, optional): the id of the given Dataset; if unique, all other restrictions will be ignored. Defaults to None.
             min_duration (float, optional): minimum duration of the recording session, if not given, any length datasets will be returned. Defaults to None.
             experimenter (str, optional): the name of the one who did the recording. The name does not need to be the full name. Defaults to None.
             quality (str, optional): the quality assigned to the dataset during recording (e.g. good, fair, poor). Defaults to None.
@@ -328,6 +330,10 @@ class Dataset:
             int: Count of matching results
         """
         dataset_list = Datasets()
+        if dataset_id:
+            dataset_list = dataset_list & "dataset_id like '%{0:s}%'".format(dataset_id)
+            if len(dataset_list) == 1:
+                return [Dataset(tuple=dataset_list.fetch(as_dict=True)[0])], 1
         if min_duration:
             dataset_list = dataset_list & "duration > %.2f" % min_duration
         if experimenter:
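
A unique (partial) dataset_id therefore short-circuits all other filters and returns a one-element list together with the count 1; otherwise the usual AND-combined restrictions apply. A usage sketch with placeholder values:

    # "2020-08-10-aa" and "Grewe" are placeholders; the id filter uses SQL LIKE
    # with surrounding wildcards, so any unique substring matches.
    datasets, count = Dataset.find(dataset_id="2020-08-10-aa")
    if count != 1:  # ambiguous or no match: fall back to combined filters
        datasets, count = Dataset.find(experimenter="Grewe", quality="good")
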
@@ -377,6 +383,16 @@ class Dataset:
     def _tuple(self):
         return self.__tuple.copy()
 
+    @property
+    def yaml(self):
+        settings = yaml.dump({"dataset id": self.id, "recording date": self.recording_date,
+                              "duration": self.recording_duration, "comment": self.comment,
+                              "experimenter": self.experimenter, "quality": self.quality,
+                              "data_source": self.data_source, "host": self.data_host,
+                              "setup": self.setup, "nixed": self.has_nix})
+        return settings
+
     def __str__(self):
         str = "dataset id: %s\n" % self.id
         str += "recorded: %s \t by: %s\n" % (self.recording_date, self.experimenter)
@@ -385,6 +401,8 @@ class Dataset:
         str += "comment: %s" % self.comment
         return str
 
+    def __repr__(self):
+        return self.__str__()
 
 class RePro:
     """The RePro class represents an entry in the repro table. This is a run of a certain relacs "Research Protocol".
@@ -513,6 +531,17 @@ class RePro:
         str += "start time: %s\t duration: %s\n" % (self.start, self.duration)
         return str
 
+    def __repr__(self):
+        return self.__str__()
+
+    @property
+    def to_dict(self):
+        r_settings = yaml.safe_load(self.settings.replace("\t", " "))
+        settings = {"repro id": self.id, "run": self.run, "cell": self.cell_id,
+                    "name": self.name, "start": self.start, "duration": self.duration,
+                    "settings": r_settings}
+        return settings
 
 class Stimulus:
     """The stimulus class represents a Stimulus that was presented. A Stimulus has several properties
@@ -617,6 +646,17 @@ class Stimulus:
         return results, total
 
+    def __repr__(self):
+        return self.__str__()
+
+    @property
+    def to_dict(self):
+        s_settings = yaml.safe_load(self.settings.replace("\t", " "))
+        settings = {"id": self.id, "run": self.run, "stimulus name": self.name,
+                    "stimulus index": self.index, "duration": self.duration, "start time": self.start_time,
+                    "name": self.name, "settings": s_settings}
+        return settings
 
 class Subject:
     """Representation of the recorded subject's properties.

@@ -18,7 +18,11 @@ class BaselineData:
     This class provides access to basic measures estimated from the baseline activity.
     """
-    def __init__(self, dataset: Dataset):
+    def __init__(self, dataset=None, dataset_id=None):
+        d, _ = Dataset.find(dataset_id=dataset_id)
+        if len(d) == 0 or len(d) > 1:
+            raise ValueError("Dataset id not found or not unique")
+        dataset = d[0]
         self.__spike_data = []
         self.__eod_data = []
         self.__eod_times = []
@@ -66,6 +70,21 @@ class BaselineData:
         a_corr = a_corr[int(len(a_corr) / 2):]
         return a_corr[:max_lags]
 
+    @property
+    def baseline_rate(self):
+        """The average baseline firing rate for each run of the baseline repro.
+
+        Returns:
+            list of float: the average firing rate
+        """
+        rates = []
+        for i in range(self.size):
+            spikes = self.spikes(i)
+            max_time = np.floor(spikes)[-1]
+            min_time = np.ceil(spikes)[0]
+            rates.append(len(spikes[(spikes >= min_time) & (spikes < max_time)]) / (max_time - min_time))
+        return rates
+
     def serial_correlation(self, max_lags=50):
         """
         Returns the serial correlation of the interspike intervals.
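
With the new constructor above, this can now be driven from a dataset id alone. The rate estimate clips each spike train to whole-second bounds (ceil of the first, floor of the last spike time) before dividing count by duration. A usage sketch with a placeholder id:

    # Raises ValueError unless the id matches exactly one dataset.
    baseline = BaselineData(dataset_id="2020-08-10-aa")
    for run, rate in enumerate(baseline.baseline_rate):  # one value per repro run
        print("run %d: %.1f Hz" % (run, rate))
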
@@ -255,7 +274,7 @@ class BaselineData:
         """
         vss = []
         spike_phases = []
-        for i, sd in enumerate(self.__spike_data):
+        for i in range(self.size):
             phases = self.__spike_phases(i)
             ms_sin_alpha = np.mean(np.sin(phases)) ** 2
             ms_cos_alpha = np.mean(np.cos(phases)) ** 2
@@ -631,7 +650,8 @@ class FileStimulusData:
         """
         self.__spike_data = []
         self.__contrasts = []
-        self.__stimuli = []
+        self.__stimulus_files = []
+        self.__stimulus_settings = []
         self.__delays = []
         self.__durations = []
         self.__dataset = dataset
@@ -641,6 +661,13 @@ class FileStimulusData:
         self.__stimspikes = None
         self._get_data()
 
+    @property
+    def dataset(self):
+        return self.__dataset
+
+    @property
+    def cell(self):
+        return self.__cell
 
     def _get_data(self):
         if not self.__dataset:
@@ -651,15 +678,16 @@ class FileStimulusData:
             self.__stimspikes = StimSpikesFile(self.__dataset.data_source)
         for r in self.__repros:
             if self.__dataset.has_nix:
-                spikes, contrasts, stims, delays, durations = self.__read_spike_data_from_nix(r)
+                spikes, contrasts, stims, delays, durations, stim_settings = self.__read_spike_data_from_nix(r)
             else:
-                spikes, contrasts, stims, delays, durations = self.__read_spike_data_from_directory(r)
+                spikes, contrasts, stims, delays, durations, stim_settings = self.__read_spike_data_from_directory(r)
             if spikes is not None and len(spikes) > 0:
                 self.__spike_data.extend(spikes)
                 self.__contrasts.extend(contrasts)
-                self.__stimuli.extend(stims)
+                self.__stimulus_files.extend(stims)
                 self.__delays.extend(delays)
                 self.__durations.extend(durations)
+                self.__stimulus_settings.extend(stim_settings)
             else:
                 continue
@@ -703,7 +731,7 @@ class FileStimulusData:
             delay = float(r_settings["delay"].split(":")[-1])
         start_time = stimulus.start_time - delay
         end_time = stimulus.start_time + mt.extents[stimulus.index]
-        duration = mt.extents[stimulus.index]
+        duration = float(mt.extents[stimulus.index])
         contrast = self.__find_contrast(r_settings, s_settings, True)
         spikes = self.__all_spikes[(self.__all_spikes >= start_time) & (self.__all_spikes < end_time)] - start_time - delay
@@ -723,9 +751,12 @@ class FileStimulusData:
         stim_files = []
         delays = []
         durations = []
+        settings = []
+        repro_settings = repro.to_dict
+        r_settings = yaml.safe_load(repro.settings.replace("\t", ""))
         stimuli, _ = Stimulus.find(cell_id=repro.cell_id, repro_id=repro.id)
         if len(stimuli) == 0:
-            return spikes, contrasts, stim_files
+            return spikes, contrasts, stim_files, [], [], []
         data_source = os.path.join(self.__dataset.data_source, self.__dataset.id + ".nix")
         if not os.path.exists(data_source):
             print("Data not found! Trying from directory")
@@ -740,14 +771,16 @@ class FileStimulusData:
             mt = b.multi_tags[s.multi_tag_id]
             sp, c, stim, delay, duration = self.__do_read_spike_data_from_nix(mt, s, repro)
-            if len(sp) > 0:
+            if len(sp) > 5:
                 spikes.append(sp)
                 contrasts.append(c)
                 stim_files.append(stim)
                 delays.append(delay)
                 durations.append(duration)
+                stim_settings = s.to_dict
+                settings.append({"stimulus": stim_settings, "repro": repro_settings})
         f.close()
-        return spikes, contrasts, stim_files, delays, contrasts
+        return spikes, contrasts, stim_files, delays, durations, settings
 
     def __read_spike_data_from_directory(self, repro: RePro):
         stimuli, _ = Stimulus.find(cell_id=repro.cell_id, repro_id=repro.id)
@@ -756,23 +789,30 @@ class FileStimulusData:
         stim_files = []
         delays = []
         durations = []
+        settings = []
         r_settings = yaml.safe_load(repro.settings.replace("\t", ""))
         r_settings = r_settings["project"] if "project" in r_settings.keys() else r_settings
+        repro_settings = repro.to_dict
         for s in stimuli:
             s_settings = yaml.safe_load(s.settings.replace("\t", ""))
             s_settings = s_settings["project"] if "project" in s_settings.keys() else s_settings
             contrast = self.__find_contrast(r_settings, s_settings, False)
-            duration = float(s_settings["duration"][:-2]) / 1000
-            sp = self.__stimspikes.get(s.run, s.index)
-            if not sp or len(sp) < 1:
+            dur, sp = self.__stimspikes.get(s.run, s.index)
+            if not sp or len(sp) < 5:
                 continue
+            if "duration" in s_settings.keys():
+                duration = float(s_settings["duration"][:-2]) / 1000
+            else:
+                duration = dur
             contrasts.append(contrast)
             delays.append(float(r_settings["before"][:-2]) / 1000)
             durations.append(duration)
             stim_files.append(s_settings["file"])
             spikes.append(sp)
+            settings.append({"stimulus": s.to_dict, "repro": repro_settings})
 
-        return spikes, contrasts, stim_files, delays, durations
+        return spikes, contrasts, stim_files, delays, durations, settings
 
     def read_stimulus(self, index=0):
         pass
@@ -789,6 +829,11 @@ class FileStimulusData:
         else:
             raise IndexError("FileStimulusData: index %i out of bounds for spike data of size %i" % (index, self.size))
 
+    def stimulus_settings(self, index=0):
+        if index >= self.size:
+            raise IndexError("FileStimulusData: index %i is out of bounds for spike data of size %i" % (index, self.size))
+        return self.__stimulus_settings[index]
+
     def contrast(self, index=-1):
         if index == -1:
             return self.__contrasts
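
Together with the renamed __stimulus_files, each loaded trial's combined metadata is now reachable by index. Assuming fs is a FileStimulusData instance and size counts the loaded trials, as the error messages suggest:

    for i in range(fs.size):
        s = fs.stimulus_settings(i)           # {"stimulus": {...}, "repro": {...}}
        print(fs.stimulus_files(i),           # stimulus file used in trial i
              s["repro"]["name"],             # keys as defined in RePro.to_dict
              s["stimulus"]["start time"])    # ... and Stimulus.to_dict
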
@@ -799,9 +844,9 @@ class FileStimulusData:
     def stimulus_files(self, index=-1):
         if index == -1:
-            return self.__stimuli
+            return self.__stimulus_files
         elif index >= 0 and index < self.size:
-            return self.__stimuli[index]
+            return self.__stimulus_files[index]
         else:
             raise IndexError("FileStimulusData: index %i out of bounds for contrasts data of size %i" % (index, self.size))


@@ -200,28 +200,31 @@ class StimSpikesFile:
         index_map = {}
         trial_data = []
+        trial_duration = 0.0
         index = 0
         trial = 0
         for l in lines:
             l = l.strip()
+            if "duration:" in l:
+                trial_duration = float(l[1:].strip().split(":")[-1][:-3])
             if "index:" in l:
                 if len(trial_data) > 0:
-                    index_map[(index, trial)] = trial_data
+                    index_map[(index, trial)] = (trial_duration, trial_data)
                     trial_data = []
                 index = int(l[1:].strip().split(":")[-1])
             if "trial:" in l:
                 if len(trial_data) > 0:
-                    index_map[(index, trial)] = trial_data
+                    index_map[(index, trial)] = (trial_duration, trial_data)
                     trial_data = []
                 trial = int(l[1:].strip().split(":")[-1])
             if len(l) > 0 and "#" not in l:
                 trial_data.append(float(l)/1000)
+        index_map[(index, trial)] = (trial_duration, trial_data)
         return index_map
 
     def get(self, run_index, trial_index):
         if tuple([run_index, trial_index]) not in self._data_map.keys():
             print("Data not found for run %i and trial %i:" % (run_index, trial_index))
-            return None
+            return None, None
         return self._data_map[(run_index, trial_index)]
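
Callers now unpack a (duration, spike_times) pair and can no longer test the result against a bare None, matching how __read_spike_data_from_directory consumes it above. A usage sketch, assuming stimspikes is a StimSpikesFile instance and the indices are placeholders:

    dur, sp = stimspikes.get(0, 0)
    if sp is None or len(sp) < 5:    # same threshold the directory reader applies
        print("no usable trial data")
    else:
        print("%d spikes in %.2f s" % (len(sp), dur))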