Merge pull request 'several fixes and improvements for handling old relacs files' (#4) from relacs into master

Reviewed-on: jgrewe/fishBook#4
csachgau 2020-08-14 17:04:41 +02:00
commit e95dd763a4
5 changed files with 123 additions and 33 deletions

View File

@@ -457,14 +457,7 @@ def scan_folder_for_repros(dataset):
     cell_id = (Cells * CellDatasetMap * (Datasets & "dataset_id = '%s'" % dataset["dataset_id"])).fetch("cell_id", limit=1)[0]
     for rs, si in zip(repro_settings, stim_indices):
         rp = Repros.get_template_tuple()
-        path = []
-        if not find_key_recursive(rs, "run", path):
-            find_key_recursive(rs, "Run", path)
-        if len(path) > 0:
-            rp["run"] = deep_get(rs, path, 0)
-        else:
-            rp["run"] = -1
         path = []
         if not find_key_recursive(rs, "repro", path):
             find_key_recursive(rs, "RePro", path)
@@ -474,7 +467,16 @@ def scan_folder_for_repros(dataset):
         if rp["repro_name"] in repro_counts.keys():
            repro_counts[rp["repro_name"]] += 1
         else:
-            repro_counts[rp["repro_name"]] = 1
+            repro_counts[rp["repro_name"]] = 0
+        path = []
+        if not find_key_recursive(rs, "run", path):
+            find_key_recursive(rs, "Run", path)
+        if len(path) > 0:
+            rp["run"] = deep_get(rs, path, 0)
+        else:  # the run information is not there and needs to be fixed!
+            rp["run"] = repro_counts[rp["repro_name"]]
         rp["cell_id"] = cell_id
         rp["repro_id"] = rp["repro_name"] + str(repro_counts[rp["repro_name"]])
         rp["start"] = 0.

View File

@@ -84,7 +84,7 @@ def has_signal(line, col_names):
     values = line.split()
     for i, n in enumerate(col_names):
         if n.lower() == "signal" and i < len(values):
-            if len(values[i].strip()) > 0 and values[i].strip()[0] != "-":
+            if len(values[i].strip()) > 0 and (values[i].strip()[0] != "-" and values[i].strip() != "init"):
                 return True
     return False
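
For illustration, invented stimuli.dat rows under an assumed column layout; with the new condition, the "init" entries that appear in old relacs files no longer count as presented stimuli:

col_names = ["index", "signal", "intensity"]                # assumed header
print(has_signal("1  gwn300Hz10s0.3.dat  0.2", col_names))  # True: a stimulus file
print(has_signal("2  -  0.2", col_names))                   # False: no signal
print(has_signal("3  init  0.2", col_names))                # False: "init" marker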

View File

@@ -6,6 +6,7 @@ import numpy as np
 from IPython import embed
 from fishbook.backend.util import progress
 import datetime as dt
+import yaml

 class Cell:
     """The Cell class represents a recorded cell. It is characterized by *id*, the cell *type*, the *firing_rate*, and the recording *location*.
@@ -162,7 +163,7 @@
         if tuple:
             self.__tuple = tuple
         elif dataset_id:
-            pattern = "dataset_id like '{0:s}'".format(dataset_id)
+            pattern = "dataset_id like '%{0:s}%'".format(dataset_id)
             dsets = (Datasets & pattern)
             results_check(dsets, dataset_id, "Dataset ID")
             self.__tuple = dsets.fetch(limit=1, as_dict=True)[0]
@@ -312,11 +313,12 @@
         return len(Datasets())

     @staticmethod
-    def find(min_duration=None, experimenter=None, quality=None, min_date=None, max_date=None, test=False):
+    def find(dataset_id=None, min_duration=None, experimenter=None, quality=None, min_date=None, max_date=None, test=False):
         """Find dataset entries in the database. You may restrict the search by providing the following arguments. All restrictions are connected
         with a logical AND.

         Args:
+            dataset_id (str, optional): the id of the dataset; if it matches a single entry, all other restrictions are ignored. Defaults to None.
             min_duration (float, optional): minimum duration of the recording session; if not given, datasets of any duration are returned. Defaults to None.
             experimenter (str, optional): the name of the experimenter who did the recording; it does not need to be the full name. Defaults to None.
             quality (str, optional): the quality assigned to the dataset during recording (e.g. good, fair, poor). Defaults to None.
@@ -328,6 +330,10 @@
             int: Count of matching results
         """
         dataset_list = Datasets()
+        if dataset_id:
+            dataset_list = dataset_list & "dataset_id like '%{0:s}%'".format(dataset_id)
+            if len(dataset_list) == 1:
+                return [Dataset(tuple=dataset_list.fetch(as_dict=True)[0])], 1
         if min_duration:
             dataset_list = dataset_list & "duration > %.2f" % min_duration
         if experimenter:
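
The dataset_id shortcut returns immediately on a unique match and skips the remaining filters; a non-unique id simply stays in the query as a substring pattern ANDed with the other restrictions. Usage sketch (ids and values invented):

datasets, count = Dataset.find(dataset_id="2018-11-09-aa")            # unique id: ([Dataset], 1)
datasets, count = Dataset.find(dataset_id="2018-11", quality="good")  # substring, further filtered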
@@ -377,6 +383,16 @@
     def _tuple(self):
         return self.__tuple.copy()

+    @property
+    def yaml(self):
+        settings = yaml.dump({"dataset id": self.id, "recording date": self.recording_date,
+                              "duration": self.recording_duration, "comment": self.comment,
+                              "experimenter": self.experimenter, "quality": self.quality,
+                              "data_source": self.data_source, "host": self.data_host,
+                              "setup": self.setup, "nixed": self.has_nix})
+        return settings

     def __str__(self):
         str = "dataset id: %s\n" % self.id
         str += "recorded: %s \t by: %s\n" % (self.recording_date, self.experimenter)
@@ -385,6 +401,8 @@
         str += "comment: %s" % self.comment
         return str

+    def __repr__(self):
+        return self.__str__()
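
The new yaml property is a thin convenience around yaml.dump, e.g. for logging a dataset summary; note that yaml.dump sorts keys alphabetically by default. Illustrative use (id and output values invented, abridged):

ds = Dataset(dataset_id="2018-11-09-aa")
print(ds.yaml)
# comment: ''
# data_source: /data/2018-11-09-aa
# dataset id: 2018-11-09-aa
# duration: 1.5
# ...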

 class RePro:
     """The RePro class represents an entry in the repro table. This is a run of a certain relacs "Research Protocol".
@@ -513,6 +531,17 @@
         str += "start time: %s\t duration: %s\n" % (self.start, self.duration)
         return str

+    def __repr__(self):
+        return self.__str__()
+
+    @property
+    def to_dict(self):
+        r_settings = yaml.safe_load(self.settings.replace("\t", "    "))
+        settings = {"repro id": self.id, "run": self.run, "cell": self.cell_id,
+                    "name": self.name, "start": self.start, "duration": self.duration,
+                    "settings": r_settings}
+        return settings

 class Stimulus:
     """The stimulus class represents a Stimulus that was presented. A Stimulus has several properties
@@ -617,6 +646,17 @@
         return results, total

+    def __repr__(self):
+        return self.__str__()
+
+    @property
+    def to_dict(self):
+        s_settings = yaml.safe_load(self.settings.replace("\t", "    "))
+        settings = {"id": self.id, "run": self.run, "stimulus name": self.name,
+                    "stimulus index": self.index, "duration": self.duration, "start time": self.start_time,
+                    "name": self.name, "settings": s_settings}
+        return settings
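
Both to_dict properties re-parse the tab-indented relacs settings string via yaml.safe_load and bundle it with the entry's metadata, so a repro or stimulus can be serialized in one step. Usage sketch (ids invented; Stimulus.find used as elsewhere in this diff):

stimuli, _ = Stimulus.find(cell_id="2018-11-09-aa", repro_id="FileStimulus_0")
info = stimuli[0].to_dict
print(info["stimulus name"], info["duration"])
print(info["settings"])  # nested dict parsed from the relacs settings string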

 class Subject:
     """Representation of the recorded subject's properties.

View File

@@ -18,7 +18,12 @@ class BaselineData:
     This class provides access to basic measures estimated from the baseline activity.
     """
-    def __init__(self, dataset: Dataset):
+    def __init__(self, dataset=None, dataset_id=None):
+        if dataset is None:
+            d, _ = Dataset.find(dataset_id=dataset_id)
+            if len(d) != 1:
+                raise ValueError("Dataset id not found or not unique")
+            dataset = d[0]
         self.__spike_data = []
         self.__eod_data = []
         self.__eod_times = []
@@ -66,6 +70,21 @@
         a_corr = a_corr[int(len(a_corr) / 2):]
         return a_corr[:max_lags]

+    @property
+    def baseline_rate(self):
+        """The average baseline firing rate for each run of the baseline repro.
+
+        Returns:
+            list of float: the average firing rate
+        """
+        rates = []
+        for i in range(self.size):
+            spikes = self.spikes(i)
+            max_time = np.floor(spikes[-1])
+            min_time = np.ceil(spikes[0])
+            rates.append(len(spikes[(spikes >= min_time) & (spikes < max_time)]) / (max_time - min_time))
+        return rates
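
The ceil/floor pair clips the analysis window to whole seconds between the first and last spike, so partially covered edge seconds do not bias the rate estimate. A worked toy example (spike times invented):

import numpy as np

spikes = np.array([0.7, 1.2, 1.9, 2.4, 3.1, 3.8, 4.2])      # seconds
max_time = np.floor(spikes[-1])                              # 4.0
min_time = np.ceil(spikes[0])                                # 1.0
n = len(spikes[(spikes >= min_time) & (spikes < max_time)])  # 5 spikes in [1.0, 4.0)
print(n / (max_time - min_time))                             # ~1.67 Hz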
     def serial_correlation(self, max_lags=50):
         """
         Returns the serial correlation of the interspike intervals.
@@ -255,7 +274,7 @@
         """
         vss = []
         spike_phases = []
-        for i, sd in enumerate(self.__spike_data):
+        for i in range(self.size):
             phases = self.__spike_phases(i)
             ms_sin_alpha = np.mean(np.sin(phases)) ** 2
             ms_cos_alpha = np.mean(np.cos(phases)) ** 2
@@ -631,7 +650,8 @@
         """
         self.__spike_data = []
         self.__contrasts = []
-        self.__stimuli = []
+        self.__stimulus_files = []
+        self.__stimulus_settings = []
         self.__delays = []
         self.__durations = []
         self.__dataset = dataset
@@ -641,6 +661,13 @@
         self.__stimspikes = None
         self._get_data()

+    @property
+    def dataset(self):
+        return self.__dataset
+
+    @property
+    def cell(self):
+        return self.__cell

     def _get_data(self):
         if not self.__dataset:
@@ -651,15 +678,16 @@
             self.__stimspikes = StimSpikesFile(self.__dataset.data_source)
         for r in self.__repros:
             if self.__dataset.has_nix:
-                spikes, contrasts, stims, delays, durations = self.__read_spike_data_from_nix(r)
+                spikes, contrasts, stims, delays, durations, stim_settings = self.__read_spike_data_from_nix(r)
             else:
-                spikes, contrasts, stims, delays, durations = self.__read_spike_data_from_directory(r)
+                spikes, contrasts, stims, delays, durations, stim_settings = self.__read_spike_data_from_directory(r)
             if spikes is not None and len(spikes) > 0:
                 self.__spike_data.extend(spikes)
                 self.__contrasts.extend(contrasts)
-                self.__stimuli.extend(stims)
+                self.__stimulus_files.extend(stims)
                 self.__delays.extend(delays)
                 self.__durations.extend(durations)
+                self.__stimulus_settings.extend(stim_settings)
             else:
                 continue
@@ -703,7 +731,7 @@
         delay = float(r_settings["delay"].split(":")[-1])
         start_time = stimulus.start_time - delay
         end_time = stimulus.start_time + mt.extents[stimulus.index]
-        duration = mt.extents[stimulus.index]
+        duration = float(mt.extents[stimulus.index])
         contrast = self.__find_contrast(r_settings, s_settings, True)
         spikes = self.__all_spikes[(self.__all_spikes >= start_time) & (self.__all_spikes < end_time)] - start_time - delay
@@ -723,9 +751,12 @@
         stim_files = []
         delays = []
         durations = []
+        settings = []
+        repro_settings = repro.to_dict
         r_settings = yaml.safe_load(repro.settings.replace("\t", ""))
         stimuli, _ = Stimulus.find(cell_id=repro.cell_id, repro_id=repro.id)
         if len(stimuli) == 0:
-            return spikes, contrasts, stim_files
+            return spikes, contrasts, stim_files, [], [], []
         data_source = os.path.join(self.__dataset.data_source, self.__dataset.id + ".nix")
         if not os.path.exists(data_source):
             print("Data not found! Trying from directory")
@@ -740,14 +771,16 @@
             mt = b.multi_tags[s.multi_tag_id]
             sp, c, stim, delay, duration = self.__do_read_spike_data_from_nix(mt, s, repro)
-            if len(sp) > 0:
+            if len(sp) > 5:
                 spikes.append(sp)
                 contrasts.append(c)
                 stim_files.append(stim)
                 delays.append(delay)
                 durations.append(duration)
+                stim_settings = s.to_dict
+                settings.append({"stimulus": stim_settings, "repro": repro_settings})
         f.close()
-        return spikes, contrasts, stim_files, delays, contrasts
+        return spikes, contrasts, stim_files, delays, durations, settings

     def __read_spike_data_from_directory(self, repro: RePro):
         stimuli, _ = Stimulus.find(cell_id=repro.cell_id, repro_id=repro.id)
@@ -756,23 +789,30 @@
         stim_files = []
         delays = []
         durations = []
+        settings = []
         r_settings = yaml.safe_load(repro.settings.replace("\t", ""))
         r_settings = r_settings["project"] if "project" in r_settings.keys() else r_settings
+        repro_settings = repro.to_dict
         for s in stimuli:
             s_settings = yaml.safe_load(s.settings.replace("\t", ""))
             s_settings = s_settings["project"] if "project" in s_settings.keys() else s_settings
             contrast = self.__find_contrast(r_settings, s_settings, False)
-            duration = float(s_settings["duration"][:-2]) / 1000
-            sp = self.__stimspikes.get(s.run, s.index)
-            if not sp or len(sp) < 1:
+            dur, sp = self.__stimspikes.get(s.run, s.index)
+            if not sp or len(sp) < 5:
                 continue
+            if "duration" in s_settings.keys():
+                duration = float(s_settings["duration"][:-2]) / 1000
+            else:
+                duration = dur
             contrasts.append(contrast)
             delays.append(float(r_settings["before"][:-2]) / 1000)
             durations.append(duration)
             stim_files.append(s_settings["file"])
             spikes.append(sp)
-        return spikes, contrasts, stim_files, delays, durations
+            settings.append({"stimulus": s.to_dict, "repro": repro_settings})
+        return spikes, contrasts, stim_files, delays, durations, settings

     def read_stimulus(self, index=0):
         pass
@@ -789,6 +829,11 @@
         else:
             raise IndexError("FileStimulusData: index %i out of bounds for spike data of size %i" % (index, self.size))

+    def stimulus_settings(self, index=0):
+        if index >= self.size:
+            raise IndexError("FileStimulusData: index %i is out of bounds for spike data of size %i" % (index, self.size))
+        return self.__stimulus_settings[index]
+
     def contrast(self, index=-1):
         if index == -1:
             return self.__contrasts
@@ -799,9 +844,9 @@
     def stimulus_files(self, index=-1):
         if index == -1:
-            return self.__stimuli
+            return self.__stimulus_files
         elif index >= 0 and index < self.size:
-            return self.__stimuli[index]
+            return self.__stimulus_files[index]
         else:
             raise IndexError("FileStimulusData: index %i out of bounds for stimulus files of size %i" % (index, self.size))
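
Together with the renamed accessors, each trial now exposes both the stimulus file that was played and the merged stimulus/repro settings dict. Assumed usage (dataset id invented; constructor signature as implied by the class):

datasets, _ = Dataset.find(dataset_id="2018-11-09-aa")
fs_data = FileStimulusData(datasets[0])
for i in range(fs_data.size):
    trial = fs_data.stimulus_settings(i)   # {"stimulus": ..., "repro": ...}
    print(fs_data.stimulus_files(i), fs_data.contrast(i), trial["repro"]["name"])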

View File

@@ -200,28 +200,31 @@ class StimSpikesFile:
         index_map = {}
         trial_data = []
+        trial_duration = 0.0
         index = 0
         trial = 0
         for l in lines:
             l = l.strip()
+            if "duration:" in l:
+                trial_duration = float(l[1:].strip().split(":")[-1][:-3])
             if "index:" in l:
                 if len(trial_data) > 0:
-                    index_map[(index, trial)] = trial_data
+                    index_map[(index, trial)] = (trial_duration, trial_data)
                     trial_data = []
                 index = int(l[1:].strip().split(":")[-1])
             if "trial:" in l:
                 if len(trial_data) > 0:
-                    index_map[(index, trial)] = trial_data
+                    index_map[(index, trial)] = (trial_duration, trial_data)
                     trial_data = []
                 trial = int(l[1:].strip().split(":")[-1])
             if len(l) > 0 and "#" not in l:
                 trial_data.append(float(l) / 1000)
+        index_map[(index, trial)] = (trial_duration, trial_data)
         return index_map
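
Each (index, trial) key now maps to a (duration, spike_times) tuple instead of the bare spike list, and the final assignment flushes the last trial after the loop. A hypothetical input fragment (file format assumed from the parsing code):

    # duration: 2.0sec
    # index: 0
    # trial: 0
    12.5
    48.0

would be parsed to {(0, 0): (2.0, [0.0125, 0.048])}: the duration is taken from the header line (assuming a "sec" suffix, which the [:-3] slice strips) and spike times are converted from ms to s.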
     def get(self, run_index, trial_index):
         if tuple([run_index, trial_index]) not in self._data_map.keys():
             print("Data not found for run %i and trial %i!" % (run_index, trial_index))
-            return None
+            return None, None
         return self._data_map[(run_index, trial_index)]
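
Callers must now unpack the returned tuple, as __read_spike_data_from_directory above already does. A usage sketch (indices invented; stimspikes being a StimSpikesFile instance):

duration, spike_times = stimspikes.get(run_index=2, trial_index=5)
if spike_times is not None:
    print(len(spike_times) / duration)  # rough firing rate in Hz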