From 07b60c6622cec0bbfaf1ff53d2c98e9812ce0248 Mon Sep 17 00:00:00 2001 From: Jan Grewe Date: Wed, 5 Aug 2020 19:19:24 +0200 Subject: [PATCH] [frontend] import filestim data from nix rund, add serveral methods --- fishbook/frontend/frontend_classes.py | 24 +- fishbook/frontend/relacs_classes.py | 314 ++++++++++++++++++++------ fishbook/frontend/util.py | 35 ++- 3 files changed, 291 insertions(+), 82 deletions(-) diff --git a/fishbook/frontend/frontend_classes.py b/fishbook/frontend/frontend_classes.py index f1ad200..a4d881c 100644 --- a/fishbook/frontend/frontend_classes.py +++ b/fishbook/frontend/frontend_classes.py @@ -420,7 +420,7 @@ class RePro: @property def dataset(self): datasets = (Cells & "cell_id = '%s'" % self.cell_id) * CellDatasetMap * Datasets - d = datasets.proj('dataset_id', 'data_source', 'experimenter', 'setup', 'recording_date', + d = datasets.proj('dataset_id', 'data_source', 'data_host', 'experimenter', 'setup', 'recording_date', 'quality', 'comment', 'duration', 'has_nix').fetch(limit=1, as_dict=True)[0] del d["cell_id"] return Dataset(tuple=d) @@ -447,7 +447,8 @@ class RePro: return [Stimulus(tuple=s) for s in stims] @staticmethod - def find(name=None, cell_id=None, cell_type=None, species=None, settings=[], quality=None, test=False): + def find(name=None, cell_id=None, cell_type=None, species=None, settings=[], quality=None, + min_date=None, max_date=None, test=False): """ Finds Repro runs in the database. When called without arguments, all RePro runs are found and returned. Search can be narrowed by providing further search hints. @@ -461,6 +462,8 @@ class RePro: species (str, optional): The species name, or parts of it. Defaults to None. settings (list of string, optional): List of repro settings e.g. ["contrast: 20", "am: false"]. An AND connection is assumed. Defaults to None. quality (str, optional): The quality assessment. Defaults to None. + min_date (datetime, optional): the minimum recording date. Dates may be given as datetime objects or string of the format "2010.01.01" or "2010-01-01". Defaults to None. + max_date (datetime, optional): the maximum recording date. Defaults to None. test (bool, optional): defines whether or not the database matches should be fetched from the database. Returns: list of fishbook.RePro: list of results or empty list if test == True @@ -481,10 +484,15 @@ class RePro: cells = cells & "cell_type like '%{0:s}%'".format(cell_type) if species: cells = cells & "species like '%{0:s}%'".format(species) + if min_date and isinstance(min_date, (dt.date, str)): + cells = cells & "recording_date >= '%s'" % min_date + if max_date and isinstance(min_date, (dt.date, str)): + cells = cells & "recording_date < '%s'" % max_date if quality: cells = cells & "quality like '{0:s}'".format(quality) - p = cells.proj("quality", "experimenter") + p = cells.proj("quality", "experimenter") repros = repros & p + results = [] total = len(repros) @@ -500,7 +508,7 @@ class RePro: return self.__tuple.copy() def __str__(self): - str = "RePro: %s\t id: %s\n" % (self.name, self.id) + str = "RePro id: %s\t repro name: %s\n" % (self.id, self.name) str += "run: %i\t on cell: %s\n" %(self.run, self.cell_id) str += "start time: %s\t duration: %s\n" % (self.start, self.duration) return str @@ -537,11 +545,13 @@ class Stimulus: @property def cell(self): - return Cells & "cell_id = %s" % self.__tuple["cell_id"] + embed() # FIXME + return Cells & ("cell_id = %s" % self.__tuple["cell_id"]) @property def repro(self): - return Repros & "repro_id = %s" % self.__tuple["repro_id"] + embed() # FIXME + return Repros & ("repro_id = %s" % self.__tuple["repro_id"]) @property def start_time(self): @@ -606,7 +616,7 @@ class Stimulus: progress(i+1, total, "fetching %i matches" % total) return results, total - + class Subject: """Representation of the recorded subject's properties. diff --git a/fishbook/frontend/relacs_classes.py b/fishbook/frontend/relacs_classes.py index 6a441c2..505b7c5 100644 --- a/fishbook/frontend/relacs_classes.py +++ b/fishbook/frontend/relacs_classes.py @@ -1,5 +1,5 @@ from .frontend_classes import Dataset, RePro, Stimulus -from .util import BoltzmannFit, unzip_if_needed, gaussian_kernel, zero_crossings +from .util import BoltzmannFit, unzip_if_needed, gaussian_kernel, zero_crossings, spike_times_to_rate import numpy as np import nixio as nix from scipy.stats import circstd @@ -7,6 +7,7 @@ from scipy.stats import circstd import os import subprocess from tqdm import tqdm +import yaml from IPython import embed @@ -14,6 +15,8 @@ from IPython import embed class BaselineData: """ Class representing the Baseline data that has been recorded within a given Dataset. + + This class provides access to basic measures estimated from the baseline activity. """ def __init__(self, dataset: Dataset): self.__spike_data = [] @@ -66,8 +69,11 @@ class BaselineData: def serial_correlation(self, max_lags=50): """ Returns the serial correlation of the interspike intervals. - @param max_lags: The number of lags to take into account - @return: the serial correlation as a function of the lag + + Args + max_lags (int, optional): The number of lags to take into account + Returns + list of float: the serial correlation as a function of the lag """ scs = [] for sd in self.__spike_data: @@ -79,13 +85,24 @@ class BaselineData: return scs def circular_std(self): + """The circular standard deviation of the baseline spikes. The circ. std. is given in radiant. + + Returns: + list of float: for each run of the baseline RePro there will be one entry. + """ circular_stds = [] for i in range(self.size): phases = self.__spike_phases(index=i) circular_stds.append(circstd(phases)) return circular_stds + @property def eod_frequency(self): + """The average baseline EOD frequency in Hz. + + Returns: + float: the EOD frequency averaged across runs. Given in Hz. + """ eod_frequencies = [] for i in range(self.size): eod, time = self.eod(i) @@ -105,6 +122,15 @@ class BaselineData: return phases def eod_times(self, index=0, interpolate=True): + """The times of the detected EODs. + + Args: + index (int, optional): The run of the BaselineActivity RePro. Defaults to 0. + interpolate (bool, optional): Defines whether a simple threshold mechanism is used or times are interpolated. Defaults to True. + + Returns: + numpy.ndarray: the eod times. + """ if index >= self.size: return None if len(self.__eod_times) < len(self.__eod_data): @@ -135,11 +161,22 @@ class BaselineData: index (int, optional): If the baseline activity has been recorded several times, the index can be given. Defaults to 0. Returns: - : [description] + numpy.adarray: the spike times """ return self.__spike_data[index] if len(self.__spike_data) >= index else None def membrane_voltage(self, index: int=0): + """[summary] + + Args: + index (int, optional): [description]. Defaults to 0. + + Raises: + IndexError: [description] + + Returns: + [type]: [description] + """ if index >= self.size: raise IndexError("Index %i out of bounds for size %i!" % (index, self.size)) if not self.__dataset.has_nix: @@ -158,18 +195,33 @@ class BaselineData: data = t.retrieve_data("V-1")[:] time = np.asarray(t.references["V-1"].dimensions[0].axis(len(data))) except: - data = np.empty() - time = np.empty() + data = np.empty(0) + time = np.empty(0) f.close() return time, data def eod(self, index: int=0): + """Returns the EOD data for a given run of the BaselineActivity RePro. + + Args: + index (int, optional): The run index. Defaults to 0. + + Returns: + numpy.ndarray: The eod trace. + numpy.ndarray: A matching time axis starting at time zero. + + """ eod = self.__eod_data[index] if len(self.__eod_data) >= index else None time = np.arange(len(eod)) / self.__dataset.samplerate return eod, time @property def burst_index(self): + """Fraction of spikes that occur in intervals of less than 1.5 times the EOD period. + + Returns: + list of float: burst indices for each repro run. + """ bi = [] for i, sd in enumerate(self.__spike_data): if len(sd) < 2: @@ -182,6 +234,11 @@ class BaselineData: @property def coefficient_of_variation(self): + """Coefficient of variation of the interspike intervals. + + Returns: + list of float: for each baseline repro run a single value of the CV. + """ cvs = [] for d in self.__spike_data: isis = np.diff(d) @@ -190,6 +247,12 @@ class BaselineData: @property def vector_strength(self): + """The vector strength with which the spikes lock to the fish's own EOD + + Returns: + list of float: the vector strength calculated separatedly for each repro run. + list of numpy.ndarray: the spike phases within the EOD period (in radiant). + """ vss = [] spike_phases = [] for i, sd in enumerate(self.__spike_data): @@ -203,6 +266,11 @@ class BaselineData: @property def size(self): + """The number of times the BaselineActivity RePro was run. + + Returns: + int: the number of baseline repro runs + """ return len(self.__spike_data) def __str__(self): @@ -221,7 +289,7 @@ class BaselineData: try: data = t.retrieve_data("EOD")[:] except: - data = np.empty() + data = np.empty(0) f.close() return data @@ -244,6 +312,8 @@ class BaselineData: print("Tag not found!") try: data = t.retrieve_data("Spikes-1")[:] + if data[0] < 0: + data = data[1:] # this is related to a nix::RangeDimension bug, should be fixed beyond 1.4.9 except: data = None @@ -284,19 +354,17 @@ class BaselineData: return np.asarray(data) - class FIData: - """ - Class representing the data recorded with the relacs FI-Curve repro. The instance will load the data upon - construction which may take a while. - FI Data offers convenient access to the spike and local EOD data as well as offers conveince methods to get the - firing rate and also to fit a Boltzmann function to the the FI curve. + """Class representing the data recorded with the relacs FI-Curve repro. The instance will load the data upon + construction which may take a while. + FI Data offers convenient access to the spike and local EOD data as well as offers conveince methods to get the + firing rate and also to fit a Boltzmann function to the the FI curve. """ def __init__(self, dataset: Dataset): - """ - Constructor. + """Constructor. - :param dataset: The dataset entity for which the fi curve repro data should be loaded. + Args: + fishbook.Dataset: The dataset entity for which the fi curve repro data should be loaded. """ self.__spike_data = [] self.__contrasts = [] @@ -306,7 +374,8 @@ class FIData: self.__repros = None self.__cell = dataset.cells[0] # Beware: Assumption that there is only a single cell self._get_data() - pass + if self.size < 1: + print("No FICurve data was recorded in dataset %s" % self.__dataset.id) def _get_data(self): if not self.__dataset: @@ -443,16 +512,19 @@ class FIData: """ The number of recorded trials - :return: An integer with the number of trials. + returns + int: the number of trials. """ return len(self.__spike_data) def spikes(self, index=-1): - """ - The spike times recorded in the specified trial(s) + """The spike times recorded in the specified trial(s) - :param index: the index of the trial. Default of -1 indicates that all data should be returned. - :return: + Args: + int, optional: the index of the trial. Default of -1 indicates that all data should be returned. + + Returns: + list of numpy.ndarray: the spike trains. """ if 0 <= index < self.size: return self.__spike_data[index] @@ -460,11 +532,13 @@ class FIData: return self.__spike_data def eod(self, index=-1): - """ - The local eod (including the stimulus) measurement of the selected trial(s). + """ The local eod (including the stimulus) measurement of the selected trial(s). - :param index: the index of the trial. Default of -1 indicates that all data should be returned. - :return: Either two vectors representing time and the local eod or two lists of such vectors + Args: + int, optional: the index of the trial. Default of -1 indicates that all data should be returned. + + Returns: + Either two vectors representing time and the local eod or two lists of such vectors """ if len(self.__eod_data) == 0: print("EOD data not available for old-style relacs data.") @@ -475,11 +549,13 @@ class FIData: return self.__eod_times, self.__eod_data def contrast(self, index=-1): - """ - The stimulus contrast used in the respective trial(s). + """ The stimulus contrast used in the respective trial(s). - :param index: the index of the trial. Default of -1 indicates that all data should be returned. - :return: Either a single scalar representing the contrast, or a list of such scalars, one entry for each trial. + Args: + int, optional: the index of the trial. Default of -1 indicates that all data should be returned. + + Returns: + Either a single scalar representing the contrast, or a list of such scalars, one entry for each trial. """ if 0 <= index < self.size: return self.__contrasts[index] @@ -499,22 +575,20 @@ class FIData: return self.__eod_times def rate(self, index=0, kernel_width=0.005): - """ - Returns the firing rate for a single trial. + """ Returns the firing rate for a single trial. Firing rate estimation using the kernel convolution method. + + Args: + int, optional: The index of the trial. 0 <= index < size + float, optional: kernel_width: The width of the gaussian kernel in seconds. Defaults to 0.005 s - :param index: The index of the trial. 0 <= index < size - :param kernel_width: The width of the gaussian kernel in seconds - :return: tuple of time and rate + Returns: + numpy.ndarray: a vector representing time + numpy.ndarray: a vector containing the firing rate. """ t = self.time_axis(index) - dt = np.mean(np.diff(t)) sp = self.spikes(index) - binary = np.zeros(t.shape) - spike_indices = ((sp - t[0]) / dt).astype(int) - binary[spike_indices[(spike_indices >= 0) & (spike_indices < len(binary))]] = 1 - g = gaussian_kernel(kernel_width, dt) - rate = np.convolve(binary, g, mode='same') - return t, rate + r = spike_times_to_rate(sp, t, kernel_width) + return t, r def boltzmann_fit(self, start_time=0.01, end_time=0.05, kernel_width=0.005): """ @@ -527,6 +601,9 @@ class FIData: :param kernel_width: standard deviation of the Gaussian kernel used for firing rate estimation. :return: object of type BoltzmannFit """ + if self.size < 1: + print("No FICurve data recorded in dataset %s" % self.__dataset.id) + return None contrasts = np.zeros(self.size) rates = np.zeros(self.size) for i in range(self.size): @@ -555,9 +632,12 @@ class FileStimulusData: self.__spike_data = [] self.__contrasts = [] self.__stimuli = [] + self.__delays = [] + self.__durations = [] self.__dataset = dataset self.__repros = None self.__cell = dataset.cells[0] # Beware: Assumption that there is only a single cell + self.__all_spikes = None self._get_data() def _get_data(self): @@ -565,52 +645,64 @@ class FileStimulusData: return self.__repros, _ = RePro.find("FileStimulus", cell_id=self.__cell.id) for r in self.__repros: - sd, c, stims = self.__read_spike_data_from_nix(r) if self.__dataset.has_nix else self.__read_spike_data_from_directory(r) - if sd is not None and len(sd) > 1: - self.__spike_data.extend(sd) - self.__contrasts.extend(c) + if self.__dataset.has_nix: + spikes, contrasts, stims, delays, durations = self.__read_spike_data_from_nix(r) + else: + spikes, contrasts, stims, delays, durations = self.__read_spike_data_from_directory(r) # TODO + if spikes is not None and len(spikes) > 1: + self.__spike_data.extend(spikes) + self.__contrasts.extend(contrasts) self.__stimuli.extend(stims) + self.__delays.extend(delays) + self.__durations.extend(durations) else: continue + def __find_contrast(self, repro_settings, stimulus_settings, has_nix=True): + contrast = 0.0 + for k in repro_settings.keys(): + if k.lower() == "contrast": + contrast = float(repro_settings[k].split("+")[0]) * (100 if has_nix else 1) + + if contrast < 0.00001: + for k in stimulus_settings.keys(): + if k.lower() == "contrast": + contrast = float(stimulus_settings[k].split("+")[0]) * (100 if has_nix else 1) + return contrast + def __do_read_spike_data_from_nix(self, mt: nix.pycore.MultiTag, stimulus: Stimulus, repro: RePro): spikes = None contrast = 0.0 - stim_file = "" - - r_settings = repro.settings.split("\n") - s_settings = stimulus.settings.split("\n") + + r_settings = yaml.safe_load(repro.settings.replace("\t", "")) + s_settings = yaml.safe_load(stimulus.settings.replace("\t", "")) + stim_file = r_settings["file"] delay = 0.0 - for s in r_settings: - if "delay:" in s: - delay = float(s.split(":")[-1]) - break + if "delay:" in map(str.lower, r_settings.keys()): + delay = float(s.split(":")[-1]) start_time = stimulus.start_time - delay end_time = stimulus.start_time + mt.extents[stimulus.index] - contrast = 0.0 # this is a quick fix!!! - embed() - for s in s_settings: - if "Contrast:" in s and "PreContrast" not in s and "\t\t" not in s and "+-" not in s: - contrast = float(s.split(":")[-1]) - break - - return spikes, contrast, stim_file - - - - - local_eod = eod_da[start_index_eod:end_index_eod] + duration = mt.extents[stimulus.index] + contrast = self.__find_contrast(r_settings, s_settings, True) + spikes = self.__all_spikes[(self.__all_spikes >= start_time) & (self.__all_spikes < end_time)] - start_time - delay + + return spikes, contrast, stim_file, delay, duration + """ + local_eod = eod_da[start_index_eod:end_index_eod] time = np.asarray(eod_da.dimensions[0].axis(end_index_eod - start_index_eod)) - delay return spikes, local_eod, time, contrast return spikes, contrast, stim_file + """ def __read_spike_data_from_nix(self, repro: RePro): spikes = [] contrasts = [] stim_files = [] - stimuli = Stimulus.find(cell_id=repro.cell_id, repro_id=repro.id) + delays = [] + durations = [] + stimuli, _ = Stimulus.find(cell_id=repro.cell_id, repro_id=repro.id) if len(stimuli) == 0: return spikes, contrasts, stim_files data_source = os.path.join(self.__dataset.data_source, self.__dataset.id + ".nix") @@ -625,26 +717,100 @@ class FileStimulusData: s = stimuli[i] if not mt or mt.id != s.multi_tag_id: mt = b.multi_tags[s.multi_tag_id] - sp, c, stim = self.__do_read_spike_data_from_nix(mt, s, repro) + + sp, c, stim, delay, duration = self.__do_read_spike_data_from_nix(mt, s, repro) spikes.append(sp) contrasts.append(c) stim_files.append(stim) + delays.append(delay) + durations.append(duration) f.close() - return spikes, contrasts, stim_files + return spikes, contrasts, stim_files, delays, contrasts def __read_spike_data_from_directory(self, repro: RePro): - print("not yet my friend!") + print("not yet, my friend!") spikes = [] - contrast = 0.0 - stim = None + contrasts = [] + stim_files = [] + delays = [] + durations = [] - return spikes, contrast, stim + return spikes, contrasts, stim_files, delays, durations def read_stimulus(self, index=0): pass + + @property + def size(self): + return len(self.__spike_data) + + def spikes(self, index=-1): + if index == -1: + return self.__spike_data + elif index >= 0 and index < self.size: + return self.__spike_data[index] + else: + raise IndexError("FileStimulusData: index %i out of bounds for spike data of size %i" % (index, self.size)) + def contrast(self, index=-1): + if index == -1: + return self.__contrasts + elif index >=0 and index < self.size: + return self.__contrasts[index] + else: + raise IndexError("FileStimulusData: index %i out of bounds for contrasts data of size %i" % (index, self.size)) + def stimulus_files(self, index=-1): + if index == -1: + return self.__stimuli + elif index >=0 and index < self.size: + return self.__stimuli[index] + else: + raise IndexError("FileStimulusData: index %i out of bounds for contrasts data of size %i" % (index, self.size)) + + def time_axis(self, index=-1): + """ + Get the time axis of a single trial or a list of time-vectors for all trials. + + :param index: the index of the trial. Default of -1 indicates that all data should be returned. + :return: Either a single vector representing time, or a list of such vectors, one for each trial. + """ + if 0 <= index < self.size: + delay = self.__delays[index] + duration = self.__durations[index] + return np.arange(delay, duration, 1./self.__dataset.samplerate) + elif index == -1: + axes = [] + for i in range(self.size): + delay = self.__delays[i] + duration = self.__durations[i] + axes.append(np.arange(delay, duration, 1./self.__dataset.samplerate)) + return axes + else: + raise IndexError("FileStimulusData: index %i out of bounds for time_axes of size %i" % (index, self.size)) + + def rate(self, index=-1, kernel_width=0.005): + if index == -1: + time_axes = [] + rates = [] + for i in range(self.size): + t = self.time_axis(i) + spikes = self.spikes(i) + r = spike_times_to_rate(spikes, t, kernel_width) + time_axes.append(t) + rates.append(r) + return time_axes, rates + elif index >= 0 and index < self.size: + t = self.time_axis(index) + spikes = self.spikes(index) + r = spike_times_to_rate(spikes, t, kernel_width) + return t, r + else: + raise IndexError("FileStimulusData: index %i out of bounds for time_axes of size %i" % (index, self.size)) + + if __name__ == "__main__": + # dataset = Dataset(dataset_id='2011-06-14-ag') dataset = Dataset(dataset_id="2018-09-13-ac-invivo-1") # dataset = Dataset(dataset_id='2013-04-18-ac') diff --git a/fishbook/frontend/util.py b/fishbook/frontend/util.py index 1a00d8a..bd2fa36 100644 --- a/fishbook/frontend/util.py +++ b/fishbook/frontend/util.py @@ -1,6 +1,25 @@ import numpy as np from scipy.optimize import curve_fit +def spike_times_to_rate(spike_times, time_axis, kernel_width=0.005): + """Convert spike times to a rate by means of kernel convolution. A Gaussian kernel of the desired width is used. + + Args: + spike_times (numpy.ndarray): the spike times in seconds. + time_axis (np.ndarray): the time axis with a proper resolution and extent. (in seconds) + kernel_width (float, optional): the standard deviation of the Gausian kernel. Defaults to 0.005. + + Returns: + np.ndarray: the firing rate in Hz. + """ + dt = np.mean(np.diff(time_axis)) + binary = np.zeros(time_axis.shape) + spike_indices = ((spike_times - time_axis[0]) / dt).astype(int) + binary[spike_indices[(spike_indices >= 0) & (spike_indices < len(binary))]] = 1 + g = gaussian_kernel(kernel_width, dt) + rate = np.convolve(binary, g, mode='same') + return rate + def safe_get_val(dictionary:dict, key, default=None): return dictionary[key] if key in dictionary.keys() else default @@ -45,6 +64,12 @@ def zero_crossings(x, t, interpolate=False): def unzip_if_needed(dataset, tracename='trace-1.raw'): + """[summary] + + Args: + dataset ([type]): [description] + tracename (str, optional): [description]. Defaults to 'trace-1.raw'. + """ file_name = os.path.join(dataset, tracename) if os.path.exists(file_name): return @@ -54,12 +79,20 @@ def unzip_if_needed(dataset, tracename='trace-1.raw'): def gaussian_kernel(sigma, dt): + """Creates a gaussian kernel with the integral of one. + + Args: + sigma ([type]): [description] + dt ([type]): [description] + + Returns: + [type]: [description] + """ x = np.arange(-4. * sigma, 4. * sigma, dt) y = np.exp(-0.5 * (x / sigma) ** 2) / np.sqrt(2. * np.pi) / sigma return y - class BoltzmannFit: """ Class representing a fit of a Boltzmann function to some data.