Merge pull request 'several fixes and improvements for handling old relacs files' (#4) from relacs into master

Reviewed-on: jgrewe/fishBook#4
csachgau 2020-08-14 17:04:41 +02:00
commit e95dd763a4
5 changed files with 123 additions and 33 deletions

View File

@@ -457,14 +457,7 @@ def scan_folder_for_repros(dataset):
     cell_id = (Cells * CellDatasetMap * (Datasets & "dataset_id = '%s'" % dataset["dataset_id"])).fetch("cell_id", limit=1)[0]
     for rs, si in zip(repro_settings, stim_indices):
         rp = Repros.get_template_tuple()
-        path = []
-        if not find_key_recursive(rs, "run", path):
-            find_key_recursive(rs, "Run", path)
-        if len(path) > 0:
-            rp["run"] = deep_get(rs, path, 0)
-        else:
-            rp["run"] = -1
         path = []
         if not find_key_recursive(rs, "repro", path):
             find_key_recursive(rs, "RePro", path)
@@ -474,7 +467,16 @@ def scan_folder_for_repros(dataset):
         if rp["repro_name"] in repro_counts.keys():
            repro_counts[rp["repro_name"]] += 1
         else:
-            repro_counts[rp["repro_name"]] = 1
+            repro_counts[rp["repro_name"]] = 0
+        path = []
+        if not find_key_recursive(rs, "run", path):
+            find_key_recursive(rs, "Run", path)
+        if len(path) > 0:
+            rp["run"] = deep_get(rs, path, 0)
+        else:  # the run information is not there and needs to be fixed!
+            rp["run"] = repro_counts[rp["repro_name"]]
         rp["cell_id"] = cell_id
         rp["repro_id"] = rp["repro_name"] + str(repro_counts[rp["repro_name"]])
         rp["start"] = 0.

View File

@@ -84,7 +84,7 @@ def has_signal(line, col_names):
     values = line.split()
     for i, n in enumerate(col_names):
         if n.lower() == "signal" and i < len(values):
-            if len(values[i].strip()) > 0 and values[i].strip()[0] != "-":
+            if len(values[i].strip()) > 0 and (values[i].strip()[0] != "-" and values[i].strip() != "init"):
                 return True
     return False
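
For illustration, invented stimuli.dat rows under an assumed column layout; with the new condition, the "init" entries that appear in old relacs files no longer count as presented stimuli:

col_names = ["index", "signal", "intensity"]                # assumed header
print(has_signal("1  gwn300Hz10s0.3.dat  0.2", col_names))  # True: a stimulus file
print(has_signal("2  -  0.2", col_names))                   # False: no signal
print(has_signal("3  init  0.2", col_names))                # False: "init" marker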

View File

@@ -6,6 +6,7 @@ import numpy as np
 from IPython import embed
 from fishbook.backend.util import progress
 import datetime as dt
+import yaml

 class Cell:
     """The Cell class represents a recorded cell. It is characterized by *id*, the cell *type*, the *firing_rate*, and the recording *location*.
@@ -162,7 +163,7 @@
         if tuple:
             self.__tuple = tuple
         elif dataset_id:
-            pattern = "dataset_id like '{0:s}'".format(dataset_id)
+            pattern = "dataset_id like '%{0:s}%'".format(dataset_id)
             dsets = (Datasets & pattern)
             results_check(dsets, dataset_id, "Dataset ID")
             self.__tuple = dsets.fetch(limit=1, as_dict=True)[0]
@@ -312,11 +313,12 @@
         return len(Datasets())

     @staticmethod
-    def find(min_duration=None, experimenter=None, quality=None, min_date=None, max_date=None, test=False):
+    def find(dataset_id=None, min_duration=None, experimenter=None, quality=None, min_date=None, max_date=None, test=False):
         """Find dataset entries in the database. You may restrict the search by providing the following arguments. All restrictions are connected
         with a logical AND.

         Args:
+            dataset_id (str, optional): the id of the dataset; if it matches a single entry, all other restrictions are ignored. Defaults to None.
             min_duration (float, optional): minimum duration of the recording session; if not given, datasets of any duration are returned. Defaults to None.
             experimenter (str, optional): the name of the experimenter who did the recording; it does not need to be the full name. Defaults to None.
             quality (str, optional): the quality assigned to the dataset during recording (e.g. good, fair, poor). Defaults to None.
@@ -328,6 +330,10 @@
             int: Count of matching results
         """
         dataset_list = Datasets()
+        if dataset_id:
+            dataset_list = dataset_list & "dataset_id like '%{0:s}%'".format(dataset_id)
+            if len(dataset_list) == 1:
+                return [Dataset(tuple=dataset_list.fetch(as_dict=True)[0])], 1
         if min_duration:
             dataset_list = dataset_list & "duration > %.2f" % min_duration
         if experimenter:
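
The dataset_id shortcut returns immediately on a unique match and skips the remaining filters; a non-unique id simply stays in the query as a substring pattern ANDed with the other restrictions. Usage sketch (ids and values invented):

datasets, count = Dataset.find(dataset_id="2018-11-09-aa")            # unique id: ([Dataset], 1)
datasets, count = Dataset.find(dataset_id="2018-11", quality="good")  # substring, further filtered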
@@ -377,6 +383,16 @@
     def _tuple(self):
         return self.__tuple.copy()

+    @property
+    def yaml(self):
+        settings = yaml.dump({"dataset id": self.id, "recording date": self.recording_date,
+                              "duration": self.recording_duration, "comment": self.comment,
+                              "experimenter": self.experimenter, "quality": self.quality,
+                              "data_source": self.data_source, "host": self.data_host,
+                              "setup": self.setup, "nixed": self.has_nix})
+        return settings

     def __str__(self):
         str = "dataset id: %s\n" % self.id
         str += "recorded: %s \t by: %s\n" % (self.recording_date, self.experimenter)
@@ -385,6 +401,8 @@
         str += "comment: %s" % self.comment
         return str

+    def __repr__(self):
+        return self.__str__()
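
The new yaml property is a thin convenience around yaml.dump, e.g. for logging a dataset summary; note that yaml.dump sorts keys alphabetically by default. Illustrative use (id and output values invented, abridged):

ds = Dataset(dataset_id="2018-11-09-aa")
print(ds.yaml)
# comment: ''
# data_source: /data/2018-11-09-aa
# dataset id: 2018-11-09-aa
# duration: 1.5
# ...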

 class RePro:
     """The RePro class represents an entry in the repro table. This is a run of a certain relacs "Research Protocol".
@@ -513,6 +531,17 @@
         str += "start time: %s\t duration: %s\n" % (self.start, self.duration)
         return str

+    def __repr__(self):
+        return self.__str__()
+
+    @property
+    def to_dict(self):
+        r_settings = yaml.safe_load(self.settings.replace("\t", "    "))
+        settings = {"repro id": self.id, "run": self.run, "cell": self.cell_id,
+                    "name": self.name, "start": self.start, "duration": self.duration,
+                    "settings": r_settings}
+        return settings

 class Stimulus:
     """The stimulus class represents a Stimulus that was presented. A Stimulus has several properties
@@ -617,6 +646,17 @@
         return results, total

+    def __repr__(self):
+        return self.__str__()
+
+    @property
+    def to_dict(self):
+        s_settings = yaml.safe_load(self.settings.replace("\t", "    "))
+        settings = {"id": self.id, "run": self.run, "stimulus name": self.name,
+                    "stimulus index": self.index, "duration": self.duration, "start time": self.start_time,
+                    "name": self.name, "settings": s_settings}
+        return settings
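
Both to_dict properties re-parse the tab-indented relacs settings string via yaml.safe_load and bundle it with the entry's metadata, so a repro or stimulus can be serialized in one step. Usage sketch (ids invented; Stimulus.find used as elsewhere in this diff):

stimuli, _ = Stimulus.find(cell_id="2018-11-09-aa", repro_id="FileStimulus_0")
info = stimuli[0].to_dict
print(info["stimulus name"], info["duration"])
print(info["settings"])  # nested dict parsed from the relacs settings string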

 class Subject:
     """Representation of the recorded subject's properties.

View File

@@ -18,7 +18,12 @@ class BaselineData:
     This class provides access to basic measures estimated from the baseline activity.
     """
-    def __init__(self, dataset: Dataset):
+    def __init__(self, dataset=None, dataset_id=None):
+        if dataset is None:
+            d, _ = Dataset.find(dataset_id=dataset_id)
+            if len(d) != 1:
+                raise ValueError("Dataset id not found or not unique")
+            dataset = d[0]
         self.__spike_data = []
         self.__eod_data = []
         self.__eod_times = []
@@ -66,6 +70,21 @@
         a_corr = a_corr[int(len(a_corr) / 2):]
         return a_corr[:max_lags]

+    @property
+    def baseline_rate(self):
+        """The average baseline firing rate for each run of the baseline repro.
+
+        Returns:
+            list of float: the average firing rate
+        """
+        rates = []
+        for i in range(self.size):
+            spikes = self.spikes(i)
+            max_time = np.floor(spikes[-1])
+            min_time = np.ceil(spikes[0])
+            rates.append(len(spikes[(spikes >= min_time) & (spikes < max_time)]) / (max_time - min_time))
+        return rates
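
The ceil/floor pair clips the analysis window to whole seconds between the first and last spike, so partially covered edge seconds do not bias the rate estimate. A worked toy example (spike times invented):

import numpy as np

spikes = np.array([0.7, 1.2, 1.9, 2.4, 3.1, 3.8, 4.2])      # seconds
max_time = np.floor(spikes[-1])                              # 4.0
min_time = np.ceil(spikes[0])                                # 1.0
n = len(spikes[(spikes >= min_time) & (spikes < max_time)])  # 5 spikes in [1.0, 4.0)
print(n / (max_time - min_time))                             # ~1.67 Hz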
     def serial_correlation(self, max_lags=50):
         """
         Returns the serial correlation of the interspike intervals.
@@ -255,7 +274,7 @@
         """
         vss = []
         spike_phases = []
-        for i, sd in enumerate(self.__spike_data):
+        for i in range(self.size):
             phases = self.__spike_phases(i)
             ms_sin_alpha = np.mean(np.sin(phases)) ** 2
             ms_cos_alpha = np.mean(np.cos(phases)) ** 2
@@ -631,7 +650,8 @@
         """
         self.__spike_data = []
         self.__contrasts = []
-        self.__stimuli = []
+        self.__stimulus_files = []
+        self.__stimulus_settings = []
         self.__delays = []
         self.__durations = []
         self.__dataset = dataset
@@ -641,6 +661,13 @@
         self.__stimspikes = None
         self._get_data()

+    @property
+    def dataset(self):
+        return self.__dataset
+
+    @property
+    def cell(self):
+        return self.__cell

     def _get_data(self):
         if not self.__dataset:
@@ -651,15 +678,16 @@
             self.__stimspikes = StimSpikesFile(self.__dataset.data_source)
         for r in self.__repros:
             if self.__dataset.has_nix:
-                spikes, contrasts, stims, delays, durations = self.__read_spike_data_from_nix(r)
+                spikes, contrasts, stims, delays, durations, stim_settings = self.__read_spike_data_from_nix(r)
             else:
-                spikes, contrasts, stims, delays, durations = self.__read_spike_data_from_directory(r)
+                spikes, contrasts, stims, delays, durations, stim_settings = self.__read_spike_data_from_directory(r)
             if spikes is not None and len(spikes) > 0:
                 self.__spike_data.extend(spikes)
                 self.__contrasts.extend(contrasts)
-                self.__stimuli.extend(stims)
+                self.__stimulus_files.extend(stims)
                 self.__delays.extend(delays)
                 self.__durations.extend(durations)
+                self.__stimulus_settings.extend(stim_settings)
             else:
                 continue
@@ -703,7 +731,7 @@
         delay = float(r_settings["delay"].split(":")[-1])
         start_time = stimulus.start_time - delay
         end_time = stimulus.start_time + mt.extents[stimulus.index]
-        duration = mt.extents[stimulus.index]
+        duration = float(mt.extents[stimulus.index])
         contrast = self.__find_contrast(r_settings, s_settings, True)
         spikes = self.__all_spikes[(self.__all_spikes >= start_time) & (self.__all_spikes < end_time)] - start_time - delay
@@ -723,9 +751,12 @@
         stim_files = []
         delays = []
         durations = []
+        settings = []
+        repro_settings = repro.to_dict
         r_settings = yaml.safe_load(repro.settings.replace("\t", ""))
         stimuli, _ = Stimulus.find(cell_id=repro.cell_id, repro_id=repro.id)
         if len(stimuli) == 0:
-            return spikes, contrasts, stim_files
+            return spikes, contrasts, stim_files, [], [], []
         data_source = os.path.join(self.__dataset.data_source, self.__dataset.id + ".nix")
         if not os.path.exists(data_source):
             print("Data not found! Trying from directory")
@@ -740,14 +771,16 @@
             mt = b.multi_tags[s.multi_tag_id]
             sp, c, stim, delay, duration = self.__do_read_spike_data_from_nix(mt, s, repro)
-            if len(sp) > 0:
+            if len(sp) > 5:
                 spikes.append(sp)
                 contrasts.append(c)
                 stim_files.append(stim)
                 delays.append(delay)
                 durations.append(duration)
+                stim_settings = s.to_dict
+                settings.append({"stimulus": stim_settings, "repro": repro_settings})
         f.close()
-        return spikes, contrasts, stim_files, delays, contrasts
+        return spikes, contrasts, stim_files, delays, durations, settings

     def __read_spike_data_from_directory(self, repro: RePro):
         stimuli, _ = Stimulus.find(cell_id=repro.cell_id, repro_id=repro.id)
@@ -756,23 +789,30 @@
         stim_files = []
         delays = []
         durations = []
+        settings = []
         r_settings = yaml.safe_load(repro.settings.replace("\t", ""))
         r_settings = r_settings["project"] if "project" in r_settings.keys() else r_settings
+        repro_settings = repro.to_dict
         for s in stimuli:
             s_settings = yaml.safe_load(s.settings.replace("\t", ""))
             s_settings = s_settings["project"] if "project" in s_settings.keys() else s_settings
             contrast = self.__find_contrast(r_settings, s_settings, False)
-            duration = float(s_settings["duration"][:-2]) / 1000
-            sp = self.__stimspikes.get(s.run, s.index)
-            if not sp or len(sp) < 1:
+            dur, sp = self.__stimspikes.get(s.run, s.index)
+            if not sp or len(sp) < 5:
                 continue
+            if "duration" in s_settings.keys():
+                duration = float(s_settings["duration"][:-2]) / 1000
+            else:
+                duration = dur
             contrasts.append(contrast)
             delays.append(float(r_settings["before"][:-2]) / 1000)
             durations.append(duration)
             stim_files.append(s_settings["file"])
             spikes.append(sp)
-        return spikes, contrasts, stim_files, delays, durations
+            settings.append({"stimulus": s.to_dict, "repro": repro_settings})
+        return spikes, contrasts, stim_files, delays, durations, settings

     def read_stimulus(self, index=0):
         pass
@@ -789,6 +829,11 @@
         else:
             raise IndexError("FileStimulusData: index %i out of bounds for spike data of size %i" % (index, self.size))

+    def stimulus_settings(self, index=0):
+        if index >= self.size:
+            raise IndexError("FileStimulusData: index %i is out of bounds for spike data of size %i" % (index, self.size))
+        return self.__stimulus_settings[index]
+
     def contrast(self, index=-1):
         if index == -1:
             return self.__contrasts
@@ -799,9 +844,9 @@
     def stimulus_files(self, index=-1):
         if index == -1:
-            return self.__stimuli
+            return self.__stimulus_files
         elif index >= 0 and index < self.size:
-            return self.__stimuli[index]
+            return self.__stimulus_files[index]
         else:
             raise IndexError("FileStimulusData: index %i out of bounds for stimulus files of size %i" % (index, self.size))
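
Together with the renamed accessors, each trial now exposes both the stimulus file that was played and the merged stimulus/repro settings dict. Assumed usage (dataset id invented; constructor signature as implied by the class):

datasets, _ = Dataset.find(dataset_id="2018-11-09-aa")
fs_data = FileStimulusData(datasets[0])
for i in range(fs_data.size):
    trial = fs_data.stimulus_settings(i)   # {"stimulus": ..., "repro": ...}
    print(fs_data.stimulus_files(i), fs_data.contrast(i), trial["repro"]["name"])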

View File

@@ -200,28 +200,31 @@ class StimSpikesFile:
         index_map = {}
         trial_data = []
+        trial_duration = 0.0
         index = 0
         trial = 0
         for l in lines:
             l = l.strip()
+            if "duration:" in l:
+                trial_duration = float(l[1:].strip().split(":")[-1][:-3])
             if "index:" in l:
                 if len(trial_data) > 0:
-                    index_map[(index, trial)] = trial_data
+                    index_map[(index, trial)] = (trial_duration, trial_data)
                     trial_data = []
                 index = int(l[1:].strip().split(":")[-1])
             if "trial:" in l:
                 if len(trial_data) > 0:
-                    index_map[(index, trial)] = trial_data
+                    index_map[(index, trial)] = (trial_duration, trial_data)
                     trial_data = []
                 trial = int(l[1:].strip().split(":")[-1])
             if len(l) > 0 and "#" not in l:
                 trial_data.append(float(l) / 1000)
+        index_map[(index, trial)] = (trial_duration, trial_data)
         return index_map
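
Each (index, trial) key now maps to a (duration, spike_times) tuple instead of the bare spike list, and the final assignment flushes the last trial after the loop. A hypothetical input fragment (file format assumed from the parsing code):

    # duration: 2.0sec
    # index: 0
    # trial: 0
    12.5
    48.0

would be parsed to {(0, 0): (2.0, [0.0125, 0.048])}: the duration is taken from the header line (assuming a "sec" suffix, which the [:-3] slice strips) and spike times are converted from ms to s.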
     def get(self, run_index, trial_index):
         if tuple([run_index, trial_index]) not in self._data_map.keys():
             print("Data not found for run %i and trial %i!" % (run_index, trial_index))
-            return None
+            return None, None
         return self._data_map[(run_index, trial_index)]
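
Callers must now unpack the returned tuple, as __read_spike_data_from_directory above already does. A usage sketch (indices invented; stimspikes being a StimSpikesFile instance):

duration, spike_times = stimspikes.get(run_index=2, trial_index=5)
if spike_times is not None:
    print(len(spike_times) / duration)  # rough firing rate in Hz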