16 Commits

Author SHA1 Message Date
wendtalexander
4e7bd40ea4 Merge branch 'master' into chirp_bodylength 2023-01-24 13:24:26 +01:00
wendtalexander
fd2207c8c5 finishing plot chirp_body length 2023-01-24 13:23:12 +01:00
wendtalexander
ce560bf939 export functions in modules, plot chirp 2023-01-24 12:06:29 +01:00
weygoldt
ab263d26a2 Merge branch 'master' of https://whale.am28.uni-tuebingen.de/git/raab/GP2023_chirp_detection 2023-01-24 11:43:15 +01:00
wendtalexander
2a32a29d4e plot winner loser chirp counts 2023-01-24 11:43:14 +01:00
weygoldt
5763e807d0 better chirpdetector 2023-01-24 11:43:10 +01:00
wendtalexander
f36f8606d8 Merge branch 'master' into chirp_bodylength 2023-01-24 09:12:51 +01:00
wendtalexander
fce3503049 finished scp script 2023-01-24 09:11:54 +01:00
wendtalexander
1064261385 Merge branch 'master' of https://whale.am28.uni-tuebingen.de/git/raab/GP2023_chirp_detection 2023-01-24 09:05:58 +01:00
wendtalexander
fc27fabdb3 scp files 2023-01-24 09:05:57 +01:00
weygoldt
6193dab97d added recs 2023-01-24 09:04:57 +01:00
wendtalexander
c967a4e5a9 save plot 2023-01-24 08:14:31 +01:00
weygoldt
87d66dfc2f searchf works and debug mode 2023-01-23 20:27:16 +01:00
weygoldt
6159121d76 Merge branch 'master' of https://whale.am28.uni-tuebingen.de/git/raab/GP2023_chirp_detection 2023-01-23 16:09:43 +01:00
weygoldt
a3ddd49040 all broken 2023-01-23 16:09:16 +01:00
wendtalexander
c6facd6f0c adding mask for bodylength 2023-01-23 14:51:36 +01:00
9 changed files with 519 additions and 268 deletions

View File

@@ -18,6 +18,7 @@ from modules.datahandling import (
purge_duplicates,
group_timestamps,
instantaneous_frequency,
minmaxnorm
)
logger = makeLogger(__name__)
@@ -26,7 +27,7 @@ ps = PlotStyle()
@dataclass
class PlotBuffer:
class ChirpPlotBuffer:
"""
Buffer to save data that is created in the main detection loop
@@ -83,6 +84,7 @@ class PlotBuffer:
q50 + self.search_frequency + self.config.minimal_bandwidth / 2,
q50 + self.search_frequency - self.config.minimal_bandwidth / 2,
)
print(search_upper, search_lower)
# get indices on raw data
start_idx = (self.t0 - 5) * self.data.raw_rate
@@ -94,7 +96,8 @@ class PlotBuffer:
self.time = self.time - self.t0
self.frequency_time = self.frequency_time - self.t0
chirps = np.asarray(chirps) - self.t0
if len(chirps) > 0:
chirps = np.asarray(chirps) - self.t0
self.t0_old = self.t0
self.t0 = 0
@@ -130,7 +133,7 @@ class PlotBuffer:
data_oi,
self.data.raw_rate,
self.t0 - 5,
[np.max(self.frequency) - 200, np.max(self.frequency) + 200]
[np.min(self.frequency) - 100, np.max(self.frequency) + 200]
)
for track_id in self.data.ids:
@@ -145,14 +148,15 @@ class PlotBuffer:
# get tracked frequencies and their times
f = self.data.freq[window_idx]
t = self.data.time[
self.data.idx[self.data.ident == self.track_id]]
tmask = (t >= t0_track) & (t <= (t0_track + dt_track))
# t = self.data.time[
# self.data.idx[self.data.ident == self.track_id]]
# tmask = (t >= t0_track) & (t <= (t0_track + dt_track))
t = self.data.time[self.data.idx[window_idx]]
if track_id == self.track_id:
ax0.plot(t[tmask]-self.t0_old, f, lw=lw,
ax0.plot(t-self.t0_old, f, lw=lw,
zorder=10, color=ps.gblue1)
else:
ax0.plot(t[tmask]-self.t0_old, f, lw=lw,
ax0.plot(t-self.t0_old, f, lw=lw,
zorder=10, color=ps.gray, alpha=0.5)
ax0.fill_between(
@@ -180,10 +184,11 @@ class PlotBuffer:
# spec_times[0], spec_times[-1],
# color=ps.gblue2, lw=2, ls="dashed")
for chirp in chirps:
ax0.scatter(
chirp, np.median(self.frequency) + 150, c=ps.black, marker="v"
)
if len(chirps) > 0:
for chirp in chirps:
ax0.scatter(
chirp, np.median(self.frequency) + 150, c=ps.black, marker="v"
)
# plot waveform of filtered signal
ax1.plot(self.time, self.baseline * waveform_scaler,
@@ -318,7 +323,7 @@ def plot_spectrogram(
aspect="auto",
origin="lower",
interpolation="gaussian",
alpha=1,
alpha=0.6,
)
# axis.use_sticky_edges = False
return spec_times
@@ -431,6 +436,28 @@ def window_median_all_track_ids(
return frequency_percentiles, track_ids
def array_center(array: np.ndarray) -> float:
    """
    Return the center value of an array.

    For an even-length array this is the mean of the
    two middle elements; for an odd length it is the
    single middle element.

    Parameters
    ----------
    array : np.ndarray
        Array to calculate the center from.

    Returns
    -------
    float
    """
    n = len(array)
    mid = n // 2
    if n % 2 == 0:
        # even length: average the two elements around the midpoint
        return np.mean(array[mid - 1: mid + 1])
    return array[mid]
def find_searchband(
current_frequency: np.ndarray,
percentiles_ids: np.ndarray,
@@ -464,15 +491,17 @@ def find_searchband(
# frequency window where second filter filters is potentially allowed
# to filter. This is the search window, in which we want to find
# a gap in the other fish's EODs.
current_median = np.median(current_frequency)
search_window = np.arange(
np.median(current_frequency) + config.search_df_lower,
np.median(current_frequency) + config.search_df_upper,
current_median + config.search_df_lower,
current_median + config.search_df_upper,
config.search_res,
)
# search window in boolean
search_window_bool = np.ones_like(len(search_window), dtype=bool)
bool_lower = np.ones_like(search_window, dtype=bool)
bool_upper = np.ones_like(search_window, dtype=bool)
search_window_bool = np.ones_like(search_window, dtype=bool)
# make seperate arrays from the qartiles
q25 = np.asarray([i[0] for i in frequency_percentiles])
@@ -480,7 +509,7 @@ def find_searchband(
# get tracks that fall into search window
check_track_ids = percentiles_ids[
(q25 > search_window[0]) & (
(q25 > current_median) & (
q75 < search_window[-1])
]
@@ -492,11 +521,10 @@ def find_searchband(
q25_temp = q25[percentiles_ids == check_track_id]
q75_temp = q75[percentiles_ids == check_track_id]
print(q25_temp, q75_temp)
search_window_bool[
(search_window > q25_temp) & (search_window < q75_temp)
] = False
bool_lower[search_window > q25_temp - config.search_res] = False
bool_upper[search_window < q75_temp + config.search_res] = False
search_window_bool[(bool_lower == False) &
(bool_upper == False)] = False
# find gaps in search window
search_window_indices = np.arange(len(search_window))
@@ -509,6 +537,9 @@ def find_searchband(
nonzeros = search_window_gaps[np.nonzero(search_window_gaps)[0]]
nonzeros = nonzeros[~np.isnan(nonzeros)]
if len(nonzeros) == 0:
return config.default_search_freq
# if the first value is -1, the array starst with true, so a gap
if nonzeros[0] == -1:
stops = search_window_indices[search_window_gaps == -1]
@@ -543,16 +574,14 @@ def find_searchband(
# the center of the search frequency band is then the center of
# the longest gap
search_freq = (
longest_search_window[-1] - longest_search_window[0]
) / 2
search_freq = array_center(longest_search_window) - current_median
return search_freq
return config.default_search_freq
def main(datapath: str, plot: str) -> None:
def chirpdetection(datapath: str, plot: str, debug: str = 'false') -> None:
assert plot in [
"save",
@@ -560,7 +589,17 @@ def main(datapath: str, plot: str) -> None:
"false",
], "plot must be 'save', 'show' or 'false'"
assert debug in [
"false",
"electrode",
"fish",
], "debug must be 'false', 'electrode' or 'fish'"
if debug != "false":
assert plot == "show", "debug mode only runs when plot is 'show'"
# load raw file
print('datapath', datapath)
data = LoadData(datapath)
# load config file
@@ -651,14 +690,14 @@ def main(datapath: str, plot: str) -> None:
# approximate sampling rate to compute expected durations if there
# is data available for this time window for this fish id
track_samplerate = np.mean(1 / np.diff(data.time))
expected_duration = (
(window_start_seconds + window_duration_seconds)
- window_start_seconds
) * track_samplerate
# track_samplerate = np.mean(1 / np.diff(data.time))
# expected_duration = (
# (window_start_seconds + window_duration_seconds)
# - window_start_seconds
# ) * track_samplerate
# check if tracked data available in this window
if len(current_frequencies) < expected_duration / 2:
if len(current_frequencies) < 3:
logger.warning(
f"Track {track_id} has no data in window {st}, skipping."
)
@@ -750,11 +789,11 @@ def main(datapath: str, plot: str) -> None:
baseline_envelope = -baseline_envelope
baseline_envelope = envelope(
signal=baseline_envelope,
samplerate=data.raw_rate,
cutoff_frequency=config.baseline_envelope_envelope_cutoff,
)
# baseline_envelope = envelope(
# signal=baseline_envelope,
# samplerate=data.raw_rate,
# cutoff_frequency=config.baseline_envelope_envelope_cutoff,
# )
# compute the envelope of the search band. Peaks in the search
# band envelope correspond to troughs in the baseline envelope
@@ -788,25 +827,25 @@ def main(datapath: str, plot: str) -> None:
# compute the envelope of the signal to remove the oscillations
# around the peaks
baseline_frequency_samplerate = np.mean(
np.diff(baseline_frequency_time)
)
# baseline_frequency_samplerate = np.mean(
# np.diff(baseline_frequency_time)
# )
baseline_frequency_filtered = np.abs(
baseline_frequency - np.median(baseline_frequency)
)
baseline_frequency_filtered = highpass_filter(
signal=baseline_frequency_filtered,
samplerate=baseline_frequency_samplerate,
cutoff=config.baseline_frequency_highpass_cutoff,
)
# baseline_frequency_filtered = highpass_filter(
# signal=baseline_frequency_filtered,
# samplerate=baseline_frequency_samplerate,
# cutoff=config.baseline_frequency_highpass_cutoff,
# )
baseline_frequency_filtered = envelope(
signal=-baseline_frequency_filtered,
samplerate=baseline_frequency_samplerate,
cutoff_frequency=config.baseline_frequency_envelope_cutoff,
)
# baseline_frequency_filtered = envelope(
# signal=-baseline_frequency_filtered,
# samplerate=baseline_frequency_samplerate,
# cutoff_frequency=config.baseline_frequency_envelope_cutoff,
# )
# CUT OFF OVERLAP ---------------------------------------------
@@ -847,25 +886,25 @@ def main(datapath: str, plot: str) -> None:
# normalize all three feature arrays to the same range to make
# peak detection simpler
baseline_envelope = normalize([baseline_envelope])[0]
search_envelope = normalize([search_envelope])[0]
baseline_frequency_filtered = normalize(
[baseline_frequency_filtered]
)[0]
# baseline_envelope = minmaxnorm([baseline_envelope])[0]
# search_envelope = minmaxnorm([search_envelope])[0]
# baseline_frequency_filtered = minmaxnorm(
# [baseline_frequency_filtered]
# )[0]
# PEAK DETECTION ----------------------------------------------
# detect peaks baseline_enelope
baseline_peak_indices, _ = find_peaks(
baseline_envelope, prominence=config.prominence
baseline_envelope, prominence=config.baseline_prominence
)
# detect peaks search_envelope
search_peak_indices, _ = find_peaks(
search_envelope, prominence=config.prominence
search_envelope, prominence=config.search_prominence
)
# detect peaks inst_freq_filtered
frequency_peak_indices, _ = find_peaks(
baseline_frequency_filtered, prominence=config.prominence
baseline_frequency_filtered, prominence=config.frequency_prominence
)
# DETECT CHIRPS IN SEARCH WINDOW ------------------------------
@@ -890,7 +929,7 @@ def main(datapath: str, plot: str) -> None:
or len(frequency_peak_timestamps) == 0
)
if one_feature_empty:
if one_feature_empty and (debug == 'false'):
continue
# group peak across feature arrays but only if they
@@ -911,25 +950,23 @@ def main(datapath: str, plot: str) -> None:
# check it there are chirps detected after grouping, continue
# with the loop if not
if len(singleelectrode_chirps) == 0:
if (len(singleelectrode_chirps) == 0) and (debug == 'false'):
continue
# append chirps from this electrode to the multilectrode list
multielectrode_chirps.append(singleelectrode_chirps)
# only initialize the plotting buffer if chirps are detected
chirp_detected = (
(el == config.number_electrodes - 1)
& (len(singleelectrode_chirps) > 0)
& (plot in ["show", "save"])
)
chirp_detected = (el == (config.number_electrodes - 1)
& (plot in ["show", "save"])
)
if chirp_detected:
if chirp_detected or (debug != 'elecrode'):
logger.debug("Detected chirp, ititialize buffer ...")
# save data to Buffer
buffer = PlotBuffer(
buffer = ChirpPlotBuffer(
config=config,
t0=window_start_seconds,
dt=window_duration_seconds,
@@ -954,6 +991,11 @@ def main(datapath: str, plot: str) -> None:
logger.debug("Buffer initialized!")
if debug == "electrode":
logger.info(f'Plotting electrode {el} ...')
buffer.plot_buffer(
chirps=singleelectrode_chirps, plot=plot)
logger.debug(
f"Processed all electrodes for fish {track_id} for this"
"window, sorting chirps ..."
@@ -962,7 +1004,7 @@ def main(datapath: str, plot: str) -> None:
# check if there are chirps detected in multiple electrodes and
# continue the loop if not
if len(multielectrode_chirps) == 0:
if (len(multielectrode_chirps) == 0) and (debug == 'false'):
continue
# validate multielectrode chirps, i.e. check if they are
@@ -987,9 +1029,15 @@ def main(datapath: str, plot: str) -> None:
# if chirps are detected and the plot flag is set, plot the
# chirps, otheswise try to delete the buffer if it exists
if len(multielectrode_chirps_validated) > 0:
if debug == "fish":
logger.info(f'Plotting fish {track_id} ...')
buffer.plot_buffer(multielectrode_chirps_validated, plot)
if ((len(multielectrode_chirps_validated) > 0) &
(plot in ["show", "save"]) & (debug == 'false')):
try:
buffer.plot_buffer(multielectrode_chirps_validated, plot)
del buffer
except NameError:
pass
else:
@@ -1049,4 +1097,4 @@ if __name__ == "__main__":
datapath = "../data/2022-06-02-10_00/"
# datapath = "/home/weygoldt/Data/uni/efishdata/2016-colombia/fishgrid/2016-04-09-22_25/"
# datapath = "/home/weygoldt/Data/uni/chirpdetection/GP2023_chirp_detection/data/mount_data/2020-03-13-10_00/"
main(datapath, plot="save")
chirpdetection(datapath, plot="show", debug="false")

View File

@@ -1,47 +1,41 @@
# directory setup
dataroot: "../data/"
outputdir: "../output/"
# Path setup ------------------------------------------------------------------
# Duration and overlap of the analysis window in seconds
window: 10
overlap: 1
edge: 0.25
dataroot: "../data/" # path to data
outputdir: "../output/" # path to save plots to
# Number of electrodes to go over
number_electrodes: 3
minimum_electrodes: 2
# Rolling window parameters ---------------------------------------------------
# Search window bandwidth and minimal baseline bandwidth
minimal_bandwidth: 20
window: 5 # rolling window length in seconds
overlap: 1 # window overlap in seconds
edge: 0.25 # window edge cutoffs to mitigate filter edge effects
# Instantaneous frequency smoothing usint a gaussian kernel of this width
baseline_frequency_smoothing: 5
# Electrode iteration parameters ----------------------------------------------
# Baseline processing parameters
baseline_envelope_cutoff: 25
baseline_envelope_bandpass_lowf: 4
baseline_envelope_bandpass_highf: 100
baseline_envelope_envelope_cutoff: 4
number_electrodes: 2 # number of electrodes to go over
minimum_electrodes: 1 # minimum number of electrodes a chirp must be on
# search envelope processing parameters
search_envelope_cutoff: 5
# Feature extraction parameters -----------------------------------------------
# Instantaneous frequency bandpass filter cutoff frequencies
baseline_frequency_highpass_cutoff: 0.000005
baseline_frequency_envelope_cutoff: 0.000005
search_df_lower: 20 # start searching this far above the baseline
search_df_upper: 100 # stop searching this far above the baseline
search_res: 1 # search window resolution
default_search_freq: 60 # search here if no need for a search frequency
minimal_bandwidth: 10 # minimal bandpass filter width for baseline
search_bandwidth: 10 # minimal bandpass filter width for search frequency
baseline_frequency_smoothing: 10 # instantaneous frequency smoothing
# peak detecion parameters
prominence: 0.005
# Feature processing parameters -----------------------------------------------
# search freq parameter
search_df_lower: 20
search_df_upper: 100
search_res: 1
search_bandwidth: 10
default_search_freq: 50
baseline_envelope_cutoff: 25 # envelope estimation cutoff
baseline_envelope_bandpass_lowf: 2 # envelope bandpass lower cutoff
baseline_envelope_bandpass_highf: 100 # envelope bandpass higher cutoff
search_envelope_cutoff: 10 # search envelope estimation cutoff
# Peak detection parameters ----------------------------------------------------
baseline_prominence: 0.00005 # peak prominence threshold for baseline envelope
search_prominence: 0.000004 # peak prominence threshold for search envelope
frequency_prominence: 2 # peak prominence threshold for baseline freq
# Classify events as chirps if they are less than this time apart
chirp_window_threshold: 0.05
chirp_window_threshold: 0.02

48
code/extract_chirps.py Normal file
View File

@@ -0,0 +1,48 @@
import os
import pandas as pd
import numpy as np
from chirpdetection import chirpdetection
from IPython import embed
def main(datapaths):
    """Run the chirp detector on every dataset path, showing plots."""
    for datapath in datapaths:
        chirpdetection(datapath, plot='show')
if __name__ == '__main__':

    dataroot = '../data/mount_data/'

    # candidate dataset folders under the data root, sorted by name
    datasets = sorted(
        name for name in os.listdir(dataroot)
        if os.path.isdir(os.path.join(dataroot, name))
    )

    valid_datasets = []
    for dataset in datasets:
        path = os.path.join(dataroot, dataset)

        # the behaviour csv is named after the date prefix of the folder
        csv_name = '-'.join(dataset.split('-')[:3]) + '.csv'
        if not os.path.exists(os.path.join(path, csv_name)):
            continue
        if not os.path.exists(os.path.join(path, 'ident_v.npy')):
            continue

        # keep only recordings that track exactly two fish
        ident = np.load(os.path.join(path, 'ident_v.npy'))
        number_of_fish = len(np.unique(ident[~np.isnan(ident)]))
        if number_of_fish != 2:
            continue

        valid_datasets.append(dataset)

    datapaths = [os.path.join(dataroot, dataset) + '/'
                 for dataset in valid_datasets]

    # record the selection so downstream scripts can reuse it
    recs = pd.DataFrame(columns=['recording'], data=valid_datasets)
    recs.to_csv('../recs.csv', index=False)

    main(datapaths)

# window 1524 + 244 in dataset index 4 is nice example

35
code/get_behaviour.py Normal file
View File

@@ -0,0 +1,35 @@
import os
from paramiko import SSHClient
from scp import SCPClient
from IPython import embed
from pandas import read_csv
# open an SSH session to the lab data server
# NOTE(review): credentials are hardcoded here — prefer key-based auth or an
# SSH config entry over committing a password to the repository.
ssh = SSHClient()
ssh.load_system_host_keys()
ssh.connect(hostname='kraken',
            username='efish',
            password='fwNix4U',
            )

# SCPClient wraps the established paramiko transport
scp = SCPClient(ssh.get_transport())

# recordings to fetch, as listed in the csv produced by extract_chirps
data = read_csv('../recs.csv')
foldernames = data['recording'].values

directory = f'/Users/acfw/Documents/uni_tuebingen/chirpdetection/GP2023_chirp_detection/data/mount_data/'

for foldername in foldernames:
    # create the local target folder if it does not exist yet
    if not os.path.exists(directory + foldername):
        os.makedirs(directory + foldername)

    # the behaviour csv is named after the date prefix of the recording
    csv_name = ('-').join(foldername.split('-')[:3]) + '.csv'
    files = [csv_name, 'chirp_ids.npy', 'chirps.npy', 'fund_v.npy',
             'ident_v.npy', 'idx_v.npy', 'times.npy', 'spec.npy',
             'LED_on_time.npy', 'sign_v.npy']

    for f in files:
        scp.get(f'/home/efish/behavior/2019_tube_competition/{foldername}/{f}',
                directory + foldername)

scp.close()

View File

@@ -0,0 +1,99 @@
import numpy as np
import os
import numpy as np
from IPython import embed
from pandas import read_csv
from modules.logger import makeLogger
logger = makeLogger(__name__)
class Behavior:
    """Load behavior data from csv file as class attributes

    Attributes
    ----------
    behavior: 0: chasing onset, 1: chasing offset, 2: physical contact
    behavior_type:
    behavioral_category:
    comment_start:
    comment_stop:
    dataframe: pandas dataframe with all the data
    duration_s:
    media_file:
    observation_date:
    observation_id:
    start_s: start time of the event in seconds
    stop_s: stop time of the event in seconds
    total_length:
    """

    def __init__(self, folder_path: str) -> None:
        # LED onset times annotated in BORIS; the last one is used below to
        # align the BORIS (video) timeline with the recording timeline
        LED_on_time_BORIS = np.load(os.path.join(folder_path, 'LED_on_time.npy'), allow_pickle=True)

        # the first csv found in the folder is taken to be the BORIS export
        csv_filename = [f for f in os.listdir(folder_path) if f.endswith('.csv')][0]
        logger.info(f'CSV file: {csv_filename}')
        self.dataframe = read_csv(os.path.join(folder_path, csv_filename))

        # chirp detector output: chirp times and the fish ids they belong to
        self.chirps = np.load(os.path.join(folder_path, 'chirps.npy'), allow_pickle=True)
        self.chirps_ids = np.load(os.path.join(folder_path, 'chirp_ids.npy'), allow_pickle=True)

        # tracking arrays: identities, index vector, fundamental frequencies,
        # time axis and spectrogram
        self.ident = np.load(os.path.join(folder_path, 'ident_v.npy'), allow_pickle=True)
        self.idx = np.load(os.path.join(folder_path, 'idx_v.npy'), allow_pickle=True)
        self.freq = np.load(os.path.join(folder_path, 'fund_v.npy'), allow_pickle=True)
        self.time = np.load(os.path.join(folder_path, "times.npy"), allow_pickle=True)
        self.spec = np.load(os.path.join(folder_path, "spec.npy"), allow_pickle=True)

        # expose every csv column as an attribute with a pythonic name:
        # lowercase, spaces -> underscores, parentheses stripped
        for k, key in enumerate(self.dataframe.keys()):
            key = key.lower()
            if ' ' in key:
                key = key.replace(' ', '_')
            if '(' in key:
                key = key.replace('(', '')
                key = key.replace(')', '')
            setattr(self, key, np.array(self.dataframe[self.dataframe.keys()[k]]))

        # map BORIS event times onto the recording time axis: shift by the
        # offset of the last LED event and rescale by a fixed factor
        # (presumably corrects clock drift between video and recording —
        # TODO confirm where 1.034141 comes from)
        last_LED_t_BORIS = LED_on_time_BORIS[-1]
        real_time_range = self.time[-1] - self.time[0]
        factor = 1.034141
        shift = last_LED_t_BORIS - real_time_range * factor
        self.start_s = (self.start_s - shift) / factor
        self.stop_s = (self.stop_s - shift) / factor
def correct_chasing_events(
    category: np.ndarray,
    timestamps: np.ndarray
) -> tuple[np.ndarray, np.ndarray]:
    """Remove doubled chasing events so onsets and offsets alternate.

    Parameters
    ----------
    category : np.ndarray
        Event codes (0: chasing onset, 1: chasing offset, 2: physical
        contact), aligned with ``timestamps``.
    timestamps : np.ndarray
        Event times in seconds, aligned with ``category``.

    Returns
    -------
    tuple[np.ndarray, np.ndarray]
        The cleaned category and timestamp arrays.
    """

    onset_ids = np.arange(
        len(category))[category == 0]
    offset_ids = np.arange(
        len(category))[category == 1]

    # indices of doubled events: among the non-contact events (category != 2),
    # keep positions where the next non-contact event has the same code
    # (i.e. two onsets or two offsets in a row)
    woring_bh = np.arange(len(category))[category!=2][:-1][np.diff(category[category!=2])==0]
    # if the recording starts with an offset, drop that leading offset as well
    if onset_ids[0] > offset_ids[0]:
        offset_ids = np.delete(offset_ids, 0)
        # NOTE(review): help_index is read *after* the first offset was removed
        # from offset_ids, so the index appended here is that of the second
        # original offset, not the leading one — confirm this is intended
        help_index = offset_ids[0]
        woring_bh = np.append(woring_bh, help_index)

    category = np.delete(category, woring_bh)
    timestamps = np.delete(timestamps, woring_bh)

    # Check whether on- or offset is longer and calculate length difference
    # NOTE(review): these counts come from the arrays *before* deletion, so
    # the log may not reflect the corrected data — confirm
    if len(onset_ids) > len(offset_ids):
        len_diff = len(onset_ids) - len(offset_ids)
        logger.info(f'Onsets are greater than offsets by {len_diff}')
    elif len(onset_ids) < len(offset_ids):
        len_diff = len(offset_ids) - len(onset_ids)
        logger.info(f'Offsets are greater than onsets by {len_diff}')
    elif len(onset_ids) == len(offset_ids):
        logger.info('Chasing events are equal')

    return category, timestamps

View File

@@ -4,7 +4,7 @@ from scipy.ndimage import gaussian_filter1d
from scipy.stats import gamma, norm
def scale01(data):
def minmaxnorm(data):
"""
Normalize data to [0, 1]
@@ -19,7 +19,7 @@ def scale01(data):
Normalized data.
"""
return (2*((data - np.min(data)) / (np.max(data) - np.min(data)))) - 1
return (data - np.min(data)) / (np.max(data) - np.min(data))
def instantaneous_frequency(
@@ -168,6 +168,9 @@ def group_timestamps(
]
timestamps.sort()
if len(timestamps) == 0:
return []
groups = []
current_group = [timestamps[0]]

View File

@@ -0,0 +1,87 @@
import numpy as np
import os
import numpy as np
import matplotlib.pyplot as plt
from thunderfish.powerspectrum import decibel
from IPython import embed
from pandas import read_csv
from modules.logger import makeLogger
from modules.plotstyle import PlotStyle
from modules.behaviour_handling import Behavior, correct_chasing_events
ps = PlotStyle()
logger = makeLogger(__name__)
def main(datapath: str):
    """Plot chirp counts of winner vs. loser fish across all recordings.

    For every valid recording folder under ``datapath``, look up which fish
    won from ``order_meta.csv``, count the chirps emitted by the winner and
    the loser, and show a paired boxplot of the counts.

    Parameters
    ----------
    datapath : str
        Root folder containing one subfolder per recording.
    """

    foldernames = [datapath + x + '/' for x in os.listdir(datapath) if os.path.isdir(datapath+x)]
    # metadata csv lives next to the recording folders
    path_to_csv = ('/').join(foldernames[0].split('/')[:-2]) + '/order_meta.csv'
    meta_id = read_csv(path_to_csv)
    # strip the surrounding quote characters from the recording names
    meta_id['recording'] = meta_id['recording'].str[1:-1]

    chirps_winner = []
    chirps_loser = []

    for foldername in foldernames:
        # behavior is pandas dataframe with all the data
        # this recording is skipped — presumably corrupt or unusable; confirm
        if foldername == '../data/mount_data/2020-05-12-10_00/':
            continue
        bh = Behavior(foldername)

        # chirps are not sorted in time (presumably due to prior groupings)
        # get and sort chirps and corresponding fish_ids of the chirps
        category = bh.behavior
        timestamps = bh.start_s
        # Correct for doubles in chasing on- and offsets to get the right on-/offset pairs
        # Get rid of tracking faults (two onsets or two offsets after another)
        category, timestamps = correct_chasing_events(category, timestamps)

        # find this recording's row in the metadata and resolve which tracked
        # fish id belongs to the winner and which to the loser
        folder_name = foldername.split('/')[-2]
        winner_row = meta_id[meta_id['recording'] == folder_name]
        winner = winner_row['winner'].values[0].astype(int)
        winner_fish1 = winner_row['fish1'].values[0].astype(int)
        winner_fish2 = winner_row['fish2'].values[0].astype(int)

        if winner == winner_fish1:
            winner_fish_id = winner_row['rec_id1'].values[0]
            loser_fish_id = winner_row['rec_id2'].values[0]
        elif winner == winner_fish2:
            winner_fish_id = winner_row['rec_id2'].values[0]
            loser_fish_id = winner_row['rec_id1'].values[0]
        else:
            # no clear winner recorded for this recording — skip it
            continue

        print(foldername)
        all_fish_ids = np.unique(bh.chirps_ids)
        # count chirps attributed to each fish
        chirp_loser = len(bh.chirps[bh.chirps_ids == loser_fish_id])
        chirp_winner = len(bh.chirps[bh.chirps_ids == winner_fish_id])
        chirps_winner.append(chirp_winner)
        chirps_loser.append(chirp_loser)

        # NOTE(review): fish1_id / fish2_id are never used below — leftovers?
        fish1_id = all_fish_ids[0]
        fish2_id = all_fish_ids[1]
        print(winner_fish_id)
        print(all_fish_ids)

    # paired boxplot: each line connects one recording's winner/loser counts
    fig, ax = plt.subplots()
    ax.boxplot([chirps_winner, chirps_loser], showfliers=False)
    ax.scatter(np.ones(len(chirps_winner)), chirps_winner, color='r')
    ax.scatter(np.ones(len(chirps_loser))*2, chirps_loser, color='r')
    ax.set_xticklabels(['winner', 'loser'])
    for w, l in zip(chirps_winner, chirps_loser):
        ax.plot([1,2], [w,l], color='r', alpha=0.5, linewidth=0.5)
    ax.set_ylabel('Chirpscounts [n]')
    plt.show()
# Entry point: aggregate chirp counts over all recordings under the data root
if __name__ == '__main__':

    # Path to the data
    datapath = '../data/mount_data/'
    main(datapath)

View File

@@ -10,188 +10,96 @@ from IPython import embed
from pandas import read_csv
from modules.logger import makeLogger
from modules.plotstyle import PlotStyle
from modules.behaviour_handling import Behavior, correct_chasing_events
ps = PlotStyle()
logger = makeLogger(__name__)
class Behavior:
"""Load behavior data from csv file as class attributes
Attributes
----------
behavior: 0: chasing onset, 1: chasing offset, 2: physical contact
behavior_type:
behavioral_category:
comment_start:
comment_stop:
dataframe: pandas dataframe with all the data
duration_s:
media_file:
observation_date:
observation_id:
start_s: start time of the event in seconds
stop_s: stop time of the event in seconds
total_length:
"""
def __init__(self, folder_path: str) -> None:
LED_on_time_BORIS = np.load(os.path.join(folder_path, 'LED_on_time.npy'), allow_pickle=True)
csv_filename = [f for f in os.listdir(folder_path) if f.endswith('.csv')][0]
logger.info(f'CSV file: {csv_filename}')
self.dataframe = read_csv(os.path.join(folder_path, csv_filename))
self.chirps = np.load(os.path.join(folder_path, 'chirps.npy'), allow_pickle=True)
self.chirps_ids = np.load(os.path.join(folder_path, 'chirps_ids.npy'), allow_pickle=True)
self.ident = np.load(os.path.join(folder_path, 'ident_v.npy'), allow_pickle=True)
self.idx = np.load(os.path.join(folder_path, 'idx_v.npy'), allow_pickle=True)
self.freq = np.load(os.path.join(folder_path, 'fund_v.npy'), allow_pickle=True)
self.time = np.load(os.path.join(folder_path, "times.npy"), allow_pickle=True)
self.spec = np.load(os.path.join(folder_path, "spec.npy"), allow_pickle=True)
for k, key in enumerate(self.dataframe.keys()):
key = key.lower()
if ' ' in key:
key = key.replace(' ', '_')
if '(' in key:
key = key.replace('(', '')
key = key.replace(')', '')
setattr(self, key, np.array(self.dataframe[self.dataframe.keys()[k]]))
last_LED_t_BORIS = LED_on_time_BORIS[-1]
real_time_range = self.time[-1] - self.time[0]
factor = 1.034141
shift = last_LED_t_BORIS - real_time_range * factor
self.start_s = (self.start_s - shift) / factor
self.stop_s = (self.stop_s - shift) / factor
def correct_chasing_events(
category: np.ndarray,
timestamps: np.ndarray
) -> tuple[np.ndarray, np.ndarray]:
onset_ids = np.arange(
len(category))[category == 0]
offset_ids = np.arange(
len(category))[category == 1]
# Check whether on- or offset is longer and calculate length difference
if len(onset_ids) > len(offset_ids):
len_diff = len(onset_ids) - len(offset_ids)
longer_array = onset_ids
shorter_array = offset_ids
logger.info(f'Onsets are greater than offsets by {len_diff}')
elif len(onset_ids) < len(offset_ids):
len_diff = len(offset_ids) - len(onset_ids)
longer_array = offset_ids
shorter_array = onset_ids
logger.info(f'Offsets are greater than offsets by {len_diff}')
elif len(onset_ids) == len(offset_ids):
logger.info('Chasing events are equal')
return category, timestamps
# Correct the wrong chasing events; delete double events
wrong_ids = []
for i in range(len(longer_array)-(len_diff+1)):
if (shorter_array[i] > longer_array[i]) & (shorter_array[i] < longer_array[i+1]):
pass
else:
wrong_ids.append(longer_array[i])
longer_array = np.delete(longer_array, i)
category = np.delete(
category, wrong_ids)
timestamps = np.delete(
timestamps, wrong_ids)
return category, timestamps
def main(datapath: str):
# behabvior is pandas dataframe with all the data
bh = Behavior(datapath)
# chirps are not sorted in time (presumably due to prior groupings)
# get and sort chirps and corresponding fish_ids of the chirps
chirps = bh.chirps[np.argsort(bh.chirps)]
chirps_fish_ids = bh.chirps_ids[np.argsort(bh.chirps)]
category = bh.behavior
timestamps = bh.start_s
# Correct for doubles in chasing on- and offsets to get the right on-/offset pairs
# Get rid of tracking faults (two onsets or two offsets after another)
category, timestamps = correct_chasing_events(category, timestamps)
foldernames = [datapath + x + '/' for x in os.listdir(datapath) if os.path.isdir(datapath+x)]
for foldername in foldernames:
if foldername == '../data/mount_data/2020-05-12-10_00/':
continue
# behabvior is pandas dataframe with all the data
bh = Behavior(foldername)
# split categories
chasing_onset = (timestamps[category == 0]/ 60) /60
chasing_offset = (timestamps[category == 1]/ 60) /60
physical_contact = (timestamps[category == 2] / 60) /60
category = bh.behavior
timestamps = bh.start_s
# Correct for doubles in chasing on- and offsets to get the right on-/offset pairs
# Get rid of tracking faults (two onsets or two offsets after another)
category, timestamps = correct_chasing_events(category, timestamps)
all_fish_ids = np.unique(chirps_fish_ids)
fish1_id = all_fish_ids[0]
fish2_id = all_fish_ids[1]
# Associate chirps to inidividual fish
fish1 = (chirps[chirps_fish_ids == fish1_id] / 60) /60
fish2 = (chirps[chirps_fish_ids == fish2_id] / 60) /60
fish1_color = ps.red
fish2_color = ps.orange
# split categories
chasing_onset = (timestamps[category == 0]/ 60) /60
chasing_offset = (timestamps[category == 1]/ 60) /60
physical_contact = (timestamps[category == 2] / 60) /60
fig, ax = plt.subplots(4, 1, figsize=(10, 5), height_ratios=[0.5, 0.5, 0.5, 6], sharex=True)
# marker size
s = 200
ax[0].scatter(physical_contact, np.ones(len(physical_contact)), color='firebrick', marker='|', s=s)
ax[1].scatter(chasing_onset, np.ones(len(chasing_onset)), color='green', marker='|', s=s )
ax[2].scatter(fish1, np.ones(len(fish1))-0.25, color=fish1_color, marker='|', s=s)
ax[2].scatter(fish2, np.zeros(len(fish2))+0.25, color=fish2_color, marker='|', s=s)
all_fish_ids = np.unique(bh.chirps_ids)
fish1_id = all_fish_ids[0]
fish2_id = all_fish_ids[1]
# Associate chirps to inidividual fish
fish1 = (bh.chirps[bh.chirps_ids == fish1_id] / 60) /60
fish2 = (bh.chirps[bh.chirps_ids == fish2_id] / 60) /60
fish1_color = ps.red
fish2_color = ps.orange
freq_temp = bh.freq[bh.ident==fish1_id]
time_temp = bh.time[bh.idx[bh.ident==fish1_id]]
ax[3].plot((time_temp/ 60) /60, freq_temp, color=fish1_color)
fig, ax = plt.subplots(4, 1, figsize=(10, 5), height_ratios=[0.5, 0.5, 0.5, 6], sharex=True)
# marker size
s = 200
ax[0].scatter(physical_contact, np.ones(len(physical_contact)), color='firebrick', marker='|', s=s)
ax[1].scatter(chasing_onset, np.ones(len(chasing_onset)), color='green', marker='|', s=s )
ax[2].scatter(fish1, np.ones(len(fish1))-0.25, color=fish1_color, marker='|', s=s)
ax[2].scatter(fish2, np.zeros(len(fish2))+0.25, color=fish2_color, marker='|', s=s)
freq_temp = bh.freq[bh.ident==fish2_id]
time_temp = bh.time[bh.idx[bh.ident==fish2_id]]
ax[3].plot((time_temp/ 60) /60, freq_temp, color=fish2_color)
freq_temp = bh.freq[bh.ident==fish1_id]
time_temp = bh.time[bh.idx[bh.ident==fish1_id]]
ax[3].plot((time_temp/ 60) /60, freq_temp, color=fish1_color)
#ax[3].imshow(decibel(bh.spec), extent=[bh.time[0]/60/60, bh.time[-1]/60/60, 0, 2000], aspect='auto', origin='lower')
freq_temp = bh.freq[bh.ident==fish2_id]
time_temp = bh.time[bh.idx[bh.ident==fish2_id]]
ax[3].plot((time_temp/ 60) /60, freq_temp, color=fish2_color)
# Hide grid lines
ax[0].grid(False)
ax[0].set_frame_on(False)
ax[0].set_xticks([])
ax[0].set_yticks([])
ps.hide_ax(ax[0])
#ax[3].imshow(decibel(bh.spec), extent=[bh.time[0]/60/60, bh.time[-1]/60/60, 0, 2000], aspect='auto', origin='lower')
# Hide grid lines
ax[0].grid(False)
ax[0].set_frame_on(False)
ax[0].set_xticks([])
ax[0].set_yticks([])
ps.hide_ax(ax[0])
ax[1].grid(False)
ax[1].set_frame_on(False)
ax[1].set_xticks([])
ax[1].set_yticks([])
ps.hide_ax(ax[1])
ax[1].grid(False)
ax[1].set_frame_on(False)
ax[1].set_xticks([])
ax[1].set_yticks([])
ps.hide_ax(ax[1])
ax[2].grid(False)
ax[2].set_frame_on(False)
ax[2].set_yticks([])
ax[2].set_xticks([])
ps.hide_ax(ax[2])
ax[2].grid(False)
ax[2].set_frame_on(False)
ax[2].set_yticks([])
ax[2].set_xticks([])
ps.hide_ax(ax[2])
ax[3].axvspan(0, 3, 0, 5, facecolor='grey', alpha=0.5)
ax[3].set_xticks(np.arange(0, 6.1, 0.5))
ax[3].axvspan(0, 3, 0, 5, facecolor='grey', alpha=0.5)
ax[3].set_xticks(np.arange(0, 6.1, 0.5))
labelpad = 40
ax[0].set_ylabel('Physical contact', rotation=0, labelpad=labelpad)
ax[1].set_ylabel('Chasing events', rotation=0, labelpad=labelpad)
ax[2].set_ylabel('Chirps', rotation=0, labelpad=labelpad)
ax[3].set_ylabel('EODf')
labelpad = 40
ax[0].set_ylabel('Physical contact', rotation=0, labelpad=labelpad)
ax[1].set_ylabel('Chasing events', rotation=0, labelpad=labelpad)
ax[2].set_ylabel('Chirps', rotation=0, labelpad=labelpad)
ax[3].set_ylabel('EODf')
ax[3].set_xlabel('Time [h]')
ax[3].set_xlabel('Time [h]')
ax[0].set_title(foldername.split('/')[-2])
plt.show()
plt.show()
embed()
# plot chirps
@@ -199,5 +107,5 @@ def main(datapath: str):
if __name__ == '__main__':
# Path to the data
datapath = '../data/mount_data/2020-05-13-10_00/'
datapath = '../data/mount_data/'
main(datapath)

29
recs.csv Normal file
View File

@@ -0,0 +1,29 @@
recording
2020-03-13-10_00
2020-03-16-10_00
2020-03-19-10_00
2020-03-20-10_00
2020-03-23-09_58
2020-03-24-10_00
2020-03-25-10_00
2020-03-31-09_59
2020-05-11-10_00
2020-05-12-10_00
2020-05-13-10_00
2020-05-14-10_00
2020-05-15-10_00
2020-05-18-10_00
2020-05-19-10_00
2020-05-21-10_00
2020-05-25-10_00
2020-05-27-10_00
2020-05-28-10_00
2020-05-29-10_00
2020-06-02-10_00
2020-06-03-10_10
2020-06-04-10_00
2020-06-05-10_00
2020-06-08-10_00
2020-06-09-10_00
2020-06-10-10_00
2020-06-11-10_00
1 recording
2 2020-03-13-10_00
3 2020-03-16-10_00
4 2020-03-19-10_00
5 2020-03-20-10_00
6 2020-03-23-09_58
7 2020-03-24-10_00
8 2020-03-25-10_00
9 2020-03-31-09_59
10 2020-05-11-10_00
11 2020-05-12-10_00
12 2020-05-13-10_00
13 2020-05-14-10_00
14 2020-05-15-10_00
15 2020-05-18-10_00
16 2020-05-19-10_00
17 2020-05-21-10_00
18 2020-05-25-10_00
19 2020-05-27-10_00
20 2020-05-28-10_00
21 2020-05-29-10_00
22 2020-06-02-10_00
23 2020-06-03-10_10
24 2020-06-04-10_00
25 2020-06-05-10_00
26 2020-06-08-10_00
27 2020-06-09-10_00
28 2020-06-10-10_00
29 2020-06-11-10_00