Merge branch 'master' into chirp_bodylength

This commit is contained in:
wendtalexander 2023-01-24 09:12:51 +01:00
commit f36f8606d8
6 changed files with 241 additions and 78 deletions

View File

@ -18,6 +18,7 @@ from modules.datahandling import (
purge_duplicates,
group_timestamps,
instantaneous_frequency,
minmaxnorm
)
logger = makeLogger(__name__)
@ -26,7 +27,7 @@ ps = PlotStyle()
@dataclass
class PlotBuffer:
class ChirpPlotBuffer:
"""
Buffer to save data that is created in the main detection loop
@ -83,6 +84,7 @@ class PlotBuffer:
q50 + self.search_frequency + self.config.minimal_bandwidth / 2,
q50 + self.search_frequency - self.config.minimal_bandwidth / 2,
)
print(search_upper, search_lower)
# get indices on raw data
start_idx = (self.t0 - 5) * self.data.raw_rate
@ -94,7 +96,8 @@ class PlotBuffer:
self.time = self.time - self.t0
self.frequency_time = self.frequency_time - self.t0
chirps = np.asarray(chirps) - self.t0
if len(chirps) > 0:
chirps = np.asarray(chirps) - self.t0
self.t0_old = self.t0
self.t0 = 0
@ -130,7 +133,7 @@ class PlotBuffer:
data_oi,
self.data.raw_rate,
self.t0 - 5,
[np.max(self.frequency) - 200, np.max(self.frequency) + 200]
[np.min(self.frequency) - 100, np.max(self.frequency) + 200]
)
for track_id in self.data.ids:
@ -145,14 +148,15 @@ class PlotBuffer:
# get tracked frequencies and their times
f = self.data.freq[window_idx]
t = self.data.time[
self.data.idx[self.data.ident == self.track_id]]
tmask = (t >= t0_track) & (t <= (t0_track + dt_track))
# t = self.data.time[
# self.data.idx[self.data.ident == self.track_id]]
# tmask = (t >= t0_track) & (t <= (t0_track + dt_track))
t = self.data.time[self.data.idx[window_idx]]
if track_id == self.track_id:
ax0.plot(t[tmask]-self.t0_old, f, lw=lw,
ax0.plot(t-self.t0_old, f, lw=lw,
zorder=10, color=ps.gblue1)
else:
ax0.plot(t[tmask]-self.t0_old, f, lw=lw,
ax0.plot(t-self.t0_old, f, lw=lw,
zorder=10, color=ps.gray, alpha=0.5)
ax0.fill_between(
@ -180,10 +184,11 @@ class PlotBuffer:
# spec_times[0], spec_times[-1],
# color=ps.gblue2, lw=2, ls="dashed")
for chirp in chirps:
ax0.scatter(
chirp, np.median(self.frequency) + 150, c=ps.black, marker="v"
)
if len(chirps) > 0:
for chirp in chirps:
ax0.scatter(
chirp, np.median(self.frequency) + 150, c=ps.black, marker="v"
)
# plot waveform of filtered signal
ax1.plot(self.time, self.baseline * waveform_scaler,
@ -318,7 +323,7 @@ def plot_spectrogram(
aspect="auto",
origin="lower",
interpolation="gaussian",
alpha=1,
alpha=0.6,
)
# axis.use_sticky_edges = False
return spec_times
@ -431,6 +436,28 @@ def window_median_all_track_ids(
return frequency_percentiles, track_ids
def array_center(array: np.ndarray) -> float:
"""
Return the center value of an array.
If the array length is even, returns
the mean of the two center values.
Parameters
----------
array : np.ndarray
Array to calculate the center from.
Returns
-------
float
"""
if len(array) % 2 == 0:
return np.mean(array[int(len(array) / 2) - 1:int(len(array) / 2) + 1])
else:
return array[int(len(array) / 2)]
def find_searchband(
current_frequency: np.ndarray,
percentiles_ids: np.ndarray,
@ -464,15 +491,17 @@ def find_searchband(
# frequency window where second filter filters is potentially allowed
# to filter. This is the search window, in which we want to find
# a gap in the other fish's EODs.
current_median = np.median(current_frequency)
search_window = np.arange(
np.median(current_frequency) + config.search_df_lower,
np.median(current_frequency) + config.search_df_upper,
current_median + config.search_df_lower,
current_median + config.search_df_upper,
config.search_res,
)
# search window in boolean
search_window_bool = np.ones_like(len(search_window), dtype=bool)
bool_lower = np.ones_like(search_window, dtype=bool)
bool_upper = np.ones_like(search_window, dtype=bool)
search_window_bool = np.ones_like(search_window, dtype=bool)
# make seperate arrays from the qartiles
q25 = np.asarray([i[0] for i in frequency_percentiles])
@ -480,7 +509,7 @@ def find_searchband(
# get tracks that fall into search window
check_track_ids = percentiles_ids[
(q25 > search_window[0]) & (
(q25 > current_median) & (
q75 < search_window[-1])
]
@ -492,11 +521,10 @@ def find_searchband(
q25_temp = q25[percentiles_ids == check_track_id]
q75_temp = q75[percentiles_ids == check_track_id]
print(q25_temp, q75_temp)
search_window_bool[
(search_window > q25_temp) & (search_window < q75_temp)
] = False
bool_lower[search_window > q25_temp - config.search_res] = False
bool_upper[search_window < q75_temp + config.search_res] = False
search_window_bool[(bool_lower == False) &
(bool_upper == False)] = False
# find gaps in search window
search_window_indices = np.arange(len(search_window))
@ -509,6 +537,9 @@ def find_searchband(
nonzeros = search_window_gaps[np.nonzero(search_window_gaps)[0]]
nonzeros = nonzeros[~np.isnan(nonzeros)]
if len(nonzeros) == 0:
return config.default_search_freq
# if the first value is -1, the array starst with true, so a gap
if nonzeros[0] == -1:
stops = search_window_indices[search_window_gaps == -1]
@ -543,16 +574,14 @@ def find_searchband(
# the center of the search frequency band is then the center of
# the longest gap
search_freq = (
longest_search_window[-1] - longest_search_window[0]
) / 2
search_freq = array_center(longest_search_window) - current_median
return search_freq
return config.default_search_freq
def main(datapath: str, plot: str) -> None:
def chirpdetection(datapath: str, plot: str, debug: str = 'false') -> None:
assert plot in [
"save",
@ -560,7 +589,17 @@ def main(datapath: str, plot: str) -> None:
"false",
], "plot must be 'save', 'show' or 'false'"
assert debug in [
"false",
"electrode",
"fish",
], "debug must be 'false', 'electrode' or 'fish'"
if debug != "false":
assert plot == "show", "debug mode only runs when plot is 'show'"
# load raw file
print('datapath', datapath)
data = LoadData(datapath)
# load config file
@ -651,14 +690,14 @@ def main(datapath: str, plot: str) -> None:
# approximate sampling rate to compute expected durations if there
# is data available for this time window for this fish id
track_samplerate = np.mean(1 / np.diff(data.time))
expected_duration = (
(window_start_seconds + window_duration_seconds)
- window_start_seconds
) * track_samplerate
# track_samplerate = np.mean(1 / np.diff(data.time))
# expected_duration = (
# (window_start_seconds + window_duration_seconds)
# - window_start_seconds
# ) * track_samplerate
# check if tracked data available in this window
if len(current_frequencies) < expected_duration / 2:
if len(current_frequencies) < 3:
logger.warning(
f"Track {track_id} has no data in window {st}, skipping."
)
@ -750,11 +789,11 @@ def main(datapath: str, plot: str) -> None:
baseline_envelope = -baseline_envelope
baseline_envelope = envelope(
signal=baseline_envelope,
samplerate=data.raw_rate,
cutoff_frequency=config.baseline_envelope_envelope_cutoff,
)
# baseline_envelope = envelope(
# signal=baseline_envelope,
# samplerate=data.raw_rate,
# cutoff_frequency=config.baseline_envelope_envelope_cutoff,
# )
# compute the envelope of the search band. Peaks in the search
# band envelope correspond to troughs in the baseline envelope
@ -788,25 +827,25 @@ def main(datapath: str, plot: str) -> None:
# compute the envelope of the signal to remove the oscillations
# around the peaks
baseline_frequency_samplerate = np.mean(
np.diff(baseline_frequency_time)
)
# baseline_frequency_samplerate = np.mean(
# np.diff(baseline_frequency_time)
# )
baseline_frequency_filtered = np.abs(
baseline_frequency - np.median(baseline_frequency)
)
baseline_frequency_filtered = highpass_filter(
signal=baseline_frequency_filtered,
samplerate=baseline_frequency_samplerate,
cutoff=config.baseline_frequency_highpass_cutoff,
)
# baseline_frequency_filtered = highpass_filter(
# signal=baseline_frequency_filtered,
# samplerate=baseline_frequency_samplerate,
# cutoff=config.baseline_frequency_highpass_cutoff,
# )
baseline_frequency_filtered = envelope(
signal=-baseline_frequency_filtered,
samplerate=baseline_frequency_samplerate,
cutoff_frequency=config.baseline_frequency_envelope_cutoff,
)
# baseline_frequency_filtered = envelope(
# signal=-baseline_frequency_filtered,
# samplerate=baseline_frequency_samplerate,
# cutoff_frequency=config.baseline_frequency_envelope_cutoff,
# )
# CUT OFF OVERLAP ---------------------------------------------
@ -847,9 +886,9 @@ def main(datapath: str, plot: str) -> None:
# normalize all three feature arrays to the same range to make
# peak detection simpler
baseline_envelope = normalize([baseline_envelope])[0]
search_envelope = normalize([search_envelope])[0]
baseline_frequency_filtered = normalize(
baseline_envelope = minmaxnorm([baseline_envelope])[0]
search_envelope = minmaxnorm([search_envelope])[0]
baseline_frequency_filtered = minmaxnorm(
[baseline_frequency_filtered]
)[0]
@ -890,7 +929,7 @@ def main(datapath: str, plot: str) -> None:
or len(frequency_peak_timestamps) == 0
)
if one_feature_empty:
if one_feature_empty and (debug == 'false'):
continue
# group peak across feature arrays but only if they
@ -911,25 +950,23 @@ def main(datapath: str, plot: str) -> None:
# check it there are chirps detected after grouping, continue
# with the loop if not
if len(singleelectrode_chirps) == 0:
if (len(singleelectrode_chirps) == 0) and (debug == 'false'):
continue
# append chirps from this electrode to the multilectrode list
multielectrode_chirps.append(singleelectrode_chirps)
# only initialize the plotting buffer if chirps are detected
chirp_detected = (
(el == config.number_electrodes - 1)
& (len(singleelectrode_chirps) > 0)
& (plot in ["show", "save"])
)
chirp_detected = (el == (config.number_electrodes - 1)
& (plot in ["show", "save"])
)
if chirp_detected:
if chirp_detected or (debug != 'elecrode'):
logger.debug("Detected chirp, ititialize buffer ...")
# save data to Buffer
buffer = PlotBuffer(
buffer = ChirpPlotBuffer(
config=config,
t0=window_start_seconds,
dt=window_duration_seconds,
@ -954,6 +991,11 @@ def main(datapath: str, plot: str) -> None:
logger.debug("Buffer initialized!")
if debug == "electrode":
logger.info(f'Plotting electrode {el} ...')
buffer.plot_buffer(
chirps=singleelectrode_chirps, plot=plot)
logger.debug(
f"Processed all electrodes for fish {track_id} for this"
"window, sorting chirps ..."
@ -962,7 +1004,7 @@ def main(datapath: str, plot: str) -> None:
# check if there are chirps detected in multiple electrodes and
# continue the loop if not
if len(multielectrode_chirps) == 0:
if (len(multielectrode_chirps) == 0) and (debug == 'false'):
continue
# validate multielectrode chirps, i.e. check if they are
@ -987,9 +1029,15 @@ def main(datapath: str, plot: str) -> None:
# if chirps are detected and the plot flag is set, plot the
# chirps, otheswise try to delete the buffer if it exists
if len(multielectrode_chirps_validated) > 0:
if debug == "fish":
logger.info(f'Plotting fish {track_id} ...')
buffer.plot_buffer(multielectrode_chirps_validated, plot)
if ((len(multielectrode_chirps_validated) > 0) &
(plot in ["show", "save"]) & (debug == 'false')):
try:
buffer.plot_buffer(multielectrode_chirps_validated, plot)
del buffer
except NameError:
pass
else:
@ -1049,4 +1097,4 @@ if __name__ == "__main__":
datapath = "../data/2022-06-02-10_00/"
# datapath = "/home/weygoldt/Data/uni/efishdata/2016-colombia/fishgrid/2016-04-09-22_25/"
# datapath = "/home/weygoldt/Data/uni/chirpdetection/GP2023_chirp_detection/data/mount_data/2020-03-13-10_00/"
main(datapath, plot="save")
chirpdetection(datapath, plot="show", debug="fish")

View File

@ -3,7 +3,7 @@ dataroot: "../data/"
outputdir: "../output/"
# Duration and overlap of the analysis window in seconds
window: 10
window: 5
overlap: 1
edge: 0.25
@ -19,29 +19,29 @@ baseline_frequency_smoothing: 5
# Baseline processing parameters
baseline_envelope_cutoff: 25
baseline_envelope_bandpass_lowf: 4
baseline_envelope_bandpass_lowf: 2
baseline_envelope_bandpass_highf: 100
baseline_envelope_envelope_cutoff: 4
# baseline_envelope_envelope_cutoff: 4
# search envelope processing parameters
search_envelope_cutoff: 5
search_envelope_cutoff: 10
# Instantaneous frequency bandpass filter cutoff frequencies
baseline_frequency_highpass_cutoff: 0.000005
baseline_frequency_envelope_cutoff: 0.000005
# baseline_frequency_highpass_cutoff: 0.000005
# baseline_frequency_envelope_cutoff: 0.000005
# peak detecion parameters
prominence: 0.005
prominence: 0.7
# search freq parameter
search_df_lower: 20
search_df_upper: 100
search_res: 1
search_bandwidth: 10
default_search_freq: 50
search_bandwidth: 20
default_search_freq: 60
# Classify events as chirps if they are less than this time apart
chirp_window_threshold: 0.05
chirp_window_threshold: 0.015

48
code/extract_chirps.py Normal file
View File

@ -0,0 +1,48 @@
import os
import pandas as pd
import numpy as np
from chirpdetection import chirpdetection
from IPython import embed
def main(datapaths):
for path in datapaths:
chirpdetection(path, plot='show', debug='electrode')
if __name__ == '__main__':
dataroot = '../data/mount_data/'
datasets = sorted([name for name in os.listdir(dataroot) if os.path.isdir(
os.path.join(dataroot, name))])
valid_datasets = []
for dataset in datasets:
path = os.path.join(dataroot, dataset)
csv_name = '-'.join(dataset.split('-')[:3]) + '.csv'
if os.path.exists(os.path.join(path, csv_name)) is False:
continue
if os.path.exists(os.path.join(path, 'ident_v.npy')) is False:
continue
ident = np.load(os.path.join(path, 'ident_v.npy'))
number_of_fish = len(np.unique(ident[~np.isnan(ident)]))
if number_of_fish != 2:
continue
valid_datasets.append(dataset)
datapaths = [os.path.join(dataroot, dataset) +
'/' for dataset in valid_datasets]
recs = pd.DataFrame(columns=['recording'], data=valid_datasets)
recs.to_csv('../recs.csv', index=False)
# main(datapaths)
# window 1524 + 244 in dataset index 4 is nice example

35
code/get_behaviour.py Normal file
View File

@ -0,0 +1,35 @@
import os
from paramiko import SSHClient
from scp import SCPClient
from IPython import embed
from pandas import read_csv
ssh = SSHClient()
ssh.load_system_host_keys()
ssh.connect(hostname='kraken',
username='efish',
password='fwNix4U',
)
# SCPCLient takes a paramiko transport as its only argument
scp = SCPClient(ssh.get_transport())
data = read_csv('../recs.csv')
foldernames = data['recording'].values
directory = f'/Users/acfw/Documents/uni_tuebingen/chirpdetection/GP2023_chirp_detection/data/mount_data/'
for foldername in foldernames:
if not os.path.exists(directory+foldername):
os.makedirs(directory+foldername)
files = [('-').join(foldername.split('-')[:3])+'.csv','chirp_ids.npy', 'chirps.npy', 'fund_v.npy', 'ident_v.npy', 'idx_v.npy', 'times.npy', 'spec.npy', 'LED_on_time.npy', 'sign_v.npy']
for f in files:
scp.get(f'/home/efish/behavior/2019_tube_competition/{foldername}/{f}',
directory+foldername)
scp.close()

View File

@ -4,7 +4,7 @@ from scipy.ndimage import gaussian_filter1d
from scipy.stats import gamma, norm
def scale01(data):
def minmaxnorm(data):
"""
Normalize data to [0, 1]
@ -19,7 +19,7 @@ def scale01(data):
Normalized data.
"""
return (2*((data - np.min(data)) / (np.max(data) - np.min(data)))) - 1
return (data - np.min(data)) / (np.max(data) - np.min(data))
def instantaneous_frequency(
@ -168,6 +168,9 @@ def group_timestamps(
]
timestamps.sort()
if len(timestamps) == 0:
return []
groups = []
current_group = [timestamps[0]]

29
recs.csv Normal file
View File

@ -0,0 +1,29 @@
recording
2020-03-13-10_00
2020-03-16-10_00
2020-03-19-10_00
2020-03-20-10_00
2020-03-23-09_58
2020-03-24-10_00
2020-03-25-10_00
2020-03-31-09_59
2020-05-11-10_00
2020-05-12-10_00
2020-05-13-10_00
2020-05-14-10_00
2020-05-15-10_00
2020-05-18-10_00
2020-05-19-10_00
2020-05-21-10_00
2020-05-25-10_00
2020-05-27-10_00
2020-05-28-10_00
2020-05-29-10_00
2020-06-02-10_00
2020-06-03-10_10
2020-06-04-10_00
2020-06-05-10_00
2020-06-08-10_00
2020-06-09-10_00
2020-06-10-10_00
2020-06-11-10_00
1 recording
2 2020-03-13-10_00
3 2020-03-16-10_00
4 2020-03-19-10_00
5 2020-03-20-10_00
6 2020-03-23-09_58
7 2020-03-24-10_00
8 2020-03-25-10_00
9 2020-03-31-09_59
10 2020-05-11-10_00
11 2020-05-12-10_00
12 2020-05-13-10_00
13 2020-05-14-10_00
14 2020-05-15-10_00
15 2020-05-18-10_00
16 2020-05-19-10_00
17 2020-05-21-10_00
18 2020-05-25-10_00
19 2020-05-27-10_00
20 2020-05-28-10_00
21 2020-05-29-10_00
22 2020-06-02-10_00
23 2020-06-03-10_10
24 2020-06-04-10_00
25 2020-06-05-10_00
26 2020-06-08-10_00
27 2020-06-09-10_00
28 2020-06-10-10_00
29 2020-06-11-10_00