Merge branch 'master' into behaviour

This commit is contained in:
sprause 2023-01-20 11:40:50 +01:00
commit 4a29cdf84d
5 changed files with 276 additions and 230 deletions

3
.gitignore vendored
View File

@ -1,9 +1,10 @@
# Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode # Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode
# Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode # Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode
# Own stuff # Own stuff
data data
env env
output
# Mac Stuff # Mac Stuff
*.DS_Store *.DS_Store

View File

@ -1,4 +1,5 @@
from itertools import combinations, compress from itertools import compress
from dataclasses import dataclass
import numpy as np import numpy as np
from IPython import embed from IPython import embed
@ -10,14 +11,132 @@ from thunderfish.powerspectrum import spectrogram, decibel
from sklearn.preprocessing import normalize from sklearn.preprocessing import normalize
from modules.filters import bandpass_filter, envelope, highpass_filter from modules.filters import bandpass_filter, envelope, highpass_filter
from modules.filehandling import ConfLoader, LoadData from modules.filehandling import ConfLoader, LoadData, make_outputdir
from modules.datahandling import flatten, purge_duplicates from modules.datahandling import flatten, purge_duplicates, group_timestamps
from modules.plotstyle import PlotStyle from modules.plotstyle import PlotStyle
from modules.logger import makeLogger
logger = makeLogger(__name__)
ps = PlotStyle() ps = PlotStyle()
@dataclass
class PlotBuffer:
    """Hold the intermediate results of one detection window for plotting.

    The buffer is filled while a window is analysed and is only turned into
    a figure (via :meth:`plot_buffer`) once chirps were actually found, so
    that plotting cost is paid only for interesting windows.
    """

    config: ConfLoader  # configuration object; `outputdir` is read when saving
    t0: float  # start of the window in seconds (multiplied by raw_rate below)
    dt: float  # duration of the window in seconds
    track_id: float  # identifier of the track, used in the output file name
    electrode: int  # column of the raw data that is shown in the spectrogram
    data: LoadData  # dataset; `raw`, `raw_rate` and `datapath` are read
    time: np.ndarray  # time axis shared by baseline/search signals
    baseline: np.ndarray  # filtered baseline signal
    baseline_envelope: np.ndarray  # envelope of the baseline signal
    baseline_peaks: np.ndarray  # indices into `time` / `baseline_envelope`
    search: np.ndarray  # filtered search-band signal
    search_envelope: np.ndarray  # envelope of the search-band signal
    search_peaks: np.ndarray  # indices into `time` / `search_envelope`
    frequency_time: np.ndarray  # time axis of the instantaneous frequency
    frequency: np.ndarray  # instantaneous frequency of the baseline
    frequency_filtered: np.ndarray  # filtered instantaneous frequency
    frequency_peaks: np.ndarray  # indices into `frequency_time`

    def plot_buffer(self, chirps: np.ndarray, plot: str) -> None:
        """Draw the seven-panel overview figure for the buffered window.

        Parameters
        ----------
        chirps : np.ndarray
            Time stamps of the detected chirps; each one is marked on the
            spectrogram.
        plot : str
            ``'show'`` displays the figure, ``'save'`` writes it as a PDF
            below ``config.outputdir``. Any other value draws nothing to
            screen or disk.
        """
        logger.debug("Starting plotting")

        # Slice bounds must be integers. t0 is typically derived from an
        # integer sample index divided by the sample rate, so round instead
        # of truncating to recover that index despite float error.
        start_idx = int(round(self.t0 * self.data.raw_rate))
        window_duration = int(round(self.dt * self.data.raw_rate))
        stop_idx = start_idx + window_duration

        # raw trace of the selected electrode for the spectrogram
        data_oi = self.data.raw[start_idx:stop_idx, self.electrode]

        fig, axs = plt.subplots(
            7,
            1,
            figsize=(20 / 2.54, 12 / 2.54),
            constrained_layout=True,
            sharex=True,
            sharey='row',
        )

        # spectrogram with the detected chirps marked at the median frequency
        plot_spectrogram(axs[0], data_oi, self.data.raw_rate, self.t0)
        median_frequency = np.median(self.frequency)
        for chirp in chirps:
            axs[0].scatter(chirp, median_frequency, c=ps.red)

        # waveform of the filtered baseline signal
        axs[1].plot(self.time, self.baseline, c=ps.green)

        # waveform of the filtered search signal
        axs[2].plot(self.time, self.search)

        # instantaneous frequency of the baseline
        axs[3].plot(self.frequency_time, self.frequency)

        # baseline envelope and its detected peaks
        axs[4].plot(self.time, self.baseline_envelope)
        axs[4].scatter(
            self.time[self.baseline_peaks],
            self.baseline_envelope[self.baseline_peaks],
            c=ps.red,
        )

        # search envelope and its detected peaks
        axs[5].plot(self.time, self.search_envelope)
        axs[5].scatter(
            self.time[self.search_peaks],
            self.search_envelope[self.search_peaks],
            c=ps.red,
        )

        # filtered instantaneous frequency and its detected peaks
        axs[6].plot(self.frequency_time, self.frequency_filtered)
        axs[6].scatter(
            self.frequency_time[self.frequency_peaks],
            self.frequency_filtered[self.frequency_peaks],
            c=ps.red,
        )

        # zoom the spectrogram to +-200 around the highest baseline frequency
        max_frequency = np.max(self.frequency)
        axs[0].set_ylim(max_frequency - 200, top=max_frequency + 200)

        axs[6].set_xlabel("Time [s]")
        axs[0].set_title("Spectrogram")
        axs[1].set_title("Fitered baseline")
        axs[2].set_title("Fitered above")
        axs[3].set_title("Fitered baseline instanenous frequency")
        axs[4].set_title("Filtered envelope of baseline envelope")
        axs[5].set_title("Search envelope")
        axs[6].set_title(
            "Filtered absolute instantaneous frequency")

        if plot == 'show':
            plt.show()
        elif plot == 'save':
            # create the output root first, then the per-recording folder
            make_outputdir(self.config.outputdir)
            out = make_outputdir(self.config.outputdir +
                                 self.data.datapath.split('/')[-2] + '/')
            plt.savefig(f"{out}{self.track_id}_{self.t0}.pdf")

        # Always release the figure; one is created per plotted window and
        # they would otherwise accumulate over a long recording.
        plt.close(fig)
def instantaneos_frequency( def instantaneos_frequency(
signal: np.ndarray, samplerate: int signal: np.ndarray, samplerate: int
) -> tuple[np.ndarray, np.ndarray]: ) -> tuple[np.ndarray, np.ndarray]:
@ -78,6 +197,9 @@ def plot_spectrogram(axis, signal: np.ndarray, samplerate: float, t0: float) ->
t0 : float t0 : float
Start time of the signal. Start time of the signal.
""" """
logger.debug("Plotting spectrogram")
# compute spectrogram # compute spectrogram
spec_power, spec_freqs, spec_times = spectrogram( spec_power, spec_freqs, spec_times = spectrogram(
signal, signal,
@ -137,7 +259,48 @@ def double_bandpass(
return (filtered_baseline, filtered_search_freq) return (filtered_baseline, filtered_search_freq)
def freqmedian_allfish(
    data: "LoadData", t0: float, dt: float
) -> tuple[np.ndarray, np.ndarray]:
    """
    Calculate the median frequency of every fish in a given time window.

    Tracks without any data point inside the window are left out, so both
    returned arrays only describe fish that were present.

    Parameters
    ----------
    data : LoadData
        Data to calculate the median frequency from. The attributes
        `ident`, `idx`, `time` and `freq` are read.
    t0 : float
        Start time of the window.
    dt : float
        Duration of the window.

    Returns
    -------
    median_freq : np.ndarray
        Median frequency of each track that has data in the window.
    track_ids : np.ndarray
        Track identifiers, in the same order as `median_freq`.
    """

    # the time mask is the same for every track, so build it only once
    sample_times = data.time[data.idx]
    in_window = (sample_times >= t0) & (sample_times <= (t0 + dt))

    median_freq = []
    track_ids = []

    # NaN identities mark unassigned detections and are not a track
    for track_id in np.unique(data.ident[~np.isnan(data.ident)]):
        track_freqs = data.freq[(data.ident == track_id) & in_window]

        # skip tracks that are absent in this window (median would be NaN)
        if len(track_freqs) > 0:
            median_freq.append(np.median(track_freqs))
            track_ids.append(track_id)

    return np.asarray(median_freq), np.asarray(track_ids)
def main(datapath: str, plot: str) -> None:
assert plot in ["save", "show", "false"]
# load raw file # load raw file
data = LoadData(datapath) data = LoadData(datapath)
@ -165,9 +328,12 @@ def main(datapath: str) -> None:
# make time array for raw data # make time array for raw data
raw_time = np.arange(data.raw.shape[0]) / data.raw_rate raw_time = np.arange(data.raw.shape[0]) / data.raw_rate
# good chirp times for data: 2022-06-02-10_00 # # good chirp times for data: 2022-06-02-10_00
t0 = (3 * 60 * 60 + 6 * 60 + 43.5) * data.raw_rate # t0 = (3 * 60 * 60 + 6 * 60 + 43.5) * data.raw_rate
dt = 60 * data.raw_rate # dt = 60 * data.raw_rate
t0 = 0
dt = data.raw.shape[0]
# generate starting points of rolling window # generate starting points of rolling window
window_starts = np.arange( window_starts = np.arange(
@ -177,15 +343,13 @@ def main(datapath: str) -> None:
dtype=int dtype=int
) )
# ask how many windows should be calulated
nwindows = int(
input("How many windows should be calculated (integer number)? "))
# ititialize lists to store data # ititialize lists to store data
chirps = [] chirps = []
fish_ids = [] fish_ids = []
for st, start_index in enumerate(window_starts[: nwindows]): for st, start_index in enumerate(window_starts):
logger.info(f"Processing window {st} of {len(window_starts)}")
# make t0 and dt # make t0 and dt
t0 = start_index / data.raw_rate t0 = start_index / data.raw_rate
@ -195,24 +359,12 @@ def main(datapath: str) -> None:
stop_index = start_index + window_duration stop_index = start_index + window_duration
# calucate median of fish frequencies in window # calucate median of fish frequencies in window
median_freq = [] median_freq, median_ids = freqmedian_allfish(data, t0, dt)
track_ids = []
for _, track_id in enumerate(np.unique(data.ident[~np.isnan(data.ident)])):
window_idx = np.arange(len(data.idx))[
(data.ident == track_id) & (data.time[data.idx] >= t0) & (
data.time[data.idx] <= (t0 + dt))
]
median_freq.append(np.median(data.freq[window_idx]))
track_ids.append(track_id)
# convert to numpy array
median_freq = np.asarray(median_freq)
track_ids = np.asarray(track_ids)
# iterate through all fish # iterate through all fish
for tr, track_id in enumerate(np.unique(data.ident[~np.isnan(data.ident)])): for tr, track_id in enumerate(np.unique(data.ident[~np.isnan(data.ident)])):
print(f"Track ID: {track_id}") logger.debug(f"Processing track {tr} of {len(data.ids)}")
# get index of track data in this time window # get index of track data in this time window
window_idx = np.arange(len(data.idx))[ window_idx = np.arange(len(data.idx))[
@ -230,19 +382,18 @@ def main(datapath: str) -> None:
expected_duration = ((t0 + dt) - t0) * track_samplerate expected_duration = ((t0 + dt) - t0) * track_samplerate
# check if tracked data available in this window # check if tracked data available in this window
if len(freq_temp) < expected_duration * 0.9: if len(freq_temp) < expected_duration * 0.5:
logger.warning(
f"Track {track_id} has no data in window {st}, skipping.")
continue continue
fig, axs = plt.subplots( # check if there are powers available in this window
7, nanchecker = np.unique(np.isnan(powers_temp))
config.number_electrodes, if (len(nanchecker) == 1) and nanchecker[0] == True:
figsize=(20 / 2.54, 12 / 2.54), logger.warning(
constrained_layout=True, f"No powers available for track {track_id} window {st}, skipping.")
sharex=True, continue
sharey='row',
)
# get best electrode
best_electrodes = np.argsort(np.nanmean( best_electrodes = np.argsort(np.nanmean(
powers_temp, axis=0))[-config.number_electrodes:] powers_temp, axis=0))[-config.number_electrodes:]
@ -255,7 +406,7 @@ def main(datapath: str) -> None:
search_window_bool = np.ones(len(search_window), dtype=bool) search_window_bool = np.ones(len(search_window), dtype=bool)
# get tracks that fall into search window # get tracks that fall into search window
check_track_ids = track_ids[(median_freq > search_window[0]) & ( check_track_ids = median_ids[(median_freq > search_window[0]) & (
median_freq < search_window[-1])] median_freq < search_window[-1])]
# iterate through theses tracks # iterate through theses tracks
@ -318,14 +469,15 @@ def main(datapath: str) -> None:
else: else:
search_freq = config.default_search_freq search_freq = config.default_search_freq
print(f"Search frequency: {search_freq}")
# ----------- chrips on the two best electrodes----------- # ----------- chrips on the two best electrodes-----------
chirps_electrodes = [] chirps_electrodes = []
electrodes_of_chirps = []
# iterate through electrodes # iterate through electrodes
for el, electrode in enumerate(best_electrodes): for el, electrode in enumerate(best_electrodes):
print(el)
logger.debug(
f"Processing electrode {el} of {len(best_electrodes)}")
# load region of interest of raw data file # load region of interest of raw data file
data_oi = data.raw[start_index:stop_index, :] data_oi = data.raw[start_index:stop_index, :]
time_oi = raw_time[start_index:stop_index] time_oi = raw_time[start_index:stop_index]
@ -420,7 +572,7 @@ def main(datapath: str) -> None:
baseline_envelope = normalize([baseline_envelope])[0] baseline_envelope = normalize([baseline_envelope])[0]
search_envelope = normalize([search_envelope])[0] search_envelope = normalize([search_envelope])[0]
inst_freq_filtered = normalize([inst_freq_filtered])[0] inst_freq_filtered = normalize([np.abs(inst_freq_filtered)])[0]
# PEAK DETECTION ---------------------------------------------- # PEAK DETECTION ----------------------------------------------
@ -428,7 +580,7 @@ def main(datapath: str) -> None:
prominence = np.percentile( prominence = np.percentile(
baseline_envelope, config.baseline_prominence_percentile) baseline_envelope, config.baseline_prominence_percentile)
baseline_peaks, _ = find_peaks( baseline_peaks, _ = find_peaks(
np.abs(baseline_envelope), prominence=prominence) baseline_envelope, prominence=prominence)
# detect peaks search_envelope # detect peaks search_envelope
prominence = np.percentile( prominence = np.percentile(
@ -442,82 +594,10 @@ def main(datapath: str) -> None:
config.instantaneous_prominence_percentile config.instantaneous_prominence_percentile
) )
inst_freq_peaks, _ = find_peaks( inst_freq_peaks, _ = find_peaks(
np.abs(inst_freq_filtered), inst_freq_filtered,
prominence=prominence prominence=prominence
) )
# # SAVE DATA ---------------------------------------------------
# PLOT --------------------------------------------------------
# plot spectrogram
plot_spectrogram(
axs[0, el], data_oi[:, electrode], data.raw_rate, t0)
# plot baseline instantaneos frequency
axs[1, el].plot(baseline_freq_time, baseline_freq -
np.median(baseline_freq))
# plot waveform of filtered signal
axs[2, el].plot(time_oi, baseline, c=ps.green)
# plot broad filtered baseline
axs[2, el].plot(
time_oi,
broad_baseline,
)
# plot narrow filtered baseline envelope
axs[2, el].plot(
time_oi,
baseline_envelope_unfiltered,
c=ps.red
)
# plot waveform of filtered search signal
axs[3, el].plot(time_oi, search)
# plot envelope of search signal
axs[3, el].plot(
time_oi,
search_envelope,
c=ps.red
)
# plot filtered and rectified envelope
axs[4, el].plot(time_oi, baseline_envelope)
axs[4, el].scatter(
(time_oi)[baseline_peaks],
baseline_envelope[baseline_peaks],
c=ps.red,
)
# plot envelope of search signal
axs[5, el].plot(time_oi, search_envelope)
axs[5, el].scatter(
(time_oi)[search_peaks],
search_envelope[search_peaks],
c=ps.red,
)
# plot filtered instantaneous frequency
axs[6, el].plot(baseline_freq_time, np.abs(inst_freq_filtered))
axs[6, el].scatter(
baseline_freq_time[inst_freq_peaks],
np.abs(inst_freq_filtered)[inst_freq_peaks],
c=ps.red,
)
axs[6, el].set_xlabel("Time [s]")
axs[0, el].set_title("Spectrogram")
axs[1, el].set_title("Fitered baseline instanenous frequency")
axs[2, el].set_title("Fitered baseline")
axs[3, el].set_title("Fitered above")
axs[4, el].set_title("Filtered envelope of baseline envelope")
axs[5, el].set_title("Search envelope")
axs[6, el].set_title(
"Filtered absolute instantaneous frequency")
# DETECT CHIRPS IN SEARCH WINDOW ------------------------------- # DETECT CHIRPS IN SEARCH WINDOW -------------------------------
baseline_ts = time_oi[baseline_peaks] baseline_ts = time_oi[baseline_peaks]
@ -528,125 +608,66 @@ def main(datapath: str) -> None:
if len(baseline_ts) == 0 or len(search_ts) == 0 or len(freq_ts) == 0: if len(baseline_ts) == 0 or len(search_ts) == 0 or len(freq_ts) == 0:
continue continue
# current_chirps = group_timestamps_v2( current_chirps = group_timestamps(
# [list(baseline_ts), list(search_ts), list(freq_ts)], 3) [list(baseline_ts), list(search_ts), list(freq_ts)], 3, config.chirp_window_threshold)
# get index for each feature
baseline_idx = np.zeros_like(baseline_ts)
search_idx = np.ones_like(search_ts)
freq_idx = np.ones_like(freq_ts) * 2
timestamps_features = np.hstack(
[baseline_idx, search_idx, freq_idx])
timestamps = np.hstack([baseline_ts, search_ts, freq_ts])
# sort timestamps
timestamps_idx = np.arange(len(timestamps))
timestamps_features = timestamps_features[np.argsort(
timestamps)]
timestamps = timestamps[np.argsort(timestamps)]
# # get chirps
# diff = np.empty(timestamps.shape)
# diff[0] = np.inf # always retain the 1st element
# diff[1:] = np.diff(timestamps)
# mask = diff < config.chirp_window_threshold
# shared_peak_indices = timestamp_idx[mask]
current_chirps = []
bool_timestamps = np.ones_like(timestamps, dtype=bool)
for bo, tt in enumerate(timestamps):
if bool_timestamps[bo] == False:
continue
cm = timestamps_idx[(timestamps >= tt) & (
timestamps <= tt + config.chirp_window_threshold)]
if set([0, 1, 2]).issubset(timestamps_features[cm]):
current_chirps.append(np.mean(timestamps[cm]))
electrodes_of_chirps.append(el)
bool_timestamps[cm] = False
# for checking if there are chirps on multiple electrodes # for checking if there are chirps on multiple electrodes
if len(current_chirps) == 0:
continue
chirps_electrodes.append(current_chirps) chirps_electrodes.append(current_chirps)
for ct in current_chirps: if (el == config.number_electrodes - 1) & \
axs[0, el].axvline(ct, color='r', lw=1) (len(current_chirps) > 0) & \
(plot in ["show", "save"]):
axs[0, el].scatter(
baseline_freq_time[inst_freq_peaks], logger.debug("Detected chirp, ititialize buffer ...")
np.ones_like(baseline_freq_time[inst_freq_peaks]) * 600,
c=ps.red, # save data to Buffer
) buffer = PlotBuffer(
axs[0, el].scatter( config=config,
(time_oi)[search_peaks], t0=t0,
np.ones_like((time_oi)[search_peaks]) * 600, dt=dt,
c=ps.red, electrode=electrode,
track_id=track_id,
data=data,
time=time_oi,
baseline=baseline,
baseline_envelope=baseline_envelope,
baseline_peaks=baseline_peaks,
search=search,
search_envelope=search_envelope,
search_peaks=search_peaks,
frequency_time=baseline_freq_time,
frequency=baseline_freq,
frequency_filtered=inst_freq_filtered,
frequency_peaks=inst_freq_peaks,
) )
axs[0, el].scatter( logger.debug("Buffer initialized!")
(time_oi)[baseline_peaks],
np.ones_like((time_oi)[baseline_peaks]) * 600,
c=ps.red,
)
# make one array logger.debug(
chirps_electrodes = np.concatenate(chirps_electrodes) f"Processed all electrodes for fish {track_id} for this window, sorting chirps ...")
# make shure they are numpy arrays
chirps_electrodes = np.asarray(chirps_electrodes)
electrodes_of_chirps = np.asarray(electrodes_of_chirps)
# sort them
sort_chirps_electrodes = chirps_electrodes[np.argsort(
chirps_electrodes)]
sort_electrodes = electrodes_of_chirps[np.argsort(
chirps_electrodes)]
bool_vector = np.ones(len(sort_chirps_electrodes), dtype=bool)
# make index vector
index_vector = np.arange(len(sort_chirps_electrodes))
# make it more than only two electrodes for the search after chirps
combinations_best_elctrodes = list(
combinations(range(3), 2))
the_real_chirps = []
for chirp_index, seoc in enumerate(sort_chirps_electrodes):
if bool_vector[chirp_index] == False:
continue
cm = index_vector[(sort_chirps_electrodes >= seoc) & (
sort_chirps_electrodes <= seoc + config.chirp_window_threshold)]
chirps_unique = [] if len(chirps_electrodes) == 0:
for combination in combinations_best_elctrodes: continue
if set(combination).issubset(sort_electrodes[cm]):
chirps_unique.append(
np.mean(sort_chirps_electrodes[cm]))
the_real_chirps.append(np.mean(chirps_unique)) the_real_chirps = group_timestamps(chirps_electrodes, 2, 0.05)
"""
if set([0,1]).issubset(sort_electrodes[cm]):
the_real_chirps.append(np.mean(sort_chirps_electrodes[cm]))
elif set([1,0]).issubset(sort_electrodes[cm]):
the_real_chirps.append(np.mean(sort_chirps_electrodes[cm]))
elif set([0,2]).issubset(sort_electrodes[cm]):
the_real_chirps.append(np.mean(sort_chirps_electrodes[cm]))
elif set([1,2]).issubset(sort_electrodes[cm]):
the_real_chirps.append(np.mean(sort_chirps_electrodes[cm]))
"""
bool_vector[cm] = False
chirps.append(the_real_chirps) chirps.append(the_real_chirps)
fish_ids.append(track_id) fish_ids.append(track_id)
for ct in the_real_chirps: logger.debug('Found %d chirps, starting plotting ... ' %
axs[0, el].axvline(ct, color='b', lw=1) len(the_real_chirps))
if len(the_real_chirps) > 0:
plt.close() try:
fig, ax = plt.subplots() buffer.plot_buffer(the_real_chirps, plot)
t0 = (3 * 60 * 60 + 6 * 60 + 43.5) except NameError:
data_oi = data.raw[window_starts[0]:window_starts[-1] + int(dt*data.raw_rate), 10] pass
plot_spectrogram(ax, data_oi, data.raw_rate, t0) else:
chirps_concat = np.concatenate(chirps) try:
for ch in chirps_concat: del buffer
ax. axvline(ch, color='b', lw=1) except NameError:
pass
chirps_new = [] chirps_new = []
chirps_ids = [] chirps_ids = []
@ -667,9 +688,10 @@ def main(datapath: str) -> None:
purged_chirps.extend(list(tr_chirps_purged)) purged_chirps.extend(list(tr_chirps_purged))
purged_chirps_ids.extend(list(np.ones_like(tr_chirps_purged)*tr)) purged_chirps_ids.extend(list(np.ones_like(tr_chirps_purged)*tr))
embed() np.save(datapath + 'chirps.npy', purged_chirps)
np.save(datapath + 'chirps_ids.npy', purged_chirps_ids)
if __name__ == "__main__": if __name__ == "__main__":
datapath = "../data/2022-06-02-10_00/" datapath = "../data/2022-06-02-10_00/"
main(datapath) main(datapath, plot="save")

4
code/chirpdetector_conf.yml Normal file → Executable file
View File

@ -1,3 +1,6 @@
dataroot: "../data/"
outputdir: "../output/"
# Duration and overlap of the analysis window in seconds # Duration and overlap of the analysis window in seconds
window: 5 window: 5
overlap: 1 overlap: 1
@ -40,7 +43,6 @@ search_freq_percentiles:
- 95 - 95
default_search_freq: 50 default_search_freq: 50
chirp_window_threshold: 0.05 chirp_window_threshold: 0.05

View File

@ -1,5 +1,5 @@
import numpy as np import numpy as np
from typing import List, Union, Any from typing import List, Any
def purge_duplicates( def purge_duplicates(

View File

@ -36,6 +36,7 @@ class LoadData:
def __init__(self, datapath: str) -> None: def __init__(self, datapath: str) -> None:
# load raw data # load raw data
self.datapath = datapath
self.file = os.path.join(datapath, "traces-grid1.raw") self.file = os.path.join(datapath, "traces-grid1.raw")
self.raw = DataLoader(self.file, 60.0, 0, channel=-1) self.raw = DataLoader(self.file, 60.0, 0, channel=-1)
self.raw_rate = self.raw.samplerate self.raw_rate = self.raw.samplerate
@ -53,3 +54,23 @@ class LoadData:
def __str__(self) -> str: def __str__(self) -> str:
return f"LoadData({self.file})" return f"LoadData({self.file})"
def make_outputdir(path: str) -> str:
    """
    Creates a new directory where the path leads if it does not already exist.

    Missing parent directories are created as well, and an already existing
    directory is left untouched.

    Parameters
    ----------
    path : string
        path to the new output directory

    Returns
    -------
    string
        path of the newly created output directory
    """

    # exist_ok avoids the check-then-create race of isdir() + mkdir()
    os.makedirs(path, exist_ok=True)
    return path