gapfinder should work now, need data for testing

This commit is contained in:
weygoldt 2023-01-22 12:23:35 +01:00
parent 056a34ba6b
commit e838185d8a

View File

@ -15,7 +15,6 @@ from modules.plotstyle import PlotStyle
from modules.logger import makeLogger from modules.logger import makeLogger
from modules.datahandling import ( from modules.datahandling import (
flatten, flatten,
norm,
purge_duplicates, purge_duplicates,
group_timestamps, group_timestamps,
instantaneous_frequency, instantaneous_frequency,
@ -351,9 +350,16 @@ def extract_frequency_bands(
def window_median_all_track_ids( def window_median_all_track_ids(
data: LoadData, window_start_seconds: float, window_duration_seconds: float data: LoadData, window_start_seconds: float, window_duration_seconds: float
) -> tuple[float, list[int]]: ) -> tuple[list[tuple[float, float, float]], list[int]]:
""" """
Calculate the median frequency of all fish in a given time window. Calculate the median and quantiles of the frequency of all fish in a
given time window.
Iterate over all track ids and calculate the 25, 50 and 75 percentile
in a given time window to pass this data to 'find_searchband' function,
which then determines whether other fish in the current window fall
within the searchband of the current fish and then determine the
gaps that are outside of the percentile ranges.
Parameters Parameters
---------- ----------
@ -366,14 +372,16 @@ def window_median_all_track_ids(
Returns Returns
------- -------
tuple[float, list[int]] tuple[list[tuple[float, float, float]], list[int]]
""" """
median_freq = [] frequency_percentiles = []
track_ids = [] track_ids = []
for _, track_id in enumerate(np.unique(data.ident[~np.isnan(data.ident)])): for _, track_id in enumerate(np.unique(data.ident[~np.isnan(data.ident)])):
# the window index combines the track id and the time window
window_idx = np.arange(len(data.idx))[ window_idx = np.arange(len(data.idx))[
(data.ident == track_id) (data.ident == track_id)
& (data.time[data.idx] >= window_start_seconds) & (data.time[data.idx] >= window_start_seconds)
@ -384,20 +392,21 @@ def window_median_all_track_ids(
] ]
if len(data.freq[window_idx]) > 0: if len(data.freq[window_idx]) > 0:
median_freq.append(np.median(data.freq[window_idx])) frequency_percentiles.append(
np.percentile(data.freq[window_idx], [25, 50, 75]))
track_ids.append(track_id) track_ids.append(track_id)
# convert to numpy array # convert to numpy array
median_freq = np.asarray(median_freq) frequency_percentiles = np.asarray(frequency_percentiles)
track_ids = np.asarray(track_ids) track_ids = np.asarray(track_ids)
return median_freq, track_ids return frequency_percentiles, track_ids
def find_searchband( def find_searchband(
freq_temp: np.ndarray, current_frequency: np.ndarray,
median_ids: np.ndarray, percentiles_ids: np.ndarray,
median_freq: np.ndarray, frequency_percentiles: np.ndarray,
config: ConfLoader, config: ConfLoader,
data: LoadData, data: LoadData,
) -> float: ) -> float:
@ -407,13 +416,13 @@ def find_searchband(
Parameters Parameters
---------- ----------
freq_temp : np.ndarray current_frequency : np.ndarray
Current EOD frequency array / the current fish of interest. Current EOD frequency array / the current fish of interest.
median_ids : np.ndarray percentiles_ids : np.ndarray
Array of track IDs of the medians of all other fish in the current Array of track IDs of the medians of all other fish in the current
window. window.
median_freq : np.ndarray frequency_percentiles : np.ndarray
Array of median frequencies of all other fish in the current window. Array of frequency percentiles of all other fish in the current window.
config : ConfLoader config : ConfLoader
Configuration file. Configuration file.
data : LoadData data : LoadData
@ -424,19 +433,27 @@ def find_searchband(
float float
""" """
# frequency where second filter filters # frequency window where the second filter is potentially allowed
# to filter. This is the search window, in which we want to find
# a gap in the other fish's EODs.
search_window = np.arange( search_window = np.arange(
np.median(freq_temp) + config.search_df_lower, np.median(current_frequency) + config.search_df_lower,
np.median(freq_temp) + config.search_df_upper, np.median(current_frequency) + config.search_df_upper,
config.search_res, config.search_res,
) )
# search window in boolean # search window in boolean
search_window_bool = np.ones(len(search_window), dtype=bool) search_window_bool = np.ones_like(len(search_window), dtype=bool)
# make separate arrays from the quartiles
q25 = np.asarray([i[0] for i in frequency_percentiles])
q75 = np.asarray([i[2] for i in frequency_percentiles])
# get tracks that fall into search window # get tracks that fall into search window
check_track_ids = median_ids[ check_track_ids = percentiles_ids[
(median_freq > search_window[0]) & (median_freq < search_window[-1]) (q25 > search_window[0]) & (
q75 < search_window[-1])
] ]
# iterate through these tracks # iterate through these tracks
@ -444,25 +461,26 @@ def find_searchband(
for j, check_track_id in enumerate(check_track_ids): for j, check_track_id in enumerate(check_track_ids):
q1, q2 = np.percentile( q25_temp = q25[percentiles_ids == check_track_id]
data.freq[data.ident == check_track_id], [25, 75] q75_temp = q75[percentiles_ids == check_track_id]
)
print(q1, q2) print(q25_temp, q75_temp)
search_window_bool[ search_window_bool[
(search_window > q1) & (search_window < q2) (search_window > q25_temp) & (search_window < q75_temp)
] = False ] = False
# find gaps in search window # find gaps in search window
search_window_indices = np.arange(len(search_window)) search_window_indices = np.arange(len(search_window))
# get search window gaps # get search window gaps
# taking the diff of a boolean array gives non zero values where the
# array changes from true to false or vice versa
search_window_gaps = np.diff(search_window_bool, append=np.nan) search_window_gaps = np.diff(search_window_bool, append=np.nan)
nonzeros = search_window_gaps[np.nonzero(search_window_gaps)[0]] nonzeros = search_window_gaps[np.nonzero(search_window_gaps)[0]]
nonzeros = nonzeros[~np.isnan(nonzeros)] nonzeros = nonzeros[~np.isnan(nonzeros)]
embed()
# if the first value is -1, the array starts with true, so a gap # if the first value is -1, the array starts with true, so a gap
if nonzeros[0] == -1: if nonzeros[0] == -1:
stops = search_window_indices[search_window_gaps == -1] stops = search_window_indices[search_window_gaps == -1]
@ -494,14 +512,16 @@ def find_searchband(
search_windows_lens = [len(x) for x in search_windows] search_windows_lens = [len(x) for x in search_windows]
longest_search_window = search_windows[np.argmax(search_windows_lens)] longest_search_window = search_windows[np.argmax(search_windows_lens)]
# the center of the search frequency band is then the center of
# the longest gap
search_freq = ( search_freq = (
longest_search_window[-1] - longest_search_window[0] longest_search_window[-1] - longest_search_window[0]
) / 2 ) / 2
else: return search_freq
search_freq = config.default_search_freq
return search_freq return config.default_search_freq
def main(datapath: str, plot: str) -> None: def main(datapath: str, plot: str) -> None:
@ -637,10 +657,10 @@ def main(datapath: str, plot: str) -> None:
search_frequency = find_searchband( search_frequency = find_searchband(
config=config, config=config,
freq_temp=current_frequencies, current_frequency=current_frequencies,
median_ids=median_ids, percentiles_ids=median_ids,
data=data, data=data,
median_freq=median_freq, frequency_percentiles=median_freq,
) )
# add all chirps that are detected on multiple electrodes for one # add all chirps that are detected on multiple electrodes for one
@ -1001,4 +1021,4 @@ if __name__ == "__main__":
datapath = "../data/2022-06-02-10_00/" datapath = "../data/2022-06-02-10_00/"
# datapath = "/home/weygoldt/Data/uni/efishdata/2016-colombia/fishgrid/2016-04-09-22_25/" # datapath = "/home/weygoldt/Data/uni/efishdata/2016-colombia/fishgrid/2016-04-09-22_25/"
# datapath = "/home/weygoldt/Data/uni/chirpdetection/GP2023_chirp_detection/data/mount_data/2020-03-13-10_00/" # datapath = "/home/weygoldt/Data/uni/chirpdetection/GP2023_chirp_detection/data/mount_data/2020-03-13-10_00/"
main(datapath, plot="save") main(datapath, plot="show")