kdes work but scale is wrong

This commit is contained in:
weygoldt 2023-01-25 17:29:26 +01:00
parent 9e5ec1d70b
commit eaa91e1655
4 changed files with 478 additions and 131 deletions

View File

@ -1,98 +0,0 @@
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from IPython import embed
from pandas import read_csv
from modules.logger import makeLogger
from modules.plotstyle import PlotStyle
from modules.datahandling import flatten
from modules.behaviour_handling import Behavior, correct_chasing_events, event_triggered_chirps
from extract_chirps import get_valid_datasets
logger = makeLogger(__name__)
ps = PlotStyle()
def get_chirp_winner_loser(folder_name, Behavior, order_meta_df):
foldername = folder_name.split('/')[-2]
winner_row = order_meta_df[order_meta_df['recording'] == foldername]
winner = winner_row['winner'].values[0].astype(int)
winner_fish1 = winner_row['fish1'].values[0].astype(int)
winner_fish2 = winner_row['fish2'].values[0].astype(int)
if winner > 0:
if winner == winner_fish1:
winner_fish_id = winner_row['rec_id1'].values[0]
loser_fish_id = winner_row['rec_id2'].values[0]
elif winner == winner_fish2:
winner_fish_id = winner_row['rec_id2'].values[0]
loser_fish_id = winner_row['rec_id1'].values[0]
chirp_winner = Behavior.chirps[Behavior.chirps_ids == winner_fish_id]
chirp_loser = Behavior.chirps[Behavior.chirps_ids == loser_fish_id]
return chirp_winner, chirp_loser
else:
return None, None
def main(dataroot):
foldernames, _ = get_valid_datasets(dataroot)
meta_path = (
'/').join(foldernames[0].split('/')[:-2]) + '/order_meta.csv'
meta = pd.read_csv(meta_path)
meta['recording'] = meta['recording'].str[1:-1]
winner_chirps = []
loser_chirps = []
onsets = []
offsets = []
physicals = []
# Iterate over all recordings and save chirp- and event-timestamps
for folder in foldernames:
logger.info('Loading data from folder: {}'.format(folder))
time_before = 30
time_after = 60
dt = 0.1
kernel_width = 2
kde_time = np.arange(-time_before, time_after, dt)
broken_folders = ['../data/mount_data/2020-05-12-10_00/']
if folder in broken_folders:
continue
bh = Behavior(folder)
winner, loser = get_chirp_winner_loser(folder, bh, meta)
if winner is None:
continue
# Chirps are already sorted
winner_chirps.append(bh.chirps)
loser_chirps.append(bh.chirps)
# Correct for doubles in chasing on- and offsets to get the right on-/offset pairs
# Get rid of tracking faults (two onsets or two offsets after another)
category, timestamps = correct_chasing_events(bh.behavior, bh.start_s)
# Split categories
onsets.append(timestamps[category == 0])
offsets.append(timestamps[category == 1])
physicals.append(timestamps[category == 2])
# center chirps around events
if __name__ == '__main__':
main('../data/mount_data/')

View File

@ -345,7 +345,7 @@ def main(datapath: str):
np.random.shuffle(diff_physical)
shuffled_physical = np.cumsum(diff_physical)
kde_onset = (acausal_kde1d(shuffled_onset, time, width))/(27*100)
kde_onset (acausal_kde1d(shuffled_onset, time, width))/(27*100)
kde_offset = (acausal_kde1d(shuffled_offset, time, width))/(27*100)
kde_physical = (acausal_kde1d(shuffled_physical, time, width))/(27*100)

View File

@ -1,13 +1,10 @@
import numpy as np
import os
from IPython import embed
from pandas import read_csv
from modules.logger import makeLogger
from modules.datahandling import causal_kde1d, acausal_kde1d
from modules.datahandling import causal_kde1d, acausal_kde1d, flatten
logger = makeLogger(__name__)
@ -43,7 +40,7 @@ class Behavior:
# csv_filename = [f for f in os.listdir(
# folder_path) if f.endswith('.csv')][0]
logger.info(f'CSV file: {csv_filename}')
# logger.info(f'CSV file: {csv_filename}')
self.dataframe = read_csv(os.path.join(folder_path, csv_filename))
self.chirps = np.load(os.path.join(
@ -92,6 +89,12 @@ def correct_chasing_events(
wrong_bh = np.arange(len(category))[
category != 2][:-1][np.diff(category[category != 2]) == 0]
if category[category != 2][-1] == 0:
wrong_bh = np.append(
wrong_bh,
np.arange(len(category))[category != 2][-1])
if onset_ids[0] > offset_ids[0]:
offset_ids = np.delete(offset_ids, 0)
help_index = offset_ids[0]
@ -106,51 +109,61 @@ def correct_chasing_events(
len(category))[category == 1]
# Check whether on- or offset is longer and calculate length difference
if len(new_onset_ids) > len(new_offset_ids):
len_diff = len(onset_ids) - len(offset_ids)
logger.info(f'Onsets are greater than offsets by {len_diff}')
embed()
logger.warning('Onsets are greater than offsets')
elif len(new_onset_ids) < len(new_offset_ids):
len_diff = len(offset_ids) - len(onset_ids)
logger.info(f'Offsets are greater than onsets by {len_diff}')
logger.warning('Offsets are greater than onsets')
elif len(new_onset_ids) == len(new_offset_ids):
logger.info('Chasing events are equal')
# logger.info('Chasing events are equal')
pass
return category, timestamps
def event_triggered_chirps(
event: np.ndarray,
def center_chirps(
events: np.ndarray,
chirps: np.ndarray,
time_before_event: int,
time_after_event: int,
dt: float,
width: float,
# dt: float,
# width: float,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
event_chirps = [] # chirps that are in specified window around event
# timestamps of chirps around event centered on the event timepoint
centered_chirps = []
for event_timestamp in event:
for event_timestamp in events:
start = event_timestamp - time_before_event
stop = event_timestamp + time_after_event
chirps_around_event = [c for c in chirps if (c >= start) & (c <= stop)]
event_chirps.append(chirps_around_event)
if len(chirps_around_event) == 0:
continue
else:
centered_chirps.append(chirps_around_event - event_timestamp)
event_chirps.append(chirps_around_event)
centered_chirps = np.sort(flatten(centered_chirps))
event_chirps = np.sort(flatten(event_chirps))
if len(centered_chirps) != len(event_chirps):
raise ValueError(
'Non centered chirps and centered chirps are not equal')
time = np.arange(-time_before_event, time_after_event, dt)
# time = np.arange(-time_before_event, time_after_event, dt)
# Kernel density estimation with some if's
if len(centered_chirps) == 0:
centered_chirps = np.array([])
centered_chirps_convolved = np.zeros(len(time))
else:
# convert list of arrays to one array for plotting
centered_chirps = np.concatenate(centered_chirps, axis=0)
centered_chirps_convolved = (acausal_kde1d(
centered_chirps, time, width)) / len(event)
# # Kernel density estimation with some if's
# if len(centered_chirps) == 0:
# centered_chirps = np.array([])
# centered_chirps_convolved = np.zeros(len(time))
# else:
# # convert list of arrays to one array for plotting
# centered_chirps = np.concatenate(centered_chirps, axis=0)
# centered_chirps_convolved = (acausal_kde1d(
# centered_chirps, time, width)) / len(event)
return event_chirps, centered_chirps, centered_chirps_convolved
return centered_chirps

432
code/plot_kdes.py Normal file
View File

@ -0,0 +1,432 @@
from extract_chirps import get_valid_datasets
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from IPython import embed
from pandas import read_csv
from modules.logger import makeLogger
from modules.datahandling import flatten, causal_kde1d, acausal_kde1d
from modules.behaviour_handling import (
Behavior, correct_chasing_events, center_chirps)
from modules.plotstyle import PlotStyle
logger = makeLogger(__name__)
ps = PlotStyle()
def jackknife(data, nresamples, subsetsize, kde_time, kernel_width):
if len(data) == 0:
return []
jackknifed_kdes = []
data = np.sort(data)
subsetsize = int(np.round(len(data)*subsetsize))
for n in range(nresamples):
subset = np.random.choice(data, subsetsize, replace=False)
subset_kde = acausal_kde1d(subset, time=kde_time, width=kernel_width)
jackknifed_kdes.append(subset_kde)
return jackknifed_kdes
def bootstrap(data, nresamples, kde_time, kernel_width, event_times, time_before, time_after):
bootstrapped_kdes = []
data = data[data <= 3*60*60] # only night time
if len(data) == 0:
logger.info('No data for bootstrap, added zeros')
return [np.zeros_like(kde_time) for i in range(nresamples)]
diff_data = np.diff(np.sort(data), prepend=np.sort(data)[0])
for i in tqdm(range(nresamples)):
np.random.shuffle(diff_data)
bootstrapped_data = np.cumsum(diff_data)
bootstrap_data_centered = center_chirps(
bootstrapped_data, event_times, time_before, time_after)
bootstrapped_kde = acausal_kde1d(
bootstrap_data_centered, time=kde_time, width=kernel_width)
bootstrapped_kdes.append(bootstrapped_kde)
return bootstrapped_kdes
def get_chirp_winner_loser(folder_name, Behavior, order_meta_df):
foldername = folder_name.split('/')[-2]
winner_row = order_meta_df[order_meta_df['recording'] == foldername]
winner = winner_row['winner'].values[0].astype(int)
winner_fish1 = winner_row['fish1'].values[0].astype(int)
winner_fish2 = winner_row['fish2'].values[0].astype(int)
if winner > 0:
if winner == winner_fish1:
winner_fish_id = winner_row['rec_id1'].values[0]
loser_fish_id = winner_row['rec_id2'].values[0]
elif winner == winner_fish2:
winner_fish_id = winner_row['rec_id2'].values[0]
loser_fish_id = winner_row['rec_id1'].values[0]
chirp_winner = Behavior.chirps[Behavior.chirps_ids == winner_fish_id]
chirp_loser = Behavior.chirps[Behavior.chirps_ids == loser_fish_id]
return chirp_winner, chirp_loser
return None, None
def main(dataroot):
foldernames, _ = get_valid_datasets(dataroot)
plot_all = False
time_before = 60
time_after = 60
dt = 0.001
kernel_width = 1
kde_time = np.arange(-time_before, time_after, dt)
nbootstraps = 2
meta_path = (
'/').join(foldernames[0].split('/')[:-2]) + '/order_meta.csv'
meta = pd.read_csv(meta_path)
meta['recording'] = meta['recording'].str[1:-1]
winner_onsets = []
winner_offsets = []
winner_physicals = []
loser_onsets = []
loser_offsets = []
loser_physicals = []
winner_onsets_boot = []
winner_offsets_boot = []
winner_physicals_boot = []
loser_onsets_boot = []
loser_offsets_boot = []
loser_physicals_boot = []
onset_count = 0
offset_count = 0
physical_count = 0
# Iterate over all recordings and save chirp- and event-timestamps
for folder in tqdm(foldernames):
foldername = folder.split('/')[-2]
# logger.info('Loading data from folder: {}'.format(foldername))
broken_folders = ['../data/mount_data/2020-05-12-10_00/']
if folder in broken_folders:
continue
bh = Behavior(folder)
category, timestamps = correct_chasing_events(bh.behavior, bh.start_s)
winner, loser = get_chirp_winner_loser(folder, bh, meta)
if winner is None:
continue
onsets = (timestamps[category == 0])
offsets = (timestamps[category == 1])
physicals = (timestamps[category == 2])
onset_count += len(onsets)
offset_count += len(offsets)
physical_count += len(physicals)
winner_onsets.append(center_chirps(
winner, onsets, time_before, time_after))
winner_offsets.append(center_chirps(
winner, offsets, time_before, time_after))
winner_physicals.append(center_chirps(
winner, physicals, time_before, time_after))
loser_onsets.append(center_chirps(
loser, onsets, time_before, time_after))
loser_offsets.append(center_chirps(
loser, offsets, time_before, time_after))
loser_physicals.append(center_chirps(
loser, physicals, time_before, time_after))
# bootstrap
winner_onsets_boot.append(bootstrap(
winner,
nresamples=nbootstraps,
kde_time=kde_time,
kernel_width=kernel_width,
event_times=onsets,
time_before=time_before,
time_after=time_after))
winner_offsets_boot.append(bootstrap(
winner,
nresamples=nbootstraps,
kde_time=kde_time,
kernel_width=kernel_width,
event_times=offsets,
time_before=time_before,
time_after=time_after))
winner_physicals_boot.append(bootstrap(
winner,
nresamples=nbootstraps,
kde_time=kde_time,
kernel_width=kernel_width,
event_times=physicals,
time_before=time_before,
time_after=time_after))
loser_onsets_boot.append(bootstrap(
loser,
nresamples=nbootstraps,
kde_time=kde_time,
kernel_width=kernel_width,
event_times=onsets,
time_before=time_before,
time_after=time_after))
loser_offsets_boot.append(bootstrap(
loser,
nresamples=nbootstraps,
kde_time=kde_time,
kernel_width=kernel_width,
event_times=offsets,
time_before=time_before,
time_after=time_after))
loser_physicals_boot.append(bootstrap(
loser,
nresamples=nbootstraps,
kde_time=kde_time,
kernel_width=kernel_width,
event_times=physicals,
time_before=time_before,
time_after=time_after))
if plot_all:
winner_onsets_conv = acausal_kde1d(
winner_onsets[-1], kde_time, kernel_width)
winner_offsets_conv = acausal_kde1d(
winner_offsets[-1], kde_time, kernel_width)
winner_physicals_conv = acausal_kde1d(
winner_physicals[-1], kde_time, kernel_width)
loser_onsets_conv = acausal_kde1d(
loser_onsets[-1], kde_time, kernel_width)
loser_offsets_conv = acausal_kde1d(
loser_offsets[-1], kde_time, kernel_width)
loser_physicals_conv = acausal_kde1d(
loser_physicals[-1], kde_time, kernel_width)
fig, ax = plt.subplots(2, 3, figsize=(
21*ps.cm, 10*ps.cm), sharey=True, sharex=True)
ax[0, 0].set_title(
f"{foldername}, onsets {len(onsets)}, offsets {len(offsets)}, physicals {len(physicals)},winner {len(winner)}, looser {len(loser)} , onsets")
ax[0, 0].plot(kde_time, winner_onsets_conv/len(onsets))
ax[0, 1].plot(kde_time, winner_offsets_conv/len(offsets))
ax[0, 2].plot(kde_time, winner_physicals_conv/len(physicals))
ax[1, 0].plot(kde_time, loser_onsets_conv/len(onsets))
ax[1, 1].plot(kde_time, loser_offsets_conv/len(offsets))
ax[1, 2].plot(kde_time, loser_physicals_conv/len(physicals))
# # plot bootstrap lines
# for kde in winner_onsets_boot[-1]:
# ax[0, 0].plot(kde_time, kde/len(offsets),
# color='gray')
# for kde in winner_offsets_boot[-1]:
# ax[0, 1].plot(kde_time, kde/len(offsets),
# color='gray')
# for kde in winner_physicals_boot[-1]:
# ax[0, 2].plot(kde_time, kde/len(offsets),
# color='gray')
# for kde in loser_onsets_boot[-1]:
# ax[1, 0].plot(kde_time, kde/len(offsets),
# color='gray')
# for kde in loser_offsets_boot[-1]:
# ax[1, 1].plot(kde_time, kde/len(offsets),
# color='gray')
# for kde in loser_physicals_boot[-1]:
# ax[1, 2].plot(kde_time, kde/len(offsets),
# color='gray')
# plot bootstrap percentiles
ax[0, 0].fill_between(
kde_time,
np.percentile(winner_onsets_boot[-1], 5, axis=0)/len(onsets),
np.percentile(winner_onsets_boot[-1], 95, axis=0)/len(onsets),
color='gray',
alpha=0.5)
ax[0, 1].fill_between(
kde_time,
np.percentile(winner_offsets_boot[-1], 5, axis=0)/len(offsets),
np.percentile(
winner_offsets_boot[-1], 95, axis=0)/len(offsets),
color='gray',
alpha=0.5)
ax[0, 2].fill_between(
kde_time,
np.percentile(
winner_physicals_boot[-1], 5, axis=0)/len(physicals),
np.percentile(
winner_physicals_boot[-1], 95, axis=0)/len(physicals),
color='gray',
alpha=0.5)
ax[1, 0].fill_between(
kde_time,
np.percentile(loser_onsets_boot[-1], 5, axis=0)/len(onsets),
np.percentile(loser_onsets_boot[-1], 95, axis=0)/len(onsets),
color='gray',
alpha=0.5)
ax[1, 1].fill_between(
kde_time,
np.percentile(loser_offsets_boot[-1], 5, axis=0)/len(offsets),
np.percentile(loser_offsets_boot[-1], 95, axis=0)/len(offsets),
color='gray',
alpha=0.5)
ax[1, 2].fill_between(
kde_time,
np.percentile(
loser_physicals_boot[-1], 5, axis=0)/len(physicals),
np.percentile(
loser_physicals_boot[-1], 95, axis=0)/len(physicals),
color='gray',
alpha=0.5)
ax[0, 0].plot(kde_time, np.median(winner_onsets_boot[-1], axis=0)/len(onsets),
color='black', linewidth=2)
ax[0, 1].plot(kde_time, np.median(winner_offsets_boot[-1], axis=0)/len(offsets),
color='black', linewidth=2)
ax[0, 2].plot(kde_time, np.median(winner_physicals_boot[-1], axis=0)/len(physicals),
color='black', linewidth=2)
ax[1, 0].plot(kde_time, np.median(loser_onsets_boot[-1], axis=0)/len(onsets),
color='black', linewidth=2)
ax[1, 1].plot(kde_time, np.median(loser_offsets_boot[-1], axis=0)/len(offsets),
color='black', linewidth=2)
ax[1, 2].plot(kde_time, np.median(loser_physicals_boot[-1], axis=0)/len(physicals),
color='black', linewidth=2)
ax[0, 0].set_xlim(-30, 30)
plt.show()
winner_onsets = np.sort(flatten(winner_onsets))
winner_offsets = np.sort(flatten(winner_offsets))
winner_physicals = np.sort(flatten(winner_physicals))
loser_onsets = np.sort(flatten(loser_onsets))
loser_offsets = np.sort(flatten(loser_offsets))
loser_physicals = np.sort(flatten(loser_physicals))
winner_onsets_conv = acausal_kde1d(
winner_onsets, kde_time, kernel_width)
winner_offsets_conv = acausal_kde1d(
winner_offsets, kde_time, kernel_width)
winner_physicals_conv = acausal_kde1d(
winner_physicals, kde_time, kernel_width)
loser_onsets_conv = acausal_kde1d(
loser_onsets, kde_time, kernel_width)
loser_offsets_conv = acausal_kde1d(
loser_offsets, kde_time, kernel_width)
loser_physicals_conv = acausal_kde1d(
loser_physicals, kde_time, kernel_width)
winner_onsets_conv = winner_onsets_conv / onset_count
winner_offsets_conv = winner_offsets_conv / offset_count
winner_physicals_conv = winner_physicals_conv / physical_count
loser_onsets_conv = loser_onsets_conv / onset_count
loser_offsets_conv = loser_offsets_conv / offset_count
loser_physicals_conv = loser_physicals_conv / physical_count
embed()
winner_onsets_boot = np.concatenate(
winner_onsets_boot) / onset_count
winner_offsets_boot = np.concatenate(
winner_offsets_boot) / offset_count
winner_physicals_boot = np.concatenate(
winner_physicals_boot) / physical_count
loser_onsets_boot = np.concatenate(
loser_onsets_boot) / onset_count
loser_offsets_boot = np.concatenate(
loser_offsets_boot) / offset_count
loser_physicals_boot = np.concatenate(
loser_physicals_boot) / physical_count
percs = [5, 50, 95]
winner_onsets_boot_quarts = np.percentile(
winner_onsets_boot, percs, axis=0)
winner_offsets_boot_quarts = np.percentile(
winner_offsets_boot, percs, axis=0)
winner_physicals_boot_quarts = np.percentile(
winner_physicals_boot, percs, axis=0)
loser_onsets_boot_quarts = np.percentile(
loser_onsets_boot, percs, axis=0)
loser_offsets_boot_quarts = np.percentile(
loser_offsets_boot, percs, axis=0)
loser_physicals_boot_quarts = np.percentile(
loser_physicals_boot, percs, axis=0)
fig, ax = plt.subplots(2, 3, figsize=(
21*ps.cm, 10*ps.cm), sharey=True, sharex=True)
ax[0, 0].plot(kde_time, winner_onsets_conv)
ax[0, 0].fill_between(kde_time,
winner_onsets_boot_quarts[0],
winner_onsets_boot_quarts[2],
color=ps.gray,
alpha=0.5)
ax[0, 0].plot(kde_time, winner_onsets_boot_quarts[1], c=ps.black)
ax[0, 1].plot(kde_time, winner_offsets_conv)
ax[0, 1].fill_between(kde_time,
winner_offsets_boot_quarts[0],
winner_offsets_boot_quarts[2],
color=ps.gray,
alpha=0.5)
ax[0, 1].plot(kde_time, winner_offsets_boot_quarts[1], c=ps.black)
ax[0, 2].plot(kde_time, winner_physicals_conv)
ax[0, 2].fill_between(kde_time,
loser_physicals_boot_quarts[0],
loser_physicals_boot_quarts[2],
color=ps.gray,
alpha=0.5)
ax[0, 2].plot(kde_time, winner_physicals_boot_quarts[1], c=ps.black)
ax[1, 0].plot(kde_time, loser_onsets_conv)
ax[1, 0].fill_between(kde_time,
loser_onsets_boot_quarts[0],
loser_onsets_boot_quarts[2],
color=ps.gray,
alpha=0.5)
ax[1, 0].plot(kde_time, loser_onsets_boot_quarts[1], c=ps.black)
ax[1, 1].plot(kde_time, loser_offsets_conv)
ax[1, 1].fill_between(kde_time,
loser_offsets_boot_quarts[0],
loser_offsets_boot_quarts[2],
color=ps.gray,
alpha=0.5)
ax[1, 1].plot(kde_time, loser_offsets_boot_quarts[1], c=ps.black)
ax[1, 2].plot(kde_time, loser_physicals_conv)
ax[1, 2].fill_between(kde_time,
loser_physicals_boot_quarts[0],
loser_physicals_boot_quarts[2],
color=ps.gray,
alpha=0.5)
ax[1, 2].plot(kde_time, loser_physicals_boot_quarts[1], c=ps.black)
plt.show()
if __name__ == '__main__':
main('../data/mount_data/')