diff --git a/figures/fig_feat_stages.pdf b/figures/fig_feat_stages.pdf index 84974dc..c54f9c4 100644 Binary files a/figures/fig_feat_stages.pdf and b/figures/fig_feat_stages.pdf differ diff --git a/figures/fig_features_cross_species.pdf b/figures/fig_features_cross_species.pdf index 1450418..eb4855d 100644 Binary files a/figures/fig_features_cross_species.pdf and b/figures/fig_features_cross_species.pdf differ diff --git a/figures/fig_invariance_cross_species_thresh_appendix.pdf b/figures/fig_invariance_cross_species_thresh_appendix.pdf index f1aad43..00efa22 100644 Binary files a/figures/fig_invariance_cross_species_thresh_appendix.pdf and b/figures/fig_invariance_cross_species_thresh_appendix.pdf differ diff --git a/figures/fig_kernel_sd_perc_field_appendix.pdf b/figures/fig_kernel_sd_perc_field_appendix.pdf index 8a9e618..1c2996f 100644 Binary files a/figures/fig_kernel_sd_perc_field_appendix.pdf and b/figures/fig_kernel_sd_perc_field_appendix.pdf differ diff --git a/figures/fig_kernel_sd_perc_full_appendix.pdf b/figures/fig_kernel_sd_perc_full_appendix.pdf index a1cccc5..0e33cc7 100644 Binary files a/figures/fig_kernel_sd_perc_full_appendix.pdf and b/figures/fig_kernel_sd_perc_full_appendix.pdf differ diff --git a/figures/fig_kernel_sd_perc_short_appendix.pdf b/figures/fig_kernel_sd_perc_short_appendix.pdf index dda5413..c0ae4c9 100644 Binary files a/figures/fig_kernel_sd_perc_short_appendix.pdf and b/figures/fig_kernel_sd_perc_short_appendix.pdf differ diff --git a/figures/fig_kernel_sd_perc_thresh_lp_appendix.pdf b/figures/fig_kernel_sd_perc_thresh_lp_appendix.pdf index cac07ce..35ddcc1 100644 Binary files a/figures/fig_kernel_sd_perc_thresh_lp_appendix.pdf and b/figures/fig_kernel_sd_perc_thresh_lp_appendix.pdf differ diff --git a/figures/fig_noise_env_sd_conversion_appendix.pdf b/figures/fig_noise_env_sd_conversion_appendix.pdf index be69666..35e9ace 100644 Binary files a/figures/fig_noise_env_sd_conversion_appendix.pdf and 
b/figures/fig_noise_env_sd_conversion_appendix.pdf differ diff --git a/figures/fig_pre_stages.pdf b/figures/fig_pre_stages.pdf index 2057cd2..2663fdc 100644 Binary files a/figures/fig_pre_stages.pdf and b/figures/fig_pre_stages.pdf differ diff --git a/figures/fig_saturation_log-hp_appendix.pdf b/figures/fig_saturation_log-hp_appendix.pdf index ec1fdf2..a98c33d 100644 Binary files a/figures/fig_saturation_log-hp_appendix.pdf and b/figures/fig_saturation_log-hp_appendix.pdf differ diff --git a/main.pdf b/main.pdf index bb59aee..1a9c36c 100644 Binary files a/main.pdf and b/main.pdf differ diff --git a/main.tex b/main.tex index 93e0903..4a064d2 100644 --- a/main.tex +++ b/main.tex @@ -228,6 +228,7 @@ intensity-invariant song representations, the interaction between these mechanisms, the overall capacity for intensity invariance in the system, and the ethological implications of our findings. +\newpage \section{Methods} % This maybe does not quite fit here, but it is the most general part of the % methods and applies throughout the whole section, so I put it here for now. @@ -289,8 +290,8 @@ the following sections. representations~(boxes) and transformations~(arrows) along the model pathway. All representations are time-varying. 1st half: Preprocessing stage~(one-dimensional - representation). 2nd half: Feature extraction - stage~(high-dimensional representation). } + representations). 2nd half: Feature extraction + stage~(high-dimensional representations). } \label{fig:pathway} \end{figure} @@ -347,8 +348,8 @@ the following feature extraction stage. \begin{figure}[!ht] \centering \includegraphics[width=\textwidth]{figures/fig_pre_stages.pdf} - \caption{\textbf{Representations of a song of \textit{O. rufipes} during - the preprocessing stage.} + \caption{\textbf{Song representations during the preprocessing stage.} + Example song of \textit{O. rufipes}. \textbf{a}:~Bandpass filtered tympanal signal $\filt(t)$. \textbf{b}:~Signal envelope $\env(t)$. 
\textbf{c}:~Logarithmically compressed envelope $\db(t)$. @@ -483,14 +484,15 @@ or a simple linear classifier. \begin{figure}[!ht] \centering \includegraphics[width=\textwidth]{figures/fig_feat_stages.pdf} - \caption{\textbf{Representations of a song of \textit{O. rufipes} during - the feature extraction stage.} + \caption{\textbf{Song representations during the feature extraction stage.} + Example song of \textit{O. rufipes}. Different color shades indicate different types of Gabor - kernels with specific lobe number $\kn$ and either $+$ or - $-$ sign, sorted (dark to light) first by increasing $\kn$ - and then by sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then - $-$ for each $\kn$; two kernel widths $\kw$ of $4\,$ms and - $32\,$ms per type; 8 types, 16 kernels in total). + kernels with specific lobe number $\kni$ and either $+$ or + $-$ sign, sorted (dark to light) first by increasing + $\kni$ and then by sign~($1\,\leq\,\kni\,\leq\,4$; first + $+$, then $-$ for each $\kni$; two kernel widths $\kwi$ of + $4\,$ms and $32\,$ms per type; 8 types, 16 kernels in + total). \textbf{a}:~Kernel-specific filter responses $c_i(t)$. \textbf{b}:~Binary responses $b_i(t)$. \textbf{c}:~Finalized features $f_i(t)$.} @@ -653,12 +655,13 @@ which is a reasonable assumption for the raw $\soc(t)$ and $\noc(t)$. However, the dependency of the ratio on $\sca$ is not necessarily the same for representations that are transformed from $x(t)$ by nonlinear operations, since these change the relationship of $\soc(t)$ and $\noc(t)$ in an unpredictable -fashion. Furthermore, the ratio is not a proper SNR of the representation -because it does not relate $\soc(t)$ to $\noc(t)$ within the representation but -rather the entire representation to $\noc(t)$ alone. However, it still provides -a useful measure of the relative intensity of a representation with and without -$\soc(t)$, which is the closest we can get to the SNR of the representation. 
As -such, the ratio of intensity measures is referred to as SNR in the following. +fashion~(see appendix Fig.\,\ref{fig:app_env-sd}). Furthermore, the ratio is +not a proper SNR of the representation because it does not relate $\soc(t)$ to +$\noc(t)$ within the representation but rather the entire representation to +$\noc(t)$ alone. However, it still provides a useful measure of the relative +intensity of a representation with and without $\soc(t)$, which is the closest +we can get to the SNR of the representation. As such, the ratio of intensity +measures is referred to as SNR in the following. % Is this legal? "SNR" is much shorter than "ratio of intensity measure to the pure-noise reference measure". % Haven't used it much yet, sticked to "ratio" in most cases. @@ -694,9 +697,10 @@ $\thr$ has been specified as a multiple of the pure-noise reference standard deviation $\sigma_{c_i}$ for input $x(t)=\noc(t)$. This ensures that $\thr$ as well as the resulting $b_i(t)$ and $f_i(t)$ are comparable across different $k_i(t)$ because each pure-noise $c_i(t)$ approximately follows a normal -distribution~(see appendix +distribution around zero~(see appendix Figs.\,\ref{fig:app_thresh-lp_kern-sd}-\ref{fig:app_field_kern-sd}). +\newpage \section{Results} \subsection{Mechanisms driving the emergence of intensity invariance} @@ -767,25 +771,23 @@ more robust input representation and higher input SNR. \includegraphics[width=\textwidth]{figures/fig_invariance_rect_lp.pdf} \caption{\textbf{Rectification and lowpass filtering improves SNR but does not contribute to intensity invariance.} - Input $\raw(t)$ consists of song component $\soc(t)$ scaled by - $\sca$ with optional noise component $\noc(t)$ and is - successively transformed into tympanal signal $\filt(t)$ and - envelope $\env(t)$. Different line styles indicate different - cutoff frequencies $\fc$ of the lowpass filter extracting - $\env(t)$. 
- \textbf{Top}:~Example representations of $\filt(t)$ and - $\env(t)$ for different $\sca$. + Input $\raw(t)$ consists of $\soc(t)$ scaled by $\sca$ with + optional $\noc(t)$ and is successively transformed into + tympanal signal $\filt(t)$ and envelope $\env(t)$. + \textbf{Top}:~Examples of $\filt(t)$ and $\env(t)$ for + different $\sca$. \textbf{a}:~Noiseless case. \textbf{b}:~Noisy case. - \textbf{Bottom}:~Intensity measures over a range of $\sca$. - \textbf{c}:~Noiseless case: Standard deviations $\sigma_x$ of - $\filt(t)$ and $\env(t)$. - \textbf{d}:~Noisy case: Ratios of $\sigma_x$ of $\filt(t)$ and - $\env(t)$ to the respective reference standard deviation - $\sigma_{\eta}$ for input $\raw(t)=\noc(t)$. - \textbf{e}:~Ratios of $\sigma_x$ to $\sigma_{\eta}$ of + \textbf{Bottom}:~Intensity measures over $\sca$. Different + line styles indicate different cutoff frequencies $\fc$ of the + lowpass filter extracting $\env(t)$. + \textbf{c}:~Noiseless case: Standard deviation $\sigma_x$ of + $\filt(t)$ and $\env(t)$, respectively. + \textbf{d}:~Noisy case: Ratio of $\sigma_x$ to the respective + pure-noise reference $\sigma_{\eta}$ for $\sca=0$. + \textbf{e}:~Ratio of $\sigma_x$ to $\sigma_{\eta}$ of $\env(t)$ as in \textbf{d} for different species (averaged - over songs and recordings, see appendix + over songs and recordings, appendix Fig.\,\ref{fig:app_rect-lp}). } \label{fig:rect-lp} @@ -907,28 +909,26 @@ is a recurring phenomenon that is further addressed in the following sections. \caption{\textbf{Intensity invariance through logarithmic compression and adaptation is restricted by the noise floor and decreases SNR.} - Input $\filt(t)$ consists of song component $\soc(t)$ - scaled by $\sca$ with optional noise component $\noc(t)$ + Input $\filt(t)$ consists of $\soc(t)$ + scaled by $\sca$ with optional $\noc(t)$ and is successively transformed into envelope $\env(t)$, logarithmically compressed envelope $\db(t)$, and intensity-adapted envelope $\adapt(t)$. 
- \textbf{Top}:~Example representations of $\env(t)$, - $\db(t)$, and $\adapt(t)$ for different $\sca$. + \textbf{Top}:~Examples of $\env(t)$, $\db(t)$, and + $\adapt(t)$ for different $\sca$. \textbf{a}:~Noiseless case. \textbf{b}:~Noisy case. - \textbf{Bottom}:~Intensity measures over a range of $\sca$. - \textbf{c}:~Noiseless case: Standard deviations $\sigma_x$ - of $\env(t)$, $\db(t)$, and $\adapt(t)$. - \textbf{d}:~Noisy case: Ratios of $\sigma_x$ of $\env(t)$, - $\db(t)$, and $\adapt(t)$ to the respective reference - standard deviation $\sigma_{\eta}$ for input - $\filt(t)=\noc(t)$. Shaded areas indicate $5\,\%$ (dark - grey) and $95\,\%$ (light grey) curve span for - $\adapt(t)$. - \textbf{e}:~Ratios of $\sigma_x$ to $\sigma_{\eta}$ of + \textbf{Bottom}:~Intensity measures over $\sca$. + \textbf{c}:~Noiseless case: Standard deviation $\sigma_x$ + of $\env(t)$, $\db(t)$, and $\adapt(t)$, respectively. + \textbf{d}:~Noisy case: Ratio of $\sigma_x$ to the + respective pure-noise reference $\sigma_{\eta}$ for + $\sca=0$. Shaded areas indicate $5\,\%$ (dark grey) and + $95\,\%$ (light grey) curve span for $\adapt(t)$. + \textbf{e}:~Ratio of $\sigma_x$ to $\sigma_{\eta}$ of $\adapt(t)$ as in \textbf{d} for different species - (averaged over songs and recordings, see appendix - Fig\,\ref{fig:app_log-hp_curves}). Dots indicate $95\,\%$ + (averaged over songs and recordings, appendix + Fig.\,\ref{fig:app_log-hp_curves}). Dots indicate $95\,\%$ curve span per species. } \label{fig:log-hp} @@ -1043,33 +1043,32 @@ intensity invariance are further explored in a later section. \caption{\textbf{Intensity invariance through thresholding and temporal averaging is mediated by the interaction of threshold value and noise floor.} - Input $\adapt(t)$ consists of song component $\soc(t)$ - scaled by $\sca$ with optional noise component $\noc(t)$ - and is transformed into single kernel response $c(t)$, - binary response $b(t)$, and feature $f(t)$. 
Different - color shades indicate different threshold values $\Theta$ - (multiples of reference standard deviation $\sigma_{\eta}$ - of $c(t)$ for input $\adapt(t)=\noc(t)$, with darker - colors for higher $\Theta$). - \textbf{Left}:~Noisy case: Example representations of - $\adapt(t)$ as well as $c(t)$, $b(t)$, and $f(t)$ for - different $\sca$. + Input $\adapt(t)$ consists of $\soc(t)$ scaled by $\sca$ with + optional $\noc(t)$ and is transformed into single kernel + response $c(t)$, binary response $b(t)$, and feature + $f(t)$. Different color shades indicate different + threshold values $\Theta$ (multiples of pure-noise + standard deviation $\sigma_{\eta}$ of $c(t)$ for $\sca=0$, + with darker colors for higher $\Theta$. See also appendix + Fig.\,\ref{fig:app_thresh-lp_kern-sd}). + \textbf{Left}:~Noisy case: Examples of $\adapt(t)$ as well + as $c(t)$, $b(t)$, and $f(t)$ for different $\sca$. \textbf{a}:~$\adapt(t)$ with kernel $k(t)$ in black. \textbf{b\,-\,d}: $c(t)$, $b(t)$, and $f(t)$ based on the - same $\adapt(t)$ from \textbf{a} but with different + same $\adapt(t)$ from \textbf{a} but for different $\Theta$. \textbf{Right}:~Average value $\mu_f$ of $f(t)$ for each $\Theta$ from \textbf{b\,-\,d}. Dots indicate $95\,\%$ curve span (noisy case). - \textbf{e}:~$\mu_f$ over a range of $\sca$, once for the - noisy case (solid lines) and once for the noiseless case - (dotted lines). - \textbf{f}:~Noisy case: $\mu_f$ over the standard - deviation of input $\adapt$ corresponding to the values of - $\sca$ shown in \textbf{e}. Shaded area indicates standard - deviations that would be capped in the output $\adapt(t)$ - of the previous transformation pair (see - Fig.\,\ref{fig:log-hp}cd). + \textbf{e}:~$\mu_f$ over $\sca$, once for the noisy case + (solid lines) and once for the noiseless case (dotted + lines). + \textbf{f}:~Noisy case: $\mu_f$ over standard deviation + $\sigma_{\text{adapt}}$ of input $\adapt$ corresponding to + $\sca$ shown in \textbf{e}.
Shaded area indicates values + of $\sigma_{\text{adapt}}$ that are capped in the output + $\adapt(t)$ of the previous transformation pair + (Fig.\,\ref{fig:log-hp}cd). } \label{fig:thresh-lp_single} \end{figure} @@ -1148,29 +1147,29 @@ point of $f_i(t)$ less relevant. saturates at different points in feature space.} Same input and processing as in Fig.\,\ref{fig:thresh-lp_single} but with three different - kernels $k_i$, each with a single kernel-specific - threshold value $\thr=0.5\cdot\sigma_{\eta_i}$. + kernels $k_i$ and a single kernel-specific threshold value + $\thr=0.5\cdot\sigma_{\eta_i}$ (appendix + Fig.\,\ref{fig:app_thresh-lp_kern-sd}). \textbf{a}:~Examples of species-specific grasshopper songs. - \textbf{Middle}:~Average value $\mu_{f_i}$ of each feature + \textbf{Middle}:~Average value $\muf$ of each feature $f_i(t)$ over $\sca$ per species (averaged over songs and - recordings, see appendix - Figs.\,\ref{fig:app_thresh-lp_pure} and - \ref{fig:app_thresh-lp_noise}). Different color shades - indicate different kernels $k_i$. Dots indicate $95\,\%$ - curve span per $k_i$. + recordings, appendix Figs.\,\ref{fig:app_thresh-lp_pure} + and \ref{fig:app_thresh-lp_noise}). Different color shades + indicate different $k_i$. Dots indicate $95\,\%$ curve + span per $k_i$. \textbf{b}:~Noiseless case. \textbf{c}:~Noisy case. \textbf{Bottom}:~2D feature spaces spanned by each pair of $f_i(t)$. Each trajectory corresponds to a - species-specific combination of $\mu_{f_i}$ that develops + species-specific combination of $\muf$ that develops with $\sca$ (colorbars). Horizontal dashes in the colorbar indicate $5\,\%$ (dark grey) and $95\,\%$ (light grey) - curve span of the norm across all three $\mu_{f_i}$ per + curve span of the norm across all three $\muf$ per species. \textbf{d}:~Noiseless case. \textbf{e}:~Noisy case. Shaded areas indicate the average - minimum $\mu_{f_i}$ across all species-specific trajectories. 
+ minimum $\muf$ across all species-specific trajectories. } \label{fig:thresh-lp_species} \end{figure} @@ -1255,34 +1254,34 @@ in principle, work together towards an intensity-invariant song representation. \includegraphics[width=\textwidth]{figures/fig_invariance_full_Omocestus_rufipes.pdf} \caption{\textbf{Step-wise emergence of intensity-invariant song representations along the model pathway.} - Input $\raw(t)$ consists of song component $\soc(t)$ - scaled by $\sca$ with added noise component $\noc(t)$ and - is processed up to the feature set $f_i(t)$. Different - color shades indicate different types of Gabor kernels - with specific lobe number $\kn$ and either $+$ or $-$ - sign, sorted (dark to light) first by increasing $\kn$ and - then by sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$ - for each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8, - and $16\,$ms per type; 8 types, 40 kernels in total). - \textbf{a}:~Example representations of $\filt(t)$, - $\env(t)$, $\db(t)$, $\adapt(t)$, $c_i(t)$, and $f_i(t)$ - for different $\sca$. - \textbf{b}:~Intensity measures over $\sca$. For $c_i(t)$ - and $f_i(t)$, the median over kernels is shown. Dots + Input $\raw(t)$ consists of $\soc(t)$ scaled by $\sca$ + with added $\noc(t)$ and is processed up to the feature + set $f_i(t)$ using kernel-specific threshold values + $\thr=2\cdot\sigma_{\eta_i}$ (appendix + Fig.\,\ref{fig:app_full_kern-sd}). Different color shades + indicate different types of Gabor kernels with specific + lobe number $\kn$ and either $+$ or $-$ sign, sorted (dark + to light) first by increasing $\kn$ and then by + sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$ for + each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8, and + $16\,$ms per type; 8 types, 40 $k_i(t)$ in total). + \textbf{a}:~Examples of $\filt(t)$, $\env(t)$, $\db(t)$, + $\adapt(t)$, $c_i(t)$, and $f_i(t)$ for different $\sca$. + \textbf{b}:~Intensity measures over $\sca$. 
The median + over $k_i(t)$ is shown for $c_i(t)$ and $f_i(t)$. Dots indicate $95\,\%$ curve span for $\db(t)$, $\adapt(t)$, $c_i(t)$, and $f_i(t)$. - \textbf{c}:~Average value $\mu_{f_i}$ of each feature - $f_i(t)$ over $\sca$. - \textbf{d}:~Ratios of intensity measures to the respective - reference value for input $\raw(t)=\noc(t)$. For $c_i(t)$ - and $f_i(t)$, the median over kernel-specific ratios is - shown. - \textbf{e}:~Ratios of standard deviation $\sigma_{c_i}$ of + \textbf{c}:~Average value $\muf$ of each $f_i(t)$ + over $\sca$. + \textbf{d}:~Ratio of intensity measures from \textbf{b} to + the respective pure-noise reference for $\sca=0$. + \textbf{e}:~Ratio of standard deviation $\sigma_{c_i}$ of each $c_i(t)$. - \textbf{f}:~Ratios of $\mu_{f_i}$. + \textbf{f}:~Ratio of $\muf$. \textbf{g}:~Distributions of kernel-specific $\sca$ that correspond to $95\,\%$ curve span for $c_i(t)$ and - $f_i(t)$. Dots indicate the values from \textbf{b}. + $f_i(t)$. Dots indicate values based on the median from + \textbf{b}. } \label{fig:pipeline_full} \end{figure} @@ -1337,32 +1336,24 @@ guaranteed simply by disabling logarithmic compression. \includegraphics[width=\textwidth]{figures/fig_invariance_short_Omocestus_rufipes.pdf} \caption{\textbf{Effects of disabling logarithmic compression on intensity invariance along the model pathway.} - Input $\raw(t)$ consists of song component $\soc(t)$ - scaled by $\sca$ with added noise component $\noc(t)$ and - is processed up to the feature set $f_i(t)$, skipping - $\db(t)$. Different color shades indicate different types - of Gabor kernels with specific lobe number $\kn$ and - either $+$ or $-$ sign, sorted (dark to light) first by - increasing $\kn$ and then by - sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$ for - each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8, and - $16\,$ms per type; 8 types, 40 kernels in total). 
- \textbf{a}:~Example representations of $\filt(t)$, - $\env(t)$, $\adapt(t)$, $c_i(t)$, and $f_i(t)$ for - different $\sca$. - \textbf{b}:~Intensity measures over $\sca$. For $c_i(t)$ - and $f_i(t)$, the median over kernels is shown. Dots - indicate $95\,\%$ curve span for $f_i(t)$. - \textbf{c}:~Average value $\mu_{f_i}$ of each feature - $f_i(t)$ over $\sca$. - \textbf{d}:~Ratios of intensity measures to the respective - reference value for input $\raw(t)=\noc(t)$. For $c_i(t)$ - and $f_i(t)$, the median over kernel-specific ratios is - shown. - \textbf{e}:~Ratios of $\mu_{f_i}$. + Same input and processing as in + Fig.\,\ref{fig:pipeline_full}, using kernel-specific + threshold values $\thr=2\cdot\sigma_{\eta_i}$ (appendix + Fig.\,\ref{fig:app_short_kern-sd}), except that + logarithmic compression and hence $\db(t)$ are skipped. + \textbf{a}:~Examples of $\filt(t)$, $\env(t)$, + $\adapt(t)$, $c_i(t)$, and $f_i(t)$ for different $\sca$. + \textbf{b}:~Intensity measures over $\sca$. The median + over $k_i(t)$ is shown for $c_i(t)$ and $f_i(t)$. Dot + indicates $95\,\%$ curve span for $f_i(t)$. + \textbf{c}:~Average value $\muf$ of each $f_i(t)$ + over $\sca$. + \textbf{d}:~Ratio of intensity measures from \textbf{b} to + the respective pure-noise reference for $\sca=0$. + \textbf{e}:~Ratio of $\muf$. \textbf{f}:~Distribution of kernel-specific $\sca$ that - correspond to $95\,\%$ curve span for $f_i(t)$. Dots - indicate the value from \textbf{b}. + correspond to $95\,\%$ curve span for $f_i(t)$. Dot + indicates value based on the median from \textbf{b}. } \label{fig:pipeline_short} \end{figure} @@ -1426,26 +1417,27 @@ distances~(Fig.\,\ref{fig:pipeline_field}a, bottom row). Input $\raw(t)$ consists of a song of \textit{P. parallelus} recorded in the field at eight different distances $d$ and is processed up to the feature set - $f_i(t)$. 
Different color shades indicate different types - of Gabor kernels with specific lobe number $\kn$ and - either $+$ or $-$ sign, sorted (dark to light) first by - increasing $\kn$ and then by + $f_i(t)$ using kernel-specific threshold values + $\thr=2\cdot\sigma_{\eta_i}$ (appendix + Fig.\,\ref{fig:app_field_kern-sd}). Different color shades + indicate different types of Gabor kernels with specific + lobe number $\kn$ and either $+$ or $-$ sign, sorted (dark + to light) first by increasing $\kn$ and then by sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$ for each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8, and - $16\,$ms per type; 8 types, 40 kernels in total). + $16\,$ms per type; 8 types, 40 $k_i(t)$ in total). \textbf{a}:~$\filt(t)$, $\env(t)$, $\db(t)$, $\adapt(t)$, $c_i(t)$, and $f_i(t)$ at each $d$. A noise segment from the same recording is shown for reference. - \textbf{b}:~Intensity measures over $d$. For $c_i(t)$ - and $f_i(t)$, the median over kernels is shown. - \textbf{c}:~Average value $\mu_{f_i}$ of each feature - $f_i(t)$ over $d$. - \textbf{d}:~Ratios of intensity measures to the respective - value obtained from the noise reference. For $c_i(t)$ and - $f_i(t)$, the median over kernel-specific ratios is shown. - \textbf{e}:~Ratios of standard deviation $\sigma_{c_i}$ of + \textbf{b}:~Intensity measures over $d$. The median over + $k_i(t)$ is shown for $c_i(t)$ and $f_i(t)$. + \textbf{c}:~Average value $\muf$ of each $f_i(t)$ over + $d$. + \textbf{d}:~Ratio of intensity measures from \textbf{b} to + the respective value obtained from the noise reference. + \textbf{e}:~Ratio of standard deviation $\sigma_{c_i}$ of each $c_i(t)$. - \textbf{f}:~Ratios of $\mu_{f_i}$. + \textbf{f}:~Ratio of $\muf$. } \label{fig:pipeline_field} \end{figure} @@ -1470,8 +1462,8 @@ song, so that each dot within a subplot corresponds to a single feature $f_i(t)$.
For the intraspecific comparisons~(Fig.\,\ref{fig:feat_cross_species}, upper triangular), the pairs of $\muf$ are distributed closely around the diagonal, with a minimum -correlation coefficient of $\rho=0.85$, a maximum of $\rho=0.99$, and a median -of $\rho=0.92$. A given $f_i(t)$ thus tends to have a similar $\muf$ across +correlation coefficient of $\rho=0.82$, a maximum of $\rho=0.99$, and a median +of $\rho=0.91$. A given $f_i(t)$ thus tends to have a similar $\muf$ across different songs of the same species. In contrast, the pairs of $\muf$ for the interspecific comparisons~(Fig.\,\ref{fig:feat_cross_species}, lower triangular) are distributed in a variety of different ways, most in broader @@ -1479,7 +1471,7 @@ clouds (e.g. \textit{C. biguttulus} vs. \textit{C. mollis}) but some more narrowly around the diagonal (e.g. \textit{P. parallelus} vs. \textit{C. dispar}). The correlation coefficients $\rho$ vary widely between different interspecific comparisons, with a minimum of $\rho=-0.1$, a maximum of -$\rho=0.92$, and a median of $\rho=0.53$. A given $f_i(t)$ therefore tends to +$\rho=0.91$, and a median of $\rho=0.40$. A given $f_i(t)$ therefore tends to have a less similar $\muf$ across different species than within the same species, although certain exeptions exist~(Fig.\,\ref{fig:feat_cross_species}, lower right). Accordingly, the feature representation that is generated by the @@ -1498,18 +1490,17 @@ natural song variation. \centering \includegraphics[width=\textwidth]{figures/fig_features_cross_species.pdf} \caption{\textbf{Interspecific and intraspecific feature variability.} - Average value $\mu_{f_i}$ of each feature $f_i(t)$ against - its counterpart from a 2nd feature set based on a - different input $\raw(t)$. Each dot within a subplot - represents a single feature $f_i(t)$. Different color + Average value $\muf$ of each feature $f_i(t)$ against its + counterpart from a 2nd feature set based on a different + input $\raw(t)$. 
Data is based on the saturated $\muf$ + from Fig.\,\ref{fig:pipeline_full}. Each dot within a + subplot represents a single $f_i(t)$. Different color shades indicate different types of Gabor kernels with specific lobe number $\kn$ and either $+$ or $-$ sign, sorted (dark to light) first by increasing $\kn$ and then by sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$ for each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8, and - $16\,$ms per type; 8 types, 40 kernels in total). Data is - based on the analysis underlying - Fig\,\ref{fig:pipeline_full}. + $16\,$ms per type; 8 types, 40 kernels in total). \textbf{Lower triangular}:~Interspecific comparisons between single songs of different species. \textbf{Upper triangular}:~Intraspecific comparisons @@ -1524,7 +1515,8 @@ natural song variation. \end{figure} \FloatBarrier -\section{Conclusions \& outlook} +\newpage +\section{Discussion} % RIPPED FROM INTRODUCTION: @@ -1677,105 +1669,189 @@ $\rightarrow$ Graded again but highly decorrelated from the acoustic stimulus\\ $\rightarrow$ Parameters of a behavioral response may be graded (e.g. approach speed), initiation of one behavior over another is categorical (e.g. approach/stay) +\newpage +\section{Appendix} +% Not sure if we really need this one. Might raise more questions than it +% provides answers. The noise component is not stable throughout nonlinear +% transformations, that is all the reader needs to know, i believe. \begin{figure}[!ht] \centering \includegraphics[width=\textwidth]{figures/fig_noise_env_sd_conversion_appendix.pdf} - \caption{\textbf{} - } + \caption{\textbf{Conversion of the noise component by envelope extraction.} + Standard deviation $\sigma_{\eta}$ of noise component + $\noc(t)$ within the signal envelope $\env(t)$ over scale + $\sca$. 
Based on input $\raw(t)$ with $\sigma_{\eta}=1$ + (corresponding to the analysis underlying + Fig.\,\ref{fig:rect-lp}), using 100 realizations of + $\noc(t)$.} \label{fig:app_env-sd} -\end{figure} +\end{figure}% Referenced. \FloatBarrier \begin{figure}[!ht] \centering \includegraphics[width=\textwidth]{figures/fig_invariance_rect-lp_appendix.pdf} - \caption{\textbf{} - } + \caption{\textbf{Species-specific data underlying Fig.\,\ref{fig:rect-lp}e.} + Ratio of the standard deviation $\sigma_{\text{env}}$ to + the pure-noise reference $\sigma_{\eta}$ of the signal + envelope $\env(t)$ over scale $\sca$ for different cutoff + frequencies $\fc$ of the lowpass filter extracting + $\env(t)$. Solid lines and shaded areas indicate mean + $\pm$ standard deviation across songs per recording. + Dashed lines indicate mean across recordings (shown in + Fig.\,\ref{fig:rect-lp}e).} \label{fig:app_rect-lp} -\end{figure} +\end{figure}% Referenced. \FloatBarrier \begin{figure}[!ht] \centering \includegraphics[width=\textwidth]{figures/fig_invariance_log-hp_appendix.pdf} - \caption{\textbf{} - } + \caption{\textbf{Species-specific data underlying Fig.\,\ref{fig:log-hp}e.} + Ratio of the standard deviation $\sigma_{\text{adapt}}$ to + the pure-noise reference $\sigma_{\eta}$ of the + intensity-adapted envelope $\adapt(t)$ over scale $\sca$. + Solid lines and shaded areas indicate mean $\pm$ standard + deviation across songs per recording. Dashed lines + indicate mean across recordings (shown in + Fig.\,\ref{fig:log-hp}e).} \label{fig:app_log-hp_curves} -\end{figure} +\end{figure}% Referenced. 
\FloatBarrier \begin{figure}[!ht] \centering \includegraphics[width=\textwidth]{figures/fig_saturation_log-hp_appendix.pdf} - \caption{\textbf{} - } + \caption{\textbf{Species-specific saturation points underlying + Fig.\,\ref{fig:log-hp}e.} + Distribution of saturation points ($95\,\%$ curve span) of + ratio $\sigma_{\text{adapt}} / \sigma_{\eta}$ of the + intensity-adapted envelope $\adapt(t)$ over scale $\sca$ + across all available songs. Dots indicate the saturation + point of the mean curve across songs and recordings (shown + in Fig.\,\ref{fig:log-hp}e, see also appendix + Fig.\,\ref{fig:app_log-hp_curves}).} \label{fig:app_log-hp_saturation} -\end{figure} +\end{figure}% Referenced. \FloatBarrier \begin{figure}[!ht] \centering \includegraphics[width=\textwidth]{figures/fig_invariance_thresh-lp_pure_appendix.pdf} - \caption{\textbf{} - } + \caption{\textbf{Species-specific data underlying Fig.\,\ref{fig:thresh-lp_species}bd.} + Average value $\muf$ of each of the three features + $f_i(t)$ over scale $\sca$ in the noiseless case. Solid + lines and shaded areas indicate mean $\pm$ standard + deviation across songs per recording. Dashed lines + indicate mean across recordings (shown in + Fig.\,\ref{fig:thresh-lp_species}bd).} \label{fig:app_thresh-lp_pure} -\end{figure} +\end{figure}% Referenced. \FloatBarrier \begin{figure}[!ht] \centering \includegraphics[width=\textwidth]{figures/fig_invariance_thresh-lp_noise_appendix.pdf} - \caption{\textbf{} - } + \caption{\textbf{Species-specific data underlying Fig.\,\ref{fig:thresh-lp_species}ce.} + Average value $\muf$ of each of the three features + $f_i(t)$ over scale $\sca$ in the noisy case. Solid lines + and shaded areas indicate mean $\pm$ standard deviation + across songs per recording. Dashed lines indicate mean + across recordings (shown in + Fig.\,\ref{fig:thresh-lp_species}ce).} \label{fig:app_thresh-lp_noise} -\end{figure} +\end{figure}% Referenced. 
\FloatBarrier \begin{figure}[!ht] \centering \includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_thresh_lp_appendix.pdf} - \caption{\textbf{} + \caption{\textbf{Relation between threshold value and pure-noise feature + value for Fig.\,\ref{fig:thresh-lp_single} and + Fig.\,\ref{fig:thresh-lp_species}.} + Proportion of pure-noise kernel response $c_i(t)$ that + exceeds threshold value $\thr$ --- which determines the + average value $\muf$ of feature $f_i(t)$ --- over $\thr$ + in multiples of standard deviation $\sigma_{c_i}$. + Corresponds to a ``reverse'' cumulative distribution + function (rCDF) of $c_i(t)$. Black solid lines indicate rCDF per + kernel $k_i(t)$. Red dashed line indicates rCDF for a + normal distribution with $\mu=0$ and $\sigma=1$. } \label{fig:app_thresh-lp_kern-sd} -\end{figure} +\end{figure}% Referenced. \FloatBarrier \begin{figure}[!ht] \centering \includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_full_appendix.pdf} - \caption{\textbf{} + \caption{\textbf{Relation between threshold value and pure-noise feature + value for Fig.\,\ref{fig:pipeline_full}.} + Proportion of pure-noise kernel response $c_i(t)$ that + exceeds threshold value $\thr$ --- which determines the + average value $\muf$ of feature $f_i(t)$ --- over $\thr$ + in multiples of standard deviation $\sigma_{c_i}$. + Corresponds to a ``reverse'' cumulative distribution + function (rCDF) of $c_i(t)$. Black solid lines indicate rCDF per + kernel $k_i(t)$. Red dashed line indicates rCDF for a + normal distribution with $\mu=0$ and $\sigma=1$. } \label{fig:app_full_kern-sd} -\end{figure} +\end{figure}% Referenced.
\FloatBarrier \begin{figure}[!ht] \centering \includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_short_appendix.pdf} - \caption{\textbf{} + \caption{\textbf{Relation between threshold value and pure-noise feature + value for Fig.\,\ref{fig:pipeline_short}.} + Proportion of pure-noise kernel response $c_i(t)$ that + exceeds threshold value $\thr$ --- which determines the + average value $\muf$ of feature $f_i(t)$ --- over $\thr$ + in multiples of standard deviation $\sigma_{c_i}$. + Corresponds to a "reverse" cumulative distribution + function of $c_i(t)$. Black solid lines indicate rCDF per + kernel $k_i(t)$. Red dashed line indicates rCDF for a + normal distribution with $\mu=0$ and $\sigma=1$. } \label{fig:app_short_kern-sd} -\end{figure} +\end{figure}% Referenced. \FloatBarrier \begin{figure}[!ht] \centering \includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_field_appendix.pdf} - \caption{\textbf{} + \caption{\textbf{Relation between threshold value and pure-noise feature + value for Fig.\,\ref{fig:pipeline_field}.} + Proportion of pure-noise kernel response $c_i(t)$ that + exceeds threshold value $\thr$ --- which determines the + average value $\muf$ of feature $f_i(t)$ --- over $\thr$ + in multiples of standard deviation $\sigma_{c_i}$. + Corresponds to a "reverse" cumulative distribution + function of $c_i(t)$. Black solid lines indicate rCDF per + kernel $k_i(t)$. Red dashed line indicates rCDF for a + normal distribution with $\mu=0$ and $\sigma=1$. } \label{fig:app_field_kern-sd} -\end{figure} +\end{figure}% Referenced. 
\FloatBarrier - \begin{figure}[!ht] \centering \includegraphics[width=\textwidth]{figures/fig_invariance_cross_species_thresh_appendix.pdf} - \caption{\textbf{} + \caption{\textbf{Threshold-dependent intensity invariance of + species-specific feature sets.} + Same input and processing as in + Fig.\,\ref{fig:pipeline_full}, using different + kernel-specific threshold values $\thr$ (multiples of + pure-noise standard deviation $\sigma_{\eta_i}$ of + $c_i(t)$ for $\sca=0$. See also appendix + Fig.\,\ref{fig:app_full_kern-sd}). Average value $\muf$ of + each feature $f_i(t)$ over $\sca$. } \label{fig:app_cross_species_thresh} -\end{figure} +\end{figure}% Reference this one! \FloatBarrier \end{document} \ No newline at end of file diff --git a/python/fig_env_sd_conversion_appendix.py b/python/fig_env_sd_conversion_appendix.py index 8a14a97..58aa82a 100644 --- a/python/fig_env_sd_conversion_appendix.py +++ b/python/fig_env_sd_conversion_appendix.py @@ -1,7 +1,7 @@ import plotstyle_plt import numpy as np import matplotlib.pyplot as plt -from plot_functions import xlabel, ylabel, strip_zeros, letter_subplots +from plot_functions import xlabel, ylabel # GENERAL SETTINGS: data_path = '../data/inv/noise_env/sd_conversion.npz' @@ -10,16 +10,14 @@ save_path = '../figures/fig_noise_env_sd_conversion_appendix.pdf' # PLOT SETTINGS: fig_kwargs = dict( figsize=(32/2.54, 16/2.54), - nrows=2, + nrows=1, ncols=1, - sharex=True, - sharey=True, gridspec_kw=dict( wspace=0, - hspace=0.1, - left=0.09, + hspace=0, + left=0.08, right=0.98, - bottom=0.08, + bottom=0.1, top=0.95, ) ) @@ -30,81 +28,41 @@ grid_line_kwargs = dict( color='k', lw=0.5, ) -trial_kwargs = dict( - color='k', - alpha=0.5, - lw=0.5, -) line_kwargs = dict( - color='black', - lw=1, -) -fill_kwargs = dict( - color='k', + c='k', + lw=0.5, alpha=0.5, ) -xlabels = dict( - bottom='$\\text{scale }\\alpha$', -) -ylabels = dict( - top='$\\sigma_{\\eta}\\,(PLACEHOLDER \\,\\text{realizations})$', - 
bottom='$\\sigma_{\\eta}\\,(\\text{mean}\\,\\pm\\,\\text{SD})$', -) +xlab = '$\\text{scale }\\alpha$' xlab_kwargs = dict( y=0, fontsize=20, ha='center', va='bottom', ) +ylab = '$\\sigma_{\\eta}$' ylab_kwargs = dict( x=0, fontsize=20, ha='center', va='top', ) -title_kwargs = dict( - t='$\\sigma_{\\text{filt}}\\,=$', - x=0.5, - y=1, - ha='center', - va='top', - fontsize=20, -) -letter_kwargs = dict( - x=0.005, - y=0.99, - fontsize=22, - ha='left', - va='top', -) # Fetch data: data = dict(np.load('../data/inv/noise_env/sd_conversion.npz')) -n = data['n_trials'] - -# Adjust parameters: -ylabels['top'] = f'$\\sigma_{{\\eta}}\\,({data["n_trials"]}\\text{{ realizations}})$' -title_kwargs['t'] += f'$\\,{strip_zeros(data["sd_factor"])}$' # Prepare graph: -fig, (ax1, ax2) = plt.subplots(**fig_kwargs) -fig.suptitle(**title_kwargs) -ax1.grid(**grid_line_kwargs) -ax1.set_xlim(data['scales'][0], data['scales'][-1]) -ax1.set_xscale('symlog', linthresh=data['scales'][1], linscale=0.5) -ax1.set_ylim(0, 0.1) -ylabel(ax1, ylabels['top'], transform=fig.transFigure, **ylab_kwargs) -ax2.grid(**grid_line_kwargs) -xlabel(ax2, xlabels['bottom'], transform=fig.transFigure, **xlab_kwargs) -ylabel(ax2, ylabels['bottom'], transform=fig.transFigure, **ylab_kwargs) -letter_subplots((ax1, ax2), **letter_kwargs) +fig, ax = plt.subplots(**fig_kwargs) +ax.grid(**grid_line_kwargs) +ax.set_xlim(data['scales'][0], data['scales'][-1]) +ax.set_xscale('symlog', linthresh=data['scales'][1], linscale=0.5) +ax.set_ylim(0, 0.08) +ax.yaxis.set_major_locator(plt.MultipleLocator(0.02)) +xlabel(ax, xlab, transform=fig.transFigure, **xlab_kwargs) +ylabel(ax, ylab, transform=fig.transFigure, **ylab_kwargs) # Plot individual trials: -ax1.plot(data['scales'], data['trials'], **trial_kwargs) - -# Plot mean and spread across trials: -ax2.plot(data['scales'], data['mean'], **line_kwargs) -ax2.fill_between(data['scales'], data['mean'] - data['spread'], data['mean'] + data['spread'], **fill_kwargs) 
+ax.plot(data['scales'], data['sd_noise'][..., 0], **line_kwargs) if save_path is not None: fig.savefig(save_path) diff --git a/python/fig_features_cross_species.py b/python/fig_features_cross_species.py index e256fd5..1539fc3 100644 --- a/python/fig_features_cross_species.py +++ b/python/fig_features_cross_species.py @@ -45,7 +45,7 @@ save_path = '../figures/fig_features_cross_species.pdf' # ANALYSIS SETTINGS: -thresh_rel = np.array([0, 0.5, 1, 1.5, 2, 2.5, 3])[5] +thresh_rel = np.array([0, 0.5, 1, 1.5, 2, 2.5, 3])[4] single_spec_file = True # Only use example files for cross-species comparison equalize_spec_files = False # Prune to minimum available across species n_song = n_spec#None # Limit to n first songs of in-species dataset (None for all) diff --git a/python/fig_inv_cross_spec-thresh_appendix.py b/python/fig_inv_cross_spec-thresh_appendix.py index 7feb15d..21872d4 100644 --- a/python/fig_inv_cross_spec-thresh_appendix.py +++ b/python/fig_inv_cross_spec-thresh_appendix.py @@ -13,9 +13,9 @@ from IPython import embed # GENERAL SETTINGS: target_species = [ - # 'Chorthippus_biguttulus', - # 'Chorthippus_mollis', - # 'Chrysochraon_dispar', + 'Chorthippus_biguttulus', + 'Chorthippus_mollis', + 'Chrysochraon_dispar', # 'Euchorthippus_declivus', 'Gomphocerippus_rufus', 'Omocestus_rufipes', @@ -35,7 +35,15 @@ save_path = '../figures/fig_invariance_cross_species_thresh_appendix.pdf' # ANALYSIS SETTINGS: exclude_zero = True -thresh_rel = np.array([0, 0.5, 1, 1.5, 2, 2.5, 3]) +thresh_rel = np.array([ + # 0, + 0.5, + 1, + # 1.5, + 2, + # 2.5, + 3, +]) # SUBSET SETTINGS: types = np.array([1, -1, 2, -2, 3, -3, 4, -4]) @@ -53,15 +61,15 @@ fig_kwargs = dict( sharex=True, sharey=True, gridspec_kw=dict( - wspace=0.2, - hspace=0.75, + wspace=0.3, + hspace=0.5, left=0.1, - right=0.95, - bottom=0.08, + right=0.97, + bottom=0.1, top=0.98, ) ) -inset_x_bounds = [0, -0.5, 1, 0.4] +inset_x_bounds = [0, -0.3, 1, 0.25] inset_y_bounds = [1.01, 0, 0.1, 1] # PLOT SETTINGS: @@ -162,6 
+170,7 @@ y_dist_kwargs = dict( fig, axes = plt.subplots(**fig_kwargs) axes[0, 0].set_ylim(0, 1) axes[0, 0].yaxis.set_major_locator(plt.MultipleLocator(yloc)) +axes[0, 0].xaxis.set_major_locator(plt.LogLocator(base=10, subs=(1,))) super_xlabel(xlab, fig, axes[-1, 0], axes[-1, -1], **xlab_kwargs) super_ylabel(ylab, fig, axes[0, 0], axes[-1, 0], **ylab_super_kwargs) for ax, species in zip(axes[0, :], target_species): @@ -197,25 +206,27 @@ for i, species in enumerate(target_species): symlog_kwargs = dict(linthresh=scales[scales > 0][0], linscale=0.5) # Run through thresholds: - for j in range(thresh_rel.size): + for j, thresh in enumerate(thresh_rel): ax = axes[j, i] + ind = np.nonzero(data['thresh_rel'] == thresh)[0][0] + # Plot swarm of feature-specific intensity curves: - handles = ax.plot(scales, measure[:, :, j], lw=lw['swarm']) + handles = ax.plot(scales, measure[:, :, ind], lw=lw['swarm']) assign_colors(handles, config['k_specs'][:, 0], kern_colors) - reorder_by_sd(handles, measure[:, :, j]) + reorder_by_sd(handles, measure[:, :, ind]) # Plot single compressed intensity curve: - compressed = np.median(measure[:, :, j], axis=1) + compressed = np.median(measure[:, :, ind], axis=1) ax.plot(scales, compressed, **median_kwargs) # Plot distribution of saturation levels: inset = ax.inset_axes(inset_y_bounds) inset.set_ylim(0, 1) inset.axis('off') - y_dist(inset, measure[-1, :, j], **y_dist_kwargs) + y_dist(inset, measure[-1, :, ind], **y_dist_kwargs) # Plot distribution of saturation points: - crit_inds = np.array(get_saturation(measure[:, :, j], **plateau_settings)[1]) + crit_inds = np.array(get_saturation(measure[:, :, ind], **plateau_settings)[1]) if np.isnan(crit_inds).sum(): print(f'WARNING: No saturation points found for {species} at threshold {thresh_rel[j]}') crit_inds = crit_inds[~np.isnan(crit_inds)].astype(int) @@ -223,12 +234,13 @@ for i, species in enumerate(target_species): inset = ax.inset_axes(inset_x_bounds) inset.set_xlim(scales[0], scales[-1]) 
inset.set_xscale('symlog', **symlog_kwargs) + inset.xaxis.set_major_locator(plt.LogLocator(base=10, subs=(1,))) hide_axis(inset, 'left') if j < thresh_rel.size - 1: hide_ticks(inset, 'bottom') x_dist(inset, crit_scales, **x_dist_kwargs) - if j > 0: + if thresh > 0: # Plot single saturation point: crit_ind = get_saturation(compressed, **plateau_settings)[1] crit_scale = scales[crit_ind] @@ -237,6 +249,7 @@ for i, species in enumerate(target_species): # Posthocs: axes[0, 0].set_xscale('symlog', **symlog_kwargs) axes[0, 0].set_xlim(scales[0], scales[-1]) +axes[0, 0].xaxis.set_major_locator(plt.LogLocator(base=10, subs=(1,))) if save_path is not None: fig.savefig(save_path) diff --git a/python/fig_kernel_sd_perc_appendix.py b/python/fig_kernel_sd_perc_appendix.py index 02f20c9..f2b0974 100644 --- a/python/fig_kernel_sd_perc_appendix.py +++ b/python/fig_kernel_sd_perc_appendix.py @@ -6,13 +6,13 @@ from plot_functions import xlabel, ylabel from IPython import embed # Analysis settings: -mode = ['thresh_lp', 'full', 'short', 'field'][3] +mode = ['thresh_lp', 'full', 'short', 'field'][0] thresh_path = f'../data/inv/{mode}/thresholds.npz' save_path = f'../figures/fig_kernel_sd_perc_{mode}_appendix.pdf' # Plot settings: fig_kwargs = dict( - figsize=(32/2.54, 16/2.54), + figsize=(32/2.54, 15/2.54), nrows=1, ncols=1, gridspec_kw=dict( @@ -41,8 +41,8 @@ grid_line_kwargs = dict( color='k', lw=0.5, ) -xlab = '$\\text{multiple of }\\sigma_{k_i}$' -ylab = '$P\\,(c_i > \\Theta_i)$' +xlab = '$\\Theta_i\\,[\\text{multiples of }\\sigma_{c_i}]$' +ylab = '$\\mu_{f_i}\\,\\approx\\,P\\,(c_i > \\Theta_i)$' xlab_kwargs = dict( y=0, fontsize=20, @@ -55,6 +55,8 @@ ylab_kwargs = dict( ha='center', va='top', ) +xloc = 1 +yloc = 0.25 # Load threshold data: data = dict(np.load(thresh_path)) @@ -69,6 +71,8 @@ fig, ax = plt.subplots(**fig_kwargs) ax.grid(**grid_line_kwargs) ax.set_xlim(factors[0], factors[-1]) ax.set_ylim(-0.01, 1.01) +ax.xaxis.set_major_locator(plt.MultipleLocator(xloc)) 
+ax.yaxis.set_major_locator(plt.MultipleLocator(yloc)) ylabel(ax, ylab, transform=fig.transFigure, **ylab_kwargs) xlabel(ax, xlab, transform=fig.transFigure, **xlab_kwargs) diff --git a/python/fig_pathway_stages.py b/python/fig_pathway_stages.py index d0ac347..fdf9ca5 100644 --- a/python/fig_pathway_stages.py +++ b/python/fig_pathway_stages.py @@ -17,7 +17,7 @@ stages = ['filt', 'env', 'log', 'inv', 'conv', 'bi', 'feat'] save_path = '../figures/' # GRAPH SETTINGS: -fig_kwargs = dict( +fig_pre_kwargs = dict( figsize=(32/2.54, 16/2.54), sharex='col', subplot_kw=dict( @@ -28,10 +28,17 @@ fig_kwargs = dict( hspace=0.3, left=0.12, right=0.99, - bottom=0.08, - top=0.95 + bottom=0.09, + top=0.97 ), ) +fig_feat_kwargs = fig_pre_kwargs.copy() +fig_feat_kwargs['gridspec_kw'] = fig_pre_kwargs['gridspec_kw'].copy() +fig_feat_kwargs['gridspec_kw'].update(dict( + left=0.09, + wspace=0.15, + hspace=0.2, +)) # PLOT SETTINGS: fs = dict( @@ -76,13 +83,15 @@ xlab_kwargs = dict( va='bottom', fontsize=fs['lab_norm'], ) -ylab_kwargs = dict( +ylab_pre_kwargs = dict( x=0.03, rotation=0, ha='center', va='center', fontsize=fs['lab_tex'], ) +ylab_feat_kwargs = ylab_pre_kwargs.copy() +ylab_feat_kwargs['x'] = 0.02 xloc = dict( full=2, zoom=0.2 @@ -98,42 +107,28 @@ yloc_full = dict( yloc_zoom = dict( filt=0.1, env=0.02, - log=50, + log=25, inv=10, conv=0.5, feat=1 ) letter_kwargs = dict( - x=0, + xref=0, y=1, ha='left', - va='bottom', + va='center', fontsize=fs['letter'], ) -zoom_rel = np.array([0.3, 0.4]) +zoom_rel = np.array([0.295, 0.4]) zoom_kwargs = dict( color=3 * (0.85,), zorder=0, linewidth=0 ) -# kernels = np.array([ -# [1, 0.002], -# [1, 0.016], -# [-1, 0.004], -# [-1, 0.032], -# [2, 0.004], -# [2, 0.016], -# [-2, 0.002], -# [-2, 0.032], -# [3, 0.008], -# [3, 0.032], -# [-3, 0.008], -# [-3, 0.032], -# [4, 0.004], -# [4, 0.032], -# [-4, 0.004], -# [-4, 0.032] -# ]) +types = np.array([1, -1, 2, -2, 3, -3, 4, -4]) +# types = [1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 8, -8, 9, -9, 
10, -10] +sigmas = np.array([0.004, 0.032]) +# sigmas = [0.001, 0.002, 0.004, 0.008, 0.016, 0.032] t = [1, -1, 2, -2, 3, -3, 4, -4] s = [0.004, 0.032] kernels = np.array([[i, j] for i in t for j in s]) @@ -162,31 +157,31 @@ for data_path in data_paths: # PART I: PREPROCESSING STAGE - fig, axes = plt.subplots(4, 2, **fig_kwargs) + fig, axes = plt.subplots(4, 2, **fig_pre_kwargs) super_xlabel(xlabels['super'], fig, axes[0, 0], axes[0, -1], **xlab_kwargs) [hide_axis(ax, 'bottom') for ax in axes[:-1, :].ravel()] # Bandpass-filtered signal: ax_full, ax_zoom = axes[0, :] - ylabel(ax_full, ylabels['filt'], transform=fig.transFigure, **ylab_kwargs) + ylabel(ax_full, ylabels['filt'], transform=fig.transFigure, **ylab_pre_kwargs) plot_line(ax_full, t_full, data['filt'], c=colors['filt'], lw=lw_full['filt'], yloc=yloc_full['filt']) plot_line(ax_zoom, t_zoom, data['filt'][zoom_mask], c=colors['filt'], lw=lw_zoom['filt'], yloc=yloc_zoom['filt']) # Signal envelope: ax_full, ax_zoom = axes[1, :] - ylabel(ax_full, ylabels['env'], transform=fig.transFigure, **ylab_kwargs) + ylabel(ax_full, ylabels['env'], transform=fig.transFigure, **ylab_pre_kwargs) plot_line(ax_full, t_full, data['env'], ymin=0, c=colors['env'], lw=lw_full['env'], yloc=yloc_full['env']) plot_line(ax_zoom, t_zoom, data['env'][zoom_mask], ymin=0, c=colors['env'], lw=lw_zoom['env'], yloc=yloc_zoom['env']) # Logarithmic envelope: ax_full, ax_zoom = axes[2, :] - ylabel(ax_full, ylabels['log'], transform=fig.transFigure, **ylab_kwargs) + ylabel(ax_full, ylabels['log'], transform=fig.transFigure, **ylab_pre_kwargs) plot_line(ax_full, t_full, data['log'], ymax=0, c=colors['log'], lw=lw_full['log'], yloc=yloc_full['log']) - plot_line(ax_zoom, t_zoom, data['log'][zoom_mask], ymax=0, c=colors['log'], lw=lw_zoom['log'], yloc=yloc_zoom['log']) + plot_line(ax_zoom, t_zoom, data['log'][zoom_mask], c=colors['log'], lw=lw_zoom['log'], yloc=yloc_zoom['log']) # Adapted envelope: ax_full, ax_zoom = axes[3, :] - ylabel(ax_full, 
ylabels['inv'], transform=fig.transFigure, **ylab_kwargs) + ylabel(ax_full, ylabels['inv'], transform=fig.transFigure, **ylab_pre_kwargs) plot_line(ax_full, t_full, data['inv'], c=colors['inv'], lw=lw_full['inv'], yloc=yloc_full['inv']) plot_line(ax_zoom, t_zoom, data['inv'][zoom_mask], c=colors['inv'], lw=lw_zoom['inv'], yloc=yloc_zoom['inv']) @@ -197,25 +192,17 @@ for data_path in data_paths: ax_zoom.xaxis.set_major_locator(plt.MultipleLocator(xloc['zoom'])) indicate_zoom(fig, axes[0, 0], axes[-1, 0], zoom_abs, **zoom_kwargs) indicate_zoom(fig, axes[0, 1], axes[-1, 1], zoom_abs, **zoom_kwargs) - letter_subplots(axes[:, 0], **letter_kwargs) + letter_subplots(axes[:, 0], ref=fig.transFigure, **letter_kwargs) if save_path is not None: fig.savefig(f'{save_path}fig_pre_stages.pdf') - # Update parameters: - fig_kwargs['gridspec_kw'].update( - left=0.09, - ) - ylab_kwargs.update( - x=0.02, - ) - # PART II: FEATURE EXTRACTION STAGE: - fig, axes = plt.subplots(3, 2, **fig_kwargs) + fig, axes = plt.subplots(3, 2, **fig_feat_kwargs) super_xlabel(xlabels['super'], fig, axes[0, 0], axes[0, -1], **xlab_kwargs) # Convolutional filter responses: ax_full, ax_zoom = axes[0, :] - ylabel(ax_full, ylabels['conv'], transform=fig.transFigure, **ylab_kwargs) + ylabel(ax_full, ylabels['conv'], transform=fig.transFigure, **ylab_feat_kwargs) signal = data['conv'][:, kern_inds] handles = plot_line(ax_full, t_full, signal, lw=lw_full['conv'], yloc=yloc_full['conv']) assign_colors(handles, kern_specs[:, 0], conv_colors) @@ -228,7 +215,7 @@ for data_path in data_paths: # Binary responses: ax_full, ax_zoom = axes[1, :] - ylabel(ax_full, ylabels['bi'], transform=fig.transFigure, **ylab_kwargs) + ylabel(ax_full, ylabels['bi'], transform=fig.transFigure, **ylab_feat_kwargs) signal = data['bi'][:, kern_inds] handles = plot_barcode(ax_full, t_full, signal, lw=lw_full['bi']) assign_colors(handles, kern_specs[:, 0], bi_colors) @@ -237,7 +224,7 @@ for data_path in data_paths: # Finalized features: 
ax_full, ax_zoom = axes[2, :] - ylabel(ax_full, ylabels['feat'], transform=fig.transFigure, **ylab_kwargs) + ylabel(ax_full, ylabels['feat'], transform=fig.transFigure, **ylab_feat_kwargs) signal = data['feat'][:, kern_inds] handles = plot_line(ax_full, t_full, signal, ymin=0, ymax=1, c=colors['feat'], lw=lw_full['feat'], yloc=yloc_full['feat']) assign_colors(handles, kern_specs[:, 0], feat_colors) @@ -251,7 +238,7 @@ for data_path in data_paths: ax_zoom.xaxis.set_major_locator(plt.MultipleLocator(xloc['zoom'])) indicate_zoom(fig, axes[0, 0], axes[-1, 0], zoom_abs, **zoom_kwargs) indicate_zoom(fig, axes[0, 1], axes[-1, 1], zoom_abs, **zoom_kwargs) - letter_subplots(axes[:, 0], **letter_kwargs) + letter_subplots(axes[:, 0], ref=fig.transFigure, **letter_kwargs) if save_path is not None: fig.savefig(f'{save_path}fig_feat_stages.pdf') plt.show() diff --git a/python/fig_saturation_log-hp_appendix.py b/python/fig_saturation_log-hp_appendix.py index ebe77bf..301e6b5 100644 --- a/python/fig_saturation_log-hp_appendix.py +++ b/python/fig_saturation_log-hp_appendix.py @@ -90,6 +90,12 @@ text_kwargs = dict( ha='right', va='top', ) +plateau_dot_kwargs = dict( + marker='o', + markersize=8, + markeredgewidth=1, + clip_on=False, +) # Prepare graph: fig, axes = plt.subplots(**fig_kwargs) @@ -111,12 +117,22 @@ for species, ax in zip(target_species, axes): # Plot distribution of saturation points: handles.append(ax.bar(bins, hist, width=bins[1] - bins[0], fc=color, **bar_kwargs)) ax.set_ylim(0, hist.max() * 1.05) + if species == 'Gomphocerippus_rufus': + ax.yaxis.set_major_locator(plt.MultipleLocator(0.05)) + else: + ax.yaxis.set_major_locator(plt.MultipleLocator(0.03)) # Indicate mean of distribution: - ax.axvline(data['crit_scales'].mean(), **mean_kwargs) + # ax.axvline(data['crit_scales'].mean(), **mean_kwargs) # Indicate number of songs: - ax.text(**text_kwargs, s=f'n = {n_songs}', transform=ax.transAxes) + ax.text(**text_kwargs, s=f'n={n_songs}', transform=ax.transAxes) + + # 
Indicate saturation point of condensed curve: + ax.plot(data['crit_scale'], 0, c='w', alpha=1, zorder=5.5, + transform=ax.get_xaxis_transform(), **plateau_dot_kwargs) + ax.plot(data['crit_scale'], 0, mfc=color, mec='k', alpha=0.75, zorder=6, + transform=ax.get_xaxis_transform(), **plateau_dot_kwargs) # Posthocs: labels = [shorten_species(species) for species in target_species] diff --git a/python/save_env_sd_conversion.py b/python/save_env_sd_conversion.py index 9ab914f..9a04f9d 100644 --- a/python/save_env_sd_conversion.py +++ b/python/save_env_sd_conversion.py @@ -1,26 +1,24 @@ -import glob import numpy as np -import matplotlib.pyplot as plt +from thunderhopper.filetools import search_files from thunderhopper.modeltools import load_data from thunderhopper.filters import sosfilter from IPython import embed # GENERAL SETTINGS: target = 'Omocestus_rufipes' -data_path = glob.glob(f'../data/processed/{target}*.npz')[0] +data_path = search_files(target, dir='../data/processed/')[0] save_path = '../data/inv/noise_env/' # ANALYSIS SETTINGS: -scales = np.geomspace(0.1, 10000, 200) +scales = np.geomspace(0.01, 1000, 100) sd_inputs = np.array([1.0]) -n_trials = 10 -tol_to_one = 0.1 +n_trials = 100 # EXECUTION: # Load signal data: -data, config = load_data(data_path, files='filt') -signal, rate = data['filt'], config['rate'] +data, config = load_data(data_path, files='raw') +signal, rate = data['raw'], config['rate'] # Reduce to song segment and normalize: time = np.arange(signal.shape[0]) / rate @@ -28,67 +26,60 @@ start, end = data['songs_0'].ravel() segment = (time >= start) & (time <= end) signal /= signal[segment].std() -# Get rescaled signals (time, scale): +# Rescale signal (time, scale): signal = signal[:, None] * scales[None, :] # Prepare storage: -if sd_inputs.size > 1: - current_match = 0 - storage = dict( - scales=scales, - n_trials=n_trials, - sd_factor=np.array([0.]), - trials=np.zeros((scales.size, n_trials), dtype=float), - mean=np.zeros(scales.size, 
dtype=float), - spread=np.zeros(scales.size, dtype=float), - ) +sd_noise = np.zeros((scales.size, n_trials, sd_inputs.size), dtype=float) # Analyze piece-wise: rng = np.random.default_rng() for i, sigma in enumerate(sd_inputs): print(f'Testing SD: {sigma:.3f} ...') - # Add Gaussian noise of given SD to rescaled signals (time, scale, trial): - mix = signal[..., None] + rng.normal(0, sigma, (*signal.shape, n_trials)) + # Prepare trial storage: + sd_trials = np.zeros((segment.sum(), scales.size, n_trials), dtype=float) - # Get mixture envelopes (time, scale, trial): - mix = sosfilter(np.abs(mix), rate, config['env_fcut'], 'lp', - padtype='even', padlen=config['padlen'])[segment, ...] + # Run trials: + for j in range(n_trials): + # Mix signals with white noise of target SD: + mix = signal + rng.normal(0, sigma, signal.shape) + + # Process mixture: + mix = sosfilter(mix, rate, config['bp_fcut'], 'bp', + padtype='fixed', padlen=config['padlen']) + mix = sosfilter(np.abs(mix), rate, config['env_fcut'], 'lp', + padtype='even', padlen=config['padlen']) + + # Log current trial: + sd_trials[..., j] = mix[segment, :] # Get noise remainders of mean over trials: - mix -= mix.mean(axis=-1, keepdims=True) + sd_trials -= sd_trials.mean(axis=-1, keepdims=True) # Estimate noise SD: - sd = mix.std(axis=0) - # Average SD over trials: - mean_sd = sd.mean(axis=-1) + sd_noise[:, :, i] = sd_trials.std(axis=0) - # Log single-run results: - if sd_inputs.size == 1: - storage = dict( - scales=scales, - n_trials=n_trials, - sd_factor=sigma, - trials=sd, - mean=mean_sd, - spread=sd.std(axis=-1), - ) - break + # # Add Gaussian noise of given SD to rescaled signals (time, scale, trial): + # mix = signal[..., None] + rng.normal(0, sigma, (*signal.shape, n_trials)) - # Update multi-run results if better than previous: - n_match = (np.abs(1 - mean_sd) <= tol_to_one).sum() - if n_match > current_match: - print(f'Found better SD: {sigma:.3f} with {n_match} matches (previous: {current_match})') - 
storage['sd_factor'][0] = sigma - storage['trials'][:, :] = sd - storage['mean'][:] = mean_sd - storage['spread'][:] = sd.std(axis=-1) - current_match = n_match - del mix -del signal + # # Get mixture envelopes (time, scale, trial): + # mix = sosfilter(np.abs(mix), rate, config['env_fcut'], 'lp', + # padtype='even', padlen=config['padlen'])[segment, ...] + + # # Get noise remainders of mean over trials: + # mix -= mix.mean(axis=-1, keepdims=True) + + # # Estimate noise SD: + # sd_noise[:, :, i] = mix.std(axis=0) if save_path is not None: - np.savez(save_path + 'sd_conversion.npz', **storage) + archive = dict( + scales=scales, + sd_input=sd_inputs, + sd_noise=sd_noise, + ) + np.savez(save_path + 'sd_conversion.npz', **archive) print('Done.') embed() diff --git a/python/save_saturation_log-hp.py b/python/save_saturation_log-hp.py index 3c73b8b..17dd3a4 100644 --- a/python/save_saturation_log-hp.py +++ b/python/save_saturation_log-hp.py @@ -14,7 +14,8 @@ target_species = [ 'Omocestus_rufipes', 'Pseudochorthippus_parallelus', ] -search_path = '../data/inv/log_hp/collected/' +collect_path = '../data/inv/log_hp/collected/' +condense_path = '../data/inv/log_hp/condensed/' save_path = '../data/inv/log_hp/saturation/' # ANALYSIS SETTINGS: @@ -32,7 +33,7 @@ pad = 0.05 # PREPARATION: if compute_hist: species_scales = [] - min_scale, max_scale = [], [] + min_scale, max_scale = np.inf, -np.inf archives = [{} for _ in target_species] # EXECUTION: @@ -40,31 +41,48 @@ for i, species in enumerate(target_species): print(f'Processing {species}') # Load accumulated invariance data: - path = search_files(species, dir=search_path)[0] + path = search_files(species, dir=collect_path)[0] data, config = load_data(path, ['scales', 'measure_inv']) # Find upper saturation point per song file: crit_inds = np.array(get_saturation(data['measure_inv'], **plateau_settings)[1]) crit_scales = data['scales'][crit_inds] + # Load condensed invariance data: + path = search_files(species, incl=['noise', 
'norm-base'], dir=condense_path)[0] + data, _ = load_data(path, ['scales', 'mean_inv']) + + # Find single upper saturation point of condensed curve: + crit_ind = get_saturation(data['mean_inv'].mean(axis=-1), **plateau_settings)[1] + crit_scale = data['scales'][crit_ind] + # Output options: if not compute_hist: # Save species data immediately: - archive = dict(crit_inds=crit_inds, crit_scales=crit_scales, scales=data['scales']) + archive = dict( + scales=data['scales'], + crit_inds=crit_inds, + crit_scales=crit_scales, + crit_ind=crit_ind, + crit_scale=crit_scale, + ) save_data(save_path + species, archive, config, overwrite=True) continue # Log but don't save data yet: - archives[i]['crit_inds'] = crit_inds - archives[i]['crit_scales'] = crit_scales - archives[i]['scales'] = data['scales'] - min_scale.append(crit_scales.min()) - max_scale.append(crit_scales.max()) + min_scale = min(crit_scales.min(), min_scale) + max_scale = max(crit_scales.max(), max_scale) + archives[i].update( + scales=data['scales'], + crit_inds=crit_inds, + crit_scales=crit_scales, + crit_ind=crit_ind, + crit_scale=crit_scale, + ) # Optional histogram: if compute_hist: - # Generated shared histogram edges: - min_scale, max_scale = min(min_scale), max(max_scale) + # Generated shared bin edges: pad *= (max_scale - min_scale) edges = np.linspace(max(0, min_scale - pad), max_scale + pad, bins + 1) centers = edges[:-1] + np.diff(edges) / 2