Captioned appendix figures.

Polished some figures. Shortened existing figure captions.
2026-05-21 18:21:33 +02:00
parent 7996a62bde
commit 59a37503ba
20 changed files with 438 additions and 375 deletions
--- a/main.tex
+++ b/main.tex
@@ -228,6 +228,7 @@ intensity-invariant song representations, the interaction between these
 mechanisms, the overall capacity for intensity invariance in the system, and
 the ethological implications of our findings.

+\newpage
 \section{Methods}
 % This maybe does not quite fit here, but it is the most general part of the
 % methods and applies throughout the whole section, so I put it here for now.
@@ -289,8 +290,8 @@ the following sections.
                     representations~(boxes) and transformations~(arrows) along
                     the model pathway. All representations are time-varying.
                     1st half: Preprocessing stage~(one-dimensional
-                     representation). 2nd half: Feature extraction
-                     stage~(high-dimensional representation). }
+                     representations). 2nd half: Feature extraction
+                     stage~(high-dimensional representations). }
    \label{fig:pathway}
 \end{figure}

@@ -347,8 +348,8 @@ the following feature extraction stage.
 \begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_pre_stages.pdf}
-    \caption{\textbf{Representations of a song of \textit{O. rufipes} during
-                     the preprocessing stage.}
+    \caption{\textbf{Song representations during the preprocessing stage.}
+                     Example song of \textit{O. rufipes}.
                     \textbf{a}:~Bandpass filtered tympanal signal $\filt(t)$.
                     \textbf{b}:~Signal envelope $\env(t)$.
                     \textbf{c}:~Logarithmically compressed envelope $\db(t)$.
@@ -483,14 +484,15 @@ or a simple linear classifier.
 \begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_feat_stages.pdf}
-    \caption{\textbf{Representations of a song of \textit{O. rufipes} during
-                     the feature extraction stage.}
+    \caption{\textbf{Song representations during the feature extraction stage.}
+                     Example song of \textit{O. rufipes}.
                     Different color shades indicate different types of Gabor
-                     kernels with specific lobe number $\kn$ and either $+$ or
-                     $-$ sign, sorted (dark to light) first by increasing $\kn$
-                     and then by sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then
-                     $-$ for each $\kn$; two kernel widths $\kw$ of $4\,$ms and
-                     $32\,$ms per type; 8 types, 16 kernels in total).
+                     kernels with specific lobe number $\kni$ and either $+$ or
+                     $-$ sign, sorted (dark to light) first by increasing
+                     $\kni$ and then by sign~($1\,\leq\,\kni\,\leq\,4$; first
+                     $+$, then $-$ for each $\kni$; two kernel widths $\kwi$ of
+                     $4\,$ms and $32\,$ms per type; 8 types, 16 kernels in
+                     total).
                     \textbf{a}:~Kernel-specific filter responses $c_i(t)$.
                     \textbf{b}:~Binary responses $b_i(t)$.
                     \textbf{c}:~Finalized features $f_i(t)$.}
@@ -653,12 +655,13 @@ which is a reasonable assumption for the raw $\soc(t)$ and $\noc(t)$. However,
 the dependency of the ratio on $\sca$ is not necessarily the same for
 representations that are transformed from $x(t)$ by nonlinear operations, since
 these change the relationship of $\soc(t)$ and $\noc(t)$ in an unpredictable
-fashion. Furthermore, the ratio is not a proper SNR of the representation
-because it does not relate $\soc(t)$ to $\noc(t)$ within the representation but
-rather the entire representation to $\noc(t)$ alone. However, it still provides
-a useful measure of the relative intensity of a representation with and without
-$\soc(t)$, which is the closest we can get to the SNR of the representation. As
-such, the ratio of intensity measures is referred to as SNR in the following.
+fashion~(see appendix Fig.\,\ref{fig:app_env-sd}). Furthermore, the ratio is
+not a proper SNR of the representation because it does not relate $\soc(t)$ to
+$\noc(t)$ within the representation but rather the entire representation to
+$\noc(t)$ alone. However, it still provides a useful measure of the relative
+intensity of a representation with and without $\soc(t)$, which is the closest
+we can get to the SNR of the representation. As such, the ratio of intensity
+measures is referred to as SNR in the following.
 % Is this legal? "SNR" is much shorter than "ratio of intensity measure to the pure-noise reference measure".
 % Haven't used it much yet, stuck to "ratio" in most cases.

@@ -694,9 +697,10 @@ $\thr$ has been specified as a multiple of the pure-noise reference standard
 deviation $\sigma_{c_i}$ for input $x(t)=\noc(t)$. This ensures that $\thr$ as
 well as the resulting $b_i(t)$ and $f_i(t)$ are comparable across different
 $k_i(t)$ because each pure-noise $c_i(t)$ approximately follows a normal
-distribution~(see appendix
+distribution around zero~(see appendix
 Figs.\,\ref{fig:app_thresh-lp_kern-sd}--\ref{fig:app_field_kern-sd}).

+\newpage
 \section{Results}

 \subsection{Mechanisms driving the emergence of intensity invariance}
@@ -767,25 +771,23 @@ more robust input representation and higher input SNR.
    \includegraphics[width=\textwidth]{figures/fig_invariance_rect_lp.pdf}
 \caption{\textbf{Rectification and lowpass filtering improves SNR
                 but does not contribute to intensity invariance.}
-                 Input $\raw(t)$ consists of song component $\soc(t)$ scaled by
-                 $\sca$ with optional noise component $\noc(t)$ and is
-                 successively transformed into tympanal signal $\filt(t)$ and
-                 envelope $\env(t)$. Different line styles indicate different
-                 cutoff frequencies $\fc$ of the lowpass filter extracting
-                 $\env(t)$.
-                 \textbf{Top}:~Example representations of $\filt(t)$ and
-                 $\env(t)$ for different $\sca$.
+                 Input $\raw(t)$ consists of $\soc(t)$ scaled by $\sca$ with
+                 optional $\noc(t)$ and is successively transformed into
+                 tympanal signal $\filt(t)$ and envelope $\env(t)$.
+                 \textbf{Top}:~Examples of $\filt(t)$ and $\env(t)$ for
+                 different $\sca$.
                 \textbf{a}:~Noiseless case.
                 \textbf{b}:~Noisy case.
-                 \textbf{Bottom}:~Intensity measures over a range of $\sca$.
-                 \textbf{c}:~Noiseless case: Standard deviations $\sigma_x$ of
-                 $\filt(t)$ and $\env(t)$.
-                 \textbf{d}:~Noisy case: Ratios of $\sigma_x$ of $\filt(t)$ and
-                 $\env(t)$ to the respective reference standard deviation
-                 $\sigma_{\eta}$ for input $\raw(t)=\noc(t)$.
-                 \textbf{e}:~Ratios of $\sigma_x$ to $\sigma_{\eta}$ of
+                 \textbf{Bottom}:~Intensity measures over $\sca$. Different
+                 line styles indicate different cutoff frequencies $\fc$ of the
+                 lowpass filter extracting $\env(t)$.
+                 \textbf{c}:~Noiseless case: Standard deviation $\sigma_x$ of
+                 $\filt(t)$ and $\env(t)$, respectively.
+                 \textbf{d}:~Noisy case: Ratio of $\sigma_x$ to the respective
+                 pure-noise reference $\sigma_{\eta}$ for $\sca=0$.
+                 \textbf{e}:~Ratio of $\sigma_x$ to $\sigma_{\eta}$ of
                 $\env(t)$ as in \textbf{d} for different species (averaged
-                 over songs and recordings, see appendix
+                 over songs and recordings, appendix
                 Fig.\,\ref{fig:app_rect-lp}).
                }
    \label{fig:rect-lp}
@@ -907,28 +909,26 @@ is a recurring phenomenon that is further addressed in the following sections.
    \caption{\textbf{Intensity invariance through logarithmic compression and
                     adaptation is restricted by the noise floor and decreases
                     SNR.}
-                     Input $\filt(t)$ consists of song component $\soc(t)$
-                     scaled by $\sca$ with optional noise component $\noc(t)$
+                     Input $\filt(t)$ consists of $\soc(t)$
+                     scaled by $\sca$ with optional $\noc(t)$
                     and is successively transformed into envelope $\env(t)$,
                     logarithmically compressed envelope $\db(t)$, and
                     intensity-adapted envelope $\adapt(t)$.
-                     \textbf{Top}:~Example representations of $\env(t)$,
-                     $\db(t)$, and $\adapt(t)$ for different $\sca$.
+                     \textbf{Top}:~Examples of $\env(t)$, $\db(t)$, and
+                     $\adapt(t)$ for different $\sca$.
                     \textbf{a}:~Noiseless case.
                     \textbf{b}:~Noisy case.
-                     \textbf{Bottom}:~Intensity measures over a range of $\sca$.
-                     \textbf{c}:~Noiseless case: Standard deviations $\sigma_x$
-                     of $\env(t)$, $\db(t)$, and $\adapt(t)$.
-                     \textbf{d}:~Noisy case: Ratios of $\sigma_x$ of $\env(t)$,
-                     $\db(t)$, and $\adapt(t)$ to the respective reference
-                     standard deviation $\sigma_{\eta}$ for input
-                     $\filt(t)=\noc(t)$. Shaded areas indicate $5\,\%$ (dark
-                     grey) and $95\,\%$ (light grey) curve span for
-                     $\adapt(t)$.
-                     \textbf{e}:~Ratios of $\sigma_x$ to $\sigma_{\eta}$ of
+                     \textbf{Bottom}:~Intensity measures over $\sca$.
+                     \textbf{c}:~Noiseless case: Standard deviation $\sigma_x$
+                     of $\env(t)$, $\db(t)$, and $\adapt(t)$, respectively.
+                     \textbf{d}:~Noisy case: Ratio of $\sigma_x$ to the
+                     respective pure-noise reference $\sigma_{\eta}$ for
+                     $\sca=0$. Shaded areas indicate $5\,\%$ (dark grey) and
+                     $95\,\%$ (light grey) curve span for $\adapt(t)$.
+                     \textbf{e}:~Ratio of $\sigma_x$ to $\sigma_{\eta}$ of
                     $\adapt(t)$ as in \textbf{d} for different species
-                     (averaged over songs and recordings, see appendix
-                     Fig\,\ref{fig:app_log-hp_curves}). Dots indicate $95\,\%$
+                     (averaged over songs and recordings, appendix
+                     Fig.\,\ref{fig:app_log-hp_curves}). Dots indicate $95\,\%$
                     curve span per species.
                     }
    \label{fig:log-hp}
@@ -1043,33 +1043,32 @@ intensity invariance are further explored in a later section.
    \caption{\textbf{Intensity invariance through thresholding and temporal
                     averaging is mediated by the interaction of threshold
                     value and noise floor.}
-                     Input $\adapt(t)$ consists of song component $\soc(t)$
-                     scaled by $\sca$ with optional noise component $\noc(t)$
-                     and is transformed into single kernel response $c(t)$,
-                     binary response $b(t)$, and feature $f(t)$. Different
-                     color shades indicate different threshold values $\Theta$
-                     (multiples of reference standard deviation $\sigma_{\eta}$
-                     of $c(t)$ for input $\adapt(t)=\noc(t)$, with darker
-                     colors for higher $\Theta$).
-                     \textbf{Left}:~Noisy case: Example representations of
-                     $\adapt(t)$ as well as $c(t)$, $b(t)$, and $f(t)$ for
-                     different $\sca$.
+                     Input $\adapt(t)$ consists of $\soc(t)$ scaled by $\sca$ with
+                     optional $\noc(t)$ and is transformed into single kernel
+                     response $c(t)$, binary response $b(t)$, and feature
+                     $f(t)$. Different color shades indicate different
+                     threshold values $\Theta$ (multiples of pure-noise
+                     standard deviation $\sigma_{\eta}$ of $c(t)$ for $\sca=0$,
+                     with darker colors for higher $\Theta$; see also appendix
+                     Fig.\,\ref{fig:app_thresh-lp_kern-sd}).
+                     \textbf{Left}:~Noisy case: Examples of $\adapt(t)$ as well
+                     as $c(t)$, $b(t)$, and $f(t)$ for different $\sca$.
                     \textbf{a}:~$\adapt(t)$ with kernel $k(t)$ in black.
                     \textbf{b\,-\,d}: $c(t)$, $b(t)$, and $f(t)$ based on the
-                     same $\adapt(t)$ from \textbf{a} but with different
+                     same $\adapt(t)$ from \textbf{a} but for different
                     $\Theta$.
                     \textbf{Right}:~Average value $\mu_f$ of $f(t)$ for each
                     $\Theta$ from \textbf{b\,-\,d}. Dots indicate $95\,\%$
                     curve span (noisy case).
-                     \textbf{e}:~$\mu_f$ over a range of $\sca$, once for the
-                     noisy case (solid lines) and once for the noiseless case
-                     (dotted lines).
-                     \textbf{f}:~Noisy case: $\mu_f$ over the standard
-                     deviation of input $\adapt$ corresponding to the values of
-                     $\sca$ shown in \textbf{e}. Shaded area indicates standard
-                     deviations that would be capped in the output $\adapt(t)$
-                     of the previous transformation pair (see
-                     Fig.\,\ref{fig:log-hp}cd).
+                     \textbf{e}:~$\mu_f$ over $\sca$, once for the noisy case
+                     (solid lines) and once for the noiseless case (dotted
+                     lines).
+                     \textbf{f}:~Noisy case: $\mu_f$ over standard deviation
+                     $\sigma_{\text{adapt}}$ of input $\adapt(t)$ corresponding to
+                     $\sca$ shown in \textbf{e}. Shaded area indicates values
+                     of $\sigma_{\text{adapt}}$ that are capped in the output
+                     $\adapt(t)$ of the previous transformation pair
+                     (Fig.\,\ref{fig:log-hp}cd).
                     }
    \label{fig:thresh-lp_single}
 \end{figure}
@@ -1148,29 +1147,29 @@ point of $f_i(t)$ less relevant.
                     saturates at different points in feature space.}
                     Same input and processing as in
                     Fig.\,\ref{fig:thresh-lp_single} but with three different
-                     kernels $k_i$, each with a single kernel-specific
-                     threshold value $\thr=0.5\cdot\sigma_{\eta_i}$.
+                     kernels $k_i$ and a single kernel-specific threshold value
+                     $\thr=0.5\cdot\sigma_{\eta_i}$ (appendix
+                     Fig.\,\ref{fig:app_thresh-lp_kern-sd}).
                     \textbf{a}:~Examples of species-specific grasshopper
                     songs.
-                     \textbf{Middle}:~Average value $\mu_{f_i}$ of each feature
+                     \textbf{Middle}:~Average value $\muf$ of each feature
                     $f_i(t)$ over $\sca$ per species (averaged over songs and
-                     recordings, see appendix
-                     Figs.\,\ref{fig:app_thresh-lp_pure} and
-                     \ref{fig:app_thresh-lp_noise}). Different color shades
-                     indicate different kernels $k_i$. Dots indicate $95\,\%$
-                     curve span per $k_i$.
+                     recordings, appendix Figs.\,\ref{fig:app_thresh-lp_pure}
+                     and \ref{fig:app_thresh-lp_noise}). Different color shades
+                     indicate different $k_i$. Dots indicate $95\,\%$ curve
+                     span per $k_i$.
                     \textbf{b}:~Noiseless case.
                     \textbf{c}:~Noisy case.
                     \textbf{Bottom}:~2D feature spaces spanned by each pair of
                     $f_i(t)$. Each trajectory corresponds to a
-                     species-specific combination of $\mu_{f_i}$ that develops
+                     species-specific combination of $\muf$ that develops
                     with $\sca$ (colorbars). Horizontal dashes in the colorbar
                     indicate $5\,\%$ (dark grey) and $95\,\%$ (light grey)
-                     curve span of the norm across all three $\mu_{f_i}$ per
+                     curve span of the norm across all three $\muf$ per
                     species.
                     \textbf{d}:~Noiseless case.
                     \textbf{e}:~Noisy case. Shaded areas indicate the average
-                     minimum $\mu_{f_i}$ across all species-specific trajectories.
+                     minimum $\muf$ across all species-specific trajectories.
                     }
    \label{fig:thresh-lp_species}
 \end{figure}
@@ -1255,34 +1254,34 @@ in principle, work together towards an intensity-invariant song representation.
    \includegraphics[width=\textwidth]{figures/fig_invariance_full_Omocestus_rufipes.pdf}
    \caption{\textbf{Step-wise emergence of intensity-invariant song
                     representations along the model pathway.}
-                     Input $\raw(t)$ consists of song component $\soc(t)$
-                     scaled by $\sca$ with added noise component $\noc(t)$ and
-                     is processed up to the feature set $f_i(t)$. Different
-                     color shades indicate different types of Gabor kernels
-                     with specific lobe number $\kn$ and either $+$ or $-$
-                     sign, sorted (dark to light) first by increasing $\kn$ and
-                     then by sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$
-                     for each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8,
-                     and $16\,$ms per type; 8 types, 40 kernels in total).
-                     \textbf{a}:~Example representations of $\filt(t)$,
-                     $\env(t)$, $\db(t)$, $\adapt(t)$, $c_i(t)$, and $f_i(t)$
-                     for different $\sca$.
-                     \textbf{b}:~Intensity measures over $\sca$. For $c_i(t)$
-                     and $f_i(t)$, the median over kernels is shown. Dots
+                     Input $\raw(t)$ consists of $\soc(t)$ scaled by $\sca$
+                     with added $\noc(t)$ and is processed up to the feature
+                     set $f_i(t)$ using kernel-specific threshold values
+                     $\thr=2\cdot\sigma_{\eta_i}$ (appendix
+                     Fig.\,\ref{fig:app_full_kern-sd}). Different color shades
+                     indicate different types of Gabor kernels with specific
+                     lobe number $\kn$ and either $+$ or $-$ sign, sorted (dark
+                     to light) first by increasing $\kn$ and then by
+                     sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$ for
+                     each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8, and
+                     $16\,$ms per type; 8 types, 40 $k_i(t)$ in total).
+                     \textbf{a}:~Examples of $\filt(t)$, $\env(t)$, $\db(t)$,
+                     $\adapt(t)$, $c_i(t)$, and $f_i(t)$ for different $\sca$.
+                     \textbf{b}:~Intensity measures over $\sca$. The median
+                     over $k_i(t)$ is shown for $c_i(t)$ and $f_i(t)$. Dots
                     indicate $95\,\%$ curve span for $\db(t)$, $\adapt(t)$,
                     $c_i(t)$, and $f_i(t)$.
-                     \textbf{c}:~Average value $\mu_{f_i}$ of each feature
-                     $f_i(t)$ over $\sca$.
-                     \textbf{d}:~Ratios of intensity measures to the respective
-                     reference value for input $\raw(t)=\noc(t)$. For $c_i(t)$
-                     and $f_i(t)$, the median over kernel-specific ratios is
-                     shown.
-                     \textbf{e}:~Ratios of standard deviation $\sigma_{c_i}$ of
+                     \textbf{c}:~Average value $\muf$ of each $f_i(t)$
+                     over $\sca$.
+                     \textbf{d}:~Ratio of intensity measures from \textbf{b} to
+                     the respective pure-noise reference for $\sca=0$.
+                     \textbf{e}:~Ratio of standard deviation $\sigma_{c_i}$ of
                     each $c_i(t)$.
-                     \textbf{f}:~Ratios of $\mu_{f_i}$.
+                     \textbf{f}:~Ratio of $\muf$.
                     \textbf{g}:~Distributions of kernel-specific $\sca$ that
                     correspond to $95\,\%$ curve span for $c_i(t)$ and
-                     $f_i(t)$. Dots indicate the values from \textbf{b}.
+                     $f_i(t)$. Dots indicate values based on the median from
+                     \textbf{b}.
                     }
    \label{fig:pipeline_full}
 \end{figure}
@@ -1337,32 +1336,24 @@ guaranteed simply by disabling logarithmic compression.
    \includegraphics[width=\textwidth]{figures/fig_invariance_short_Omocestus_rufipes.pdf}
    \caption{\textbf{Effects of disabling logarithmic compression on intensity
                     invariance along the model pathway.}
-                     Input $\raw(t)$ consists of song component $\soc(t)$
-                     scaled by $\sca$ with added noise component $\noc(t)$ and
-                     is processed up to the feature set $f_i(t)$, skipping
-                     $\db(t)$. Different color shades indicate different types
-                     of Gabor kernels with specific lobe number $\kn$ and
-                     either $+$ or $-$ sign, sorted (dark to light) first by
-                     increasing $\kn$ and then by
-                     sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$ for
-                     each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8, and
-                     $16\,$ms per type; 8 types, 40 kernels in total).
-                     \textbf{a}:~Example representations of $\filt(t)$,
-                     $\env(t)$, $\adapt(t)$, $c_i(t)$, and $f_i(t)$ for
-                     different $\sca$.
-                     \textbf{b}:~Intensity measures over $\sca$. For $c_i(t)$
-                     and $f_i(t)$, the median over kernels is shown. Dots
-                     indicate $95\,\%$ curve span for $f_i(t)$.
-                     \textbf{c}:~Average value $\mu_{f_i}$ of each feature
-                     $f_i(t)$ over $\sca$.
-                     \textbf{d}:~Ratios of intensity measures to the respective
-                     reference value for input $\raw(t)=\noc(t)$. For $c_i(t)$
-                     and $f_i(t)$, the median over kernel-specific ratios is
-                     shown.
-                     \textbf{e}:~Ratios of $\mu_{f_i}$.
+                     Same input and processing as in
+                     Fig.\,\ref{fig:pipeline_full}, using kernel-specific
+                     threshold values $\thr=2\cdot\sigma_{\eta_i}$ (appendix
+                     Fig.\,\ref{fig:app_short_kern-sd}), except that
+                     logarithmic compression and hence $\db(t)$ are skipped.
+                     \textbf{a}:~Examples of $\filt(t)$, $\env(t)$,
+                     $\adapt(t)$, $c_i(t)$, and $f_i(t)$ for different $\sca$.
+                     \textbf{b}:~Intensity measures over $\sca$. The median
+                     over $k_i(t)$ is shown for $c_i(t)$ and $f_i(t)$. Dot
+                     indicates $95\,\%$ curve span for $f_i(t)$.
+                     \textbf{c}:~Average value $\muf$ of each $f_i(t)$
+                     over $\sca$.
+                     \textbf{d}:~Ratio of intensity measures from \textbf{b} to
+                     the respective pure-noise reference for $\sca=0$.
+                     \textbf{e}:~Ratio of $\muf$.
                     \textbf{f}:~Distribution of kernel-specific $\sca$ that
-                     correspond to $95\,\%$ curve span for $f_i(t)$. Dots
-                     indicate the value from \textbf{b}.
+                     correspond to $95\,\%$ curve span for $f_i(t)$. Dot
+                     indicates value based on the median from \textbf{b}.
                     }
    \label{fig:pipeline_short}
 \end{figure}
@@ -1426,26 +1417,27 @@ distances~(Fig.\,\ref{fig:pipeline_field}a, bottom row).
                     Input $\raw(t)$ consists of a song of \textit{P.
                     parallelus} recorded in the field at eight different
                     distances $d$ and is processed up to the feature set
-                     $f_i(t)$. Different color shades indicate different types
-                     of Gabor kernels with specific lobe number $\kn$ and
-                     either $+$ or $-$ sign, sorted (dark to light) first by
-                     increasing $\kn$ and then by
+                     $f_i(t)$ using kernel-specific threshold values
+                     $\thr=2\cdot\sigma_{\eta_i}$ (appendix
+                     Fig.\,\ref{fig:app_field_kern-sd}). Different color shades
+                     indicate different types of Gabor kernels with specific
+                     lobe number $\kn$ and either $+$ or $-$ sign, sorted (dark
+                     to light) first by increasing $\kn$ and then by
                     sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$ for
                     each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8, and
-                     $16\,$ms per type; 8 types, 40 kernels in total).
+                     $16\,$ms per type; 8 types, 40 $k_i(t)$ in total).
                     \textbf{a}:~$\filt(t)$, $\env(t)$, $\db(t)$, $\adapt(t)$,
                     $c_i(t)$, and $f_i(t)$ at each $d$. A noise segment from
                     the same recording is shown for reference.
-                     \textbf{b}:~Intensity measures over $d$. For $c_i(t)$
-                     and $f_i(t)$, the median over kernels is shown.
-                     \textbf{c}:~Average value $\mu_{f_i}$ of each feature
-                     $f_i(t)$ over $d$.
-                     \textbf{d}:~Ratios of intensity measures to the respective
-                     value obtained from the noise reference. For $c_i(t)$ and
-                     $f_i(t)$, the median over kernel-specific ratios is shown.
-                     \textbf{e}:~Ratios of standard deviation $\sigma_{c_i}$ of
+                     \textbf{b}:~Intensity measures over $d$. The median over
+                     $k_i(t)$ is shown for $c_i(t)$ and $f_i(t)$.
+                     \textbf{c}:~Average value $\muf$ of each $f_i(t)$ over
+                     $d$.
+                     \textbf{d}:~Ratio of intensity measures from \textbf{b} to
+                     the respective value obtained from the noise reference.
+                     \textbf{e}:~Ratio of standard deviation $\sigma_{c_i}$ of
                     each $c_i(t)$.
-                     \textbf{f}:~Ratios of $\mu_{f_i}$.
+                     \textbf{f}:~Ratio of $\muf$.
                     }
    \label{fig:pipeline_field}
 \end{figure}
@@ -1470,8 +1462,8 @@ song, so that each dot within a subplot corresponds to a single feature
 $f_i(t)$. For the intraspecific
 comparisons~(Fig.\,\ref{fig:feat_cross_species}, upper triangular), the pairs
 of $\muf$ are distributed closely around the diagonal, with a minimum
-correlation coefficient of $\rho=0.85$, a maximum of $\rho=0.99$, and a median
-of $\rho=0.92$. A given $f_i(t)$ thus tends to have a similar $\muf$ across
+correlation coefficient of $\rho=0.82$, a maximum of $\rho=0.99$, and a median
+of $\rho=0.91$. A given $f_i(t)$ thus tends to have a similar $\muf$ across
 different songs of the same species. In contrast, the pairs of $\muf$ for the
 interspecific comparisons~(Fig.\,\ref{fig:feat_cross_species}, lower
 triangular) are distributed in a variety of different ways, most in broader
@@ -1479,7 +1471,7 @@ clouds (e.g. \textit{C. biguttulus} vs. \textit{C. mollis}) but some more
 narrowly around the diagonal (e.g. \textit{P. parallelus} vs. \textit{C.
 dispar}). The correlation coefficients $\rho$ vary widely between different
 interspecific comparisons, with a minimum of $\rho=-0.1$, a maximum of
-$\rho=0.92$, and a median of $\rho=0.53$. A given $f_i(t)$ therefore tends to
+$\rho=0.91$, and a median of $\rho=0.40$. A given $f_i(t)$ therefore tends to
 have a less similar $\muf$ across different species than within the same
 species, although certain exceptions exist~(Fig.\,\ref{fig:feat_cross_species},
 lower right). Accordingly, the feature representation that is generated by the
@@ -1498,18 +1490,17 @@ natural song variation.
    \centering
    \includegraphics[width=\textwidth]{figures/fig_features_cross_species.pdf}
    \caption{\textbf{Interspecific and intraspecific feature variability.}
-                     Average value $\mu_{f_i}$ of each feature $f_i(t)$ against
-                     its counterpart from a 2nd feature set based on a
-                     different input $\raw(t)$. Each dot within a subplot
-                     represents a single feature $f_i(t)$. Different color
+                     Average value $\muf$ of each feature $f_i(t)$ against its
+                     counterpart from a 2nd feature set based on a different
+                     input $\raw(t)$. Data is based on the saturated $\muf$
+                     from Fig.\,\ref{fig:pipeline_full}. Each dot within a
+                     subplot represents a single $f_i(t)$. Different color
                     shades indicate different types of Gabor kernels with
                     specific lobe number $\kn$ and either $+$ or $-$ sign,
                     sorted (dark to light) first by increasing $\kn$ and then
                     by sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$ for
                     each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8, and
-                     $16\,$ms per type; 8 types, 40 kernels in total). Data is
-                     based on the analysis underlying
-                     Fig\,\ref{fig:pipeline_full}.
+                     $16\,$ms per type; 8 types, 40 kernels in total).
                     \textbf{Lower triangular}:~Interspecific comparisons
                     between single songs of different species.
                     \textbf{Upper triangular}:~Intraspecific comparisons
@@ -1524,7 +1515,8 @@ natural song variation.
 \end{figure}
 \FloatBarrier

-\section{Conclusions \& outlook}
+\newpage
+\section{Discussion}

 % RIPPED FROM INTRODUCTION:

@@ -1677,105 +1669,189 @@ $\rightarrow$ Graded again but highly decorrelated from the acoustic stimulus\\
 $\rightarrow$ Parameters of a behavioral response may be graded (e.g. approach speed),
 initiation of one behavior over another is categorical (e.g. approach/stay)

+\newpage
+\section{Appendix}

+% Not sure if we really need this one. Might raise more questions than it
+% provides answers. The noise component is not stable throughout nonlinear
+% transformations, that is all the reader needs to know, I believe.
 \begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_noise_env_sd_conversion_appendix.pdf}
-    \caption{\textbf{}
-                     }
+    \caption{\textbf{Conversion of the noise component by envelope extraction.}
+                     Standard deviation $\sigma_{\eta}$ of noise component
+                     $\noc(t)$ within the signal envelope $\env(t)$ over scale
+                     $\sca$. Based on input $\raw(t)$ with $\sigma_{\eta}=1$
+                     (corresponding to the analysis underlying
+                     Fig.\,\ref{fig:rect-lp}), using 100 realizations of
+                     $\noc(t)$.}
    \label{fig:app_env-sd}
-\end{figure}
+\end{figure}% Referenced.
 \FloatBarrier

 \begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_invariance_rect-lp_appendix.pdf}
-    \caption{\textbf{}
-                     }
+    \caption{\textbf{Species-specific data underlying Fig.\,\ref{fig:rect-lp}e.}
+                     Ratio of the standard deviation $\sigma_{\text{env}}$ to
+                     the pure-noise reference $\sigma_{\eta}$ of the signal
+                     envelope $\env(t)$ over scale $\sca$ for different cutoff
+                     frequencies $\fc$ of the lowpass filter extracting
+                     $\env(t)$. Solid lines and shaded areas indicate mean
+                     $\pm$ standard deviation across songs per recording.
+                     Dashed lines indicate mean across recordings (shown in
+                     Fig.\,\ref{fig:rect-lp}e).}
    \label{fig:app_rect-lp}
-\end{figure}
+\end{figure}% Referenced.
 \FloatBarrier

 \begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_invariance_log-hp_appendix.pdf}
-    \caption{\textbf{}
-                     }
+    \caption{\textbf{Species-specific data underlying Fig.\,\ref{fig:log-hp}e.}
+                     Ratio of the standard deviation $\sigma_{\text{adapt}}$ to
+                     the pure-noise reference $\sigma_{\eta}$ of the
+                     intensity-adapted envelope $\adapt(t)$ over scale $\sca$.
+                     Solid lines and shaded areas indicate mean $\pm$ standard
+                     deviation across songs per recording. Dashed lines
+                     indicate mean across recordings (shown in
+                     Fig.\,\ref{fig:log-hp}e).}
    \label{fig:app_log-hp_curves}
-\end{figure}
+\end{figure}% Referenced.
 \FloatBarrier

 \begin{figure}[!ht]
   \centering
   \includegraphics[width=\textwidth]{figures/fig_saturation_log-hp_appendix.pdf}
-   \caption{\textbf{}
-                    }
+   \caption{\textbf{Species-specific saturation points underlying
+                    Fig.\,\ref{fig:log-hp}e.}
+                    Distribution of saturation points ($95\,\%$ curve span) of
+                    ratio $\sigma_{\text{adapt}} / \sigma_{\eta}$ of the
+                    intensity-adapted envelope $\adapt(t)$ over scale $\sca$
+                    across all available songs. Dots indicate the saturation
+                    point of the mean curve across songs and recordings (shown
+                    in Fig.\,\ref{fig:log-hp}e, see also appendix
+                    Fig.\,\ref{fig:app_log-hp_curves}).}
   \label{fig:app_log-hp_saturation}
-\end{figure}
+\end{figure}% Referenced.
 \FloatBarrier

 \begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_invariance_thresh-lp_pure_appendix.pdf}
-    \caption{\textbf{}
-                     }
+    \caption{\textbf{Species-specific data underlying Fig.\,\ref{fig:thresh-lp_species}bd.}
+                     Average value $\muf$ of each of the three features
+                     $f_i(t)$ over scale $\sca$ in the noiseless case. Solid
+                     lines and shaded areas indicate mean $\pm$ standard
+                     deviation across songs per recording. Dashed lines
+                     indicate mean across recordings (shown in
+                     Fig.\,\ref{fig:thresh-lp_species}bd).}
    \label{fig:app_thresh-lp_pure}
-\end{figure}
+\end{figure}% Referenced.
 \FloatBarrier

 \begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_invariance_thresh-lp_noise_appendix.pdf}
-    \caption{\textbf{}
-                     }
+    \caption{\textbf{Species-specific data underlying Fig.\,\ref{fig:thresh-lp_species}ce.}
+                     Average value $\muf$ of each of the three features
+                     $f_i(t)$ over scale $\sca$ in the noisy case. Solid lines
+                     and shaded areas indicate mean $\pm$ standard deviation
+                     across songs per recording. Dashed lines indicate mean
+                     across recordings (shown in
+                     Fig.\,\ref{fig:thresh-lp_species}ce).}
    \label{fig:app_thresh-lp_noise}
-\end{figure}
+\end{figure}% Referenced.
 \FloatBarrier

 \begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_thresh_lp_appendix.pdf}
-    \caption{\textbf{}
+    \caption{\textbf{Relation between threshold value and pure-noise feature
+                     value for Fig.\,\ref{fig:thresh-lp_single} and
+                     Fig.\,\ref{fig:thresh-lp_species}.}
+                     Proportion of pure-noise kernel response $c_i(t)$ that
+                     exceeds threshold value $\thr$ --- which determines the
+                     average value $\muf$ of feature $f_i(t)$ --- over $\thr$
+                     in multiples of standard deviation $\sigma_{c_i}$.
+                     Corresponds to a ``reverse'' cumulative distribution
+                     function~(rCDF) of $c_i(t)$. Black solid lines indicate rCDF per
+                     kernel $k_i(t)$. Red dashed line indicates rCDF for a
+                     normal distribution with $\mu=0$ and $\sigma=1$.
                     }
    \label{fig:app_thresh-lp_kern-sd}
-\end{figure}
+\end{figure}% Referenced.
 \FloatBarrier

 \begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_full_appendix.pdf}
-    \caption{\textbf{}
+    \caption{\textbf{Relation between threshold value and pure-noise feature
+                     value for Fig.\,\ref{fig:pipeline_full}.}
+                     Proportion of pure-noise kernel response $c_i(t)$ that
+                     exceeds threshold value $\thr$ --- which determines the
+                     average value $\muf$ of feature $f_i(t)$ --- over $\thr$
+                     in multiples of standard deviation $\sigma_{c_i}$.
+                     Corresponds to a ``reverse'' cumulative distribution
+                     function~(rCDF) of $c_i(t)$. Black solid lines indicate rCDF per
+                     kernel $k_i(t)$. Red dashed line indicates rCDF for a
+                     normal distribution with $\mu=0$ and $\sigma=1$.
                     }
    \label{fig:app_full_kern-sd}
-\end{figure}
+\end{figure}% Referenced.
 \FloatBarrier

 \begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_short_appendix.pdf}
-    \caption{\textbf{}
+    \caption{\textbf{Relation between threshold value and pure-noise feature
+                     value for Fig.\,\ref{fig:pipeline_short}.}
+                     Proportion of pure-noise kernel response $c_i(t)$ that
+                     exceeds threshold value $\thr$ --- which determines the
+                     average value $\muf$ of feature $f_i(t)$ --- over $\thr$
+                     in multiples of standard deviation $\sigma_{c_i}$.
+                     Corresponds to a ``reverse'' cumulative distribution
+                     function~(rCDF) of $c_i(t)$. Black solid lines indicate rCDF per
+                     kernel $k_i(t)$. Red dashed line indicates rCDF for a
+                     normal distribution with $\mu=0$ and $\sigma=1$.
                     }
    \label{fig:app_short_kern-sd}
-\end{figure}
+\end{figure}% Referenced.
 \FloatBarrier

 \begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_field_appendix.pdf}
-    \caption{\textbf{}
+    \caption{\textbf{Relation between threshold value and pure-noise feature
+                     value for Fig.\,\ref{fig:pipeline_field}.}
+                     Proportion of pure-noise kernel response $c_i(t)$ that
+                     exceeds threshold value $\thr$ --- which determines the
+                     average value $\muf$ of feature $f_i(t)$ --- over $\thr$
+                     in multiples of standard deviation $\sigma_{c_i}$.
+                     Corresponds to a ``reverse'' cumulative distribution
+                     function~(rCDF) of $c_i(t)$. Black solid lines indicate rCDF per
+                     kernel $k_i(t)$. Red dashed line indicates rCDF for a
+                     normal distribution with $\mu=0$ and $\sigma=1$.
                     }
    \label{fig:app_field_kern-sd}
-\end{figure}
+\end{figure}% Referenced.
 \FloatBarrier

-
 \begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_invariance_cross_species_thresh_appendix.pdf}
-    \caption{\textbf{}
+    \caption{\textbf{Threshold-dependent intensity invariance of
+                     species-specific feature sets.}
+                     Same input and processing as in
+                     Fig.\,\ref{fig:pipeline_full}, using different
+                     kernel-specific threshold values $\thr$ (multiples of
+                     pure-noise standard deviation $\sigma_{\eta_i}$ of
+                     $c_i(t)$ for $\sca=0$; see also appendix
+                     Fig.\,\ref{fig:app_full_kern-sd}). Average value $\muf$ of
+                     each feature $f_i(t)$ over $\sca$.
                     }
    \label{fig:app_cross_species_thresh}
-\end{figure}
+\end{figure}% Reference this one!
 \FloatBarrier

 \end{document}