Updated/made figure captions up to Fig. 7.

2026-05-03 19:55:26 +02:00
parent 9c5811d97c
commit 69f172ff2c
8 changed files with 1299 additions and 1223 deletions
--- a/main.tex
+++ b/main.tex
@@ -67,12 +67,12 @@
 \newcommand{\thp}{T_{\text{HP}}} % Highpass filter adaptation interval

 % Math shorthands - Early representations:
-\newcommand{\raw}{x} % Placeholder input signal
-\newcommand{\filt}{\raw_{\text{filt}}} % Bandpass-filtered signal
-\newcommand{\env}{\raw_{\text{env}}} % Signal envelope
-\newcommand{\db}{\raw_{\text{log}}} % Logarithmically scaled signal
-\newcommand{\dbref}{\raw_{\text{ref}}} % Decibel reference intensity
-\newcommand{\adapt}{\raw_{\text{adapt}}} % Adapted signal
+\newcommand{\raw}{x_{\text{raw}}} % Placeholder input signal
+\newcommand{\filt}{x_{\text{filt}}} % Bandpass filtered signal
+\newcommand{\env}{x_{\text{env}}} % Signal envelope
+\newcommand{\db}{x_{\text{log}}} % Logarithmically scaled signal
+\newcommand{\dbref}{x_{\text{ref}}} % Decibel reference intensity
+\newcommand{\adapt}{x_{\text{adapt}}} % Adapted signal

 % Math shorthands - Kernel parameters:
 \newcommand{\kw}{\sigma} % Unspecific Gabor kernel width
@@ -354,9 +354,8 @@ outlined in the following sections.
 \begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_auditory_pathway.pdf}
-    \caption{\textbf{Schematic organisation of the song recognition pathway in
-                     grasshoppers compared to the structure of the functional
-                     model pathway.}
+    \caption{\textbf{Schematic organisation of the grasshopper song recognition
+                     pathway and structure of the functional model pathway.}
                     \textbf{a}:~Simplified course of the pathway in the
                     grasshopper, from the tympanal membrane over receptor
                     neurons, local interneurons, and ascending neurons further
@@ -365,12 +364,12 @@ outlined in the following sections.
                     the three neuronal populations within the metathoracic
                     ganglion.
                     \textbf{c}:~Network representation of neuronal connectivity.
-                     \textbf{d}:~Flow diagram of the different signal
-                     representations and transformations along the model
-                     pathway. All representations are time-varying. 1st half:
-                     Preprocessing stage (one-dimensional). 2nd half: Feature
-                     extraction stage (high-dimensional).
-                     }
+                     \textbf{d}:~Flow diagram of consecutive signal
+                     representations~(boxes) and transformations~(arrows) along
+                     the model pathway. All representations are time-varying.
+                     1st half: Preprocessing stage~(one-dimensional
+                     representation). 2nd half: Feature extraction
+                     stage~(high-dimensional representation). }
    \label{fig:pathway}
 \end{figure}

@@ -428,12 +427,12 @@ following feature extraction stage.
    \includegraphics[width=\textwidth]{figures/fig_pre_stages.pdf}
    \caption{\textbf{Representations of a song of \textit{O. rufipes} during
                     the preprocessing stage.}
-                     \textbf{a}:~Bandpass-filtered tympanal signal.
-                     \textbf{b}:~Signal envelope.
-                     \textbf{c}:~Logarithmically scaled envelope.
-                     \textbf{d}:~Intensity-adapted envelope.
+                     \textbf{a}:~Bandpass filtered tympanal signal $\filt(t)$.
+                     \textbf{b}:~Signal envelope $\env(t)$.
+                     \textbf{c}:~Logarithmically compressed envelope $\db(t)$.
+                     \textbf{d}:~Intensity-adapted envelope $\adapt(t)$.
                     }
-    \label{fig:pre}
+    \label{fig:stages_pre}
 \end{figure}
 \FloatBarrier

@@ -543,14 +542,15 @@ can be read out by a simple linear classifier.
    \includegraphics[width=\textwidth]{figures/fig_feat_stages.pdf}
    \caption{\textbf{Representations of a song of \textit{O. rufipes} during
                     the feature extraction stage.}
-                     Different colors indicate Gabor kernels with different
-                     lobe number $\kn$ and sign, with lighter colors for higher
-                     $\kn$~($1\,\leq\,\kn\,\leq\,4$; both $+$ and $-$ per $\kn$;
-                     two kernel widths $\kw$ of $4\,$ms and $32\,$ms per sign).
-                     \textbf{a}:~Kernel-specific filter responses.
-                     \textbf{b}:~Binary responses.
-                     \textbf{c}:~Finalized features.
-                     }
+                     Different color shades indicate different types of Gabor
+                     kernels with specific lobe number $\kn$ and either $+$ or
+                     $-$ sign, sorted (dark to light) first by increasing $\kn$
+                     and then by sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then
+                     $-$ for each $\kn$; two kernel widths $\kw$ of $4\,$ms and
+                     $32\,$ms per type; 8 types, 16 kernels in total).
+                     \textbf{a}:~Kernel-specific filter responses $c_i(t)$.
+                     \textbf{b}:~Binary responses $b_i(t)$.
+                     \textbf{c}:~Finalized features $f_i(t)$.}
    \label{fig:stages_feat}
 \end{figure}
 \FloatBarrier
@@ -576,29 +576,30 @@ specific operations involved, as outlined in the following sections.
 \begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_invariance_rect_lp.pdf}
-    \caption{\textbf{Intensity invariance by logarithmic compression and
-                     adaptation is restricted by the noise floor.}
-                     Synthetic input $\filt(t)$ consists of song component
-                     $\soc(t)$ scaled by $\sca$ with (\figc{} and \figd) or
-                     without (\figa{} and \figb) additive noise component
-                     $\noc(t)$. Input $\filt(t)$ is transformed into envelope
-                     $\env(t)$, logarithmically compressed envelope $\db(t)$,
-                     and intensity-adapted envelope $\adapt(t)$.
-                     \textbf{Left}:~$\env(t)$, $\db(t)$, and $\adapt(t)$ for
-                     different scales $\sca$.
-                     \textbf{Right}:~Ratios of the standard deviation of
-                     $\env(t)$, $\db(t)$, and $\adapt(t)$ relative to the
-                     respective reference standard deviation $\sigma_{\eta}$
-                     for input $\filt(t)=\noc(t)$.
-                     \figa{} and \figb:~Ideally, if $\filt(t)=\sca\cdot\soc(t)$, then
-                     $\adapt(t)$ is intensity-invariant across all $\sca$.
-                     \figc{} and \figd:~In practice, if
-                     $\filt(t)=\sca\cdot\soc(t)+\noc(t)$, the intensity
-                     invariance of $\adapt(t)$ is limited to sufficiently large
-                     $\sca$. Shaded area indicates saturation of $\adapt(t)$ at
-                     $95\,\%$ curve span.
-                     }
-    \label{fig:inv_rect-lp}
+\caption{\textbf{Rectification and lowpass filtering improve SNR
+                 but do not contribute to intensity invariance.}
+                 Input $\raw(t)$ consists of song component $\soc(t)$ scaled by
+                 $\sca$ with optional noise component $\noc(t)$ and is
+                 successively transformed into tympanal signal $\filt(t)$ and
+                 envelope $\env(t)$. Different line styles indicate different
+                 cutoff frequencies $\fc$ of the lowpass filter extracting
+                 $\env(t)$.
+                 \textbf{Top}:~Example representations of $\filt(t)$ and
+                 $\env(t)$ for different $\sca$.
+                 \textbf{a}:~Noiseless case.
+                 \textbf{b}:~Noisy case.
+                 \textbf{Bottom}:~Intensity metrics over a range of $\sca$.
+                 \textbf{c}:~Noiseless case: Standard deviations $\sigma_x$ of
+                 $\filt(t)$ and $\env(t)$.
+                 \textbf{d}:~Noisy case: Ratios of $\sigma_x$ of $\filt(t)$ and
+                 $\env(t)$ to the respective reference standard deviation
+                 $\sigma_{\eta}$ for input $\raw(t)=\noc(t)$.
+                 \textbf{e}:~Ratios of $\sigma_x$ to $\sigma_{\eta}$ of
+                 $\env(t)$ as in \textbf{d} for different species (averaged
+                 over songs and recordings, see appendix
+                 Fig.\,\ref{fig:app_rect-lp}).
+                }
+    \label{fig:rect-lp}
 \end{figure}
 \FloatBarrier

@@ -634,7 +635,7 @@ space into an additive term, or offset, in logarithmic space
 \end{equation}
 which allows for its separation from $\soc(t)$ but introduces a scaling of
 $\noc(t)$ by the inverse of $\sca$. The subsequent
-highpass-filtering~(Eq.\,\ref{eq:highpass}) of $\db(t)$ can then be
+highpass filtering~(Eq.\,\ref{eq:highpass}) of $\db(t)$ can then be
 approximated as a subtraction of the local offset within a suitable time
 interval $0 \ll \thp < \frac{1}{\fc}$:
 % \begin{equation}
@@ -675,29 +676,34 @@ the signal for reliable song recognition.
 \begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_invariance_log_hp.pdf}
-    \caption{\textbf{Intensity invariance by logarithmic compression and
-                     adaptation is restricted by the noise floor.}
-                     Synthetic input $\filt(t)$ consists of song component
-                     $\soc(t)$ scaled by $\sca$ with (\figc{} and \figd) or
-                     without (\figa{} and \figb) additive noise component
-                     $\noc(t)$. Input $\filt(t)$ is transformed into envelope
-                     $\env(t)$, logarithmically compressed envelope $\db(t)$,
-                     and intensity-adapted envelope $\adapt(t)$.
-                     \textbf{Left}:~$\env(t)$, $\db(t)$, and $\adapt(t)$ for
-                     different scales $\sca$.
-                     \textbf{Right}:~Ratios of the standard deviation of
-                     $\env(t)$, $\db(t)$, and $\adapt(t)$ relative to the
-                     respective reference standard deviation $\sigma_{\eta}$
-                     for input $\filt(t)=\noc(t)$.
-                     \figa{} and \figb:~Ideally, if $\filt(t)=\sca\cdot\soc(t)$, then
-                     $\adapt(t)$ is intensity-invariant across all $\sca$.
-                     \figc{} and \figd:~In practice, if
-                     $\filt(t)=\sca\cdot\soc(t)+\noc(t)$, the intensity
-                     invariance of $\adapt(t)$ is limited to sufficiently large
-                     $\sca$. Shaded area indicates saturation of $\adapt(t)$ at
-                     $95\,\%$ curve span.
+    \caption{\textbf{Intensity invariance through logarithmic compression and
+                     adaptation is restricted by the noise floor and decreases
+                     SNR.}
+                     Input $\filt(t)$ consists of song component $\soc(t)$
+                     scaled by $\sca$ with optional noise component $\noc(t)$
+                     and is successively transformed into envelope $\env(t)$,
+                     logarithmically compressed envelope $\db(t)$, and
+                     intensity-adapted envelope $\adapt(t)$.
+                     \textbf{Top}:~Example representations of $\env(t)$,
+                     $\db(t)$, and $\adapt(t)$ for different $\sca$.
+                     \textbf{a}:~Noiseless case.
+                     \textbf{b}:~Noisy case.
+                     \textbf{Bottom}:~Intensity metrics over a range of $\sca$.
+                     \textbf{c}:~Noiseless case: Standard deviations $\sigma_x$
+                     of $\env(t)$, $\db(t)$, and $\adapt(t)$.
+                     \textbf{d}:~Noisy case: Ratios of $\sigma_x$ of $\env(t)$,
+                     $\db(t)$, and $\adapt(t)$ to the respective reference
+                     standard deviation $\sigma_{\eta}$ for input
+                     $\filt(t)=\noc(t)$. Shaded areas indicate $5\,\%$ (dark
+                     grey) and $95\,\%$ (light grey) curve span for
+                     $\adapt(t)$.
+                     \textbf{e}:~Ratios of $\sigma_x$ to $\sigma_{\eta}$ of
+                     $\adapt(t)$ as in \textbf{d} for different species
+                     (averaged over songs and recordings, see appendix
+                     Fig.\,\ref{fig:app_log-hp_curves}). Dots indicate $95\,\%$
+                     curve span per species.
                     }
-    \label{fig:inv_log-hp}
+    \label{fig:log-hp}
 \end{figure}
 \FloatBarrier

@@ -706,40 +712,93 @@ the signal for reliable song recognition.
 \begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_invariance_thresh_lp_single.pdf}
-    \caption{\textbf{Intensity invariance by thresholding and temporal
-                     averaging depends on both the threshold value and the
-                     noise floor.}
-                     Synthetic input $\adapt(t)$ consists of song component
-                     $\soc(t)$ scaled by $\sca$ with additive noise component
-                     $\noc(t)$. Input $\adapt(t)$ is transformed into kernel
-                     response $c_i(t)$, binary response $b_i(t)$, and feature
-                     $f_i(t)$. Threshold value $\thr$ is set to multiples of
-                     the reference standard deviation $\sigma_{\eta}$ of $c_i(t)$ for input
-                     $\adapt(t)=\noc(t)$. Darker colors correspond to higher
-                     $\thr$.
-                     \textbf{Left}:~$\adapt(t)$, $c_i(t)$, $b_i(t)$, and
-                     $f_i(t)$ for different scales $\sca$ and threshold values
-                     $\thr$. Left-most column is is the pure-noise reference.
-                     \textbf{Right}:~Average value of $f_i(t)$ during the song
-                     for the different $\thr$.
-                     \figa:~Input $\adapt(t)$.
-                     \figb-\figd:~$c_i(t)$, $b_i(t)$, and $f_i(t)$ for the
-                     different $\thr$ based on the same $\adapt(t)$ from
-                     \figa{}.
-                     \fige:~Average value of $f_i(t)$ during the song for
-                     the different $\thr$ in \figb{}-\figd.
+    \caption{\textbf{Intensity invariance through thresholding and temporal
+                     averaging is mediated by the interaction of threshold
+                     value and noise floor.}
+                     Input $\adapt(t)$ consists of song component $\soc(t)$
+                     scaled by $\sca$ with optional noise component $\noc(t)$
+                     and is transformed into single kernel response $c(t)$,
+                     binary response $b(t)$, and feature $f(t)$. Different
+                     color shades indicate different threshold values $\Theta$
+                     (multiples of reference standard deviation $\sigma_{\eta}$
+                     of $c(t)$ for input $\adapt(t)=\noc(t)$, with darker
+                     colors for higher $\Theta$).
+                     \textbf{Left}:~Noisy case: Example representations of
+                     $\adapt(t)$ as well as $c(t)$, $b(t)$, and $f(t)$ for
+                     different $\sca$.
+                     \textbf{a}:~$\adapt(t)$ with kernel $k(t)$ in black.
+                     \textbf{b\,--\,d}:~$c(t)$, $b(t)$, and $f(t)$ based on the
+                     same $\adapt(t)$ from \textbf{a} but with different
+                     $\Theta$.
+                     \textbf{Right}:~Average value $\mu_f$ of $f(t)$ for each
+                     $\Theta$ from \textbf{b\,--\,d}, once for the noisy case
+                     (solid lines) and once for the noiseless case (dotted
+                     lines). Dots indicate $95\,\%$ curve span (noisy case).
+                     \textbf{e}:~$\mu_f$ over a range of $\sca$.
+                     \textbf{f}:~$\mu_f$ over the standard deviation of noisy
+                     input $\adapt(t)$ corresponding to the values of $\sca$
+                     shown in \textbf{e}.
+                     % Why plot noiseless case over SD of noisy input? Omit?
                     }
-    \label{fig:inv_thresh-lp_single}
+    \label{fig:thresh-lp_single}
 \end{figure}
 \FloatBarrier

+
+% \caption{\textbf{Rectification and lowpass filtering improves SNR
+%                  but does not contribute to intensity invariance.}
+%                  Input $\raw(t)$ consists of song component $\soc(t)$ scaled by
+%                  $\sca$ with optional noise component $\noc(t)$ and is
+%                  successively transformed into tympanal signal $\filt(t)$ and
+%                  envelope $\env(t)$. Different line styles indicate different
+%                  cutoff frequencies $\fc$ of the lowpass filter extracting
+%                  $\env(t)$.
+%                  \textbf{Top}:~Example representations of $\filt(t)$ and
+%                  $\env(t)$ for different $\sca$.
+%                  \textbf{a}:~Noiseless case.
+%                  \textbf{b}:~Noisy case.
+%                  \textbf{Bottom}:~Intensity metrics over a range of $\sca$.
+%                  \textbf{c}:~Noiseless case: Standard deviations of $\filt(t)$
+%                  and $\env(t)$.
+%                  \textbf{d}:~Noisy case: Ratios of standard deviations of
+%                  $\filt(t)$ and $\env(t)$ to the respective reference standard
+%                  deviation for input $\raw(t)=\noc(t)$.
+%                  \textbf{e}:~Ratios of standard deviations of $\env(t)$ as in
+%                  \textbf{b} for different species (averaged over songs and
+%                  recordings, see appendix Fig.\,\ref{fig:app_rect-lp}).
+%                 }
+
 \begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_invariance_thresh_lp_species.pdf}
    \caption{\textbf{Feature representation of different species-specific songs
                     saturates at different points in feature space.}
+                     Same input and processing as in
+                     Fig.\,\ref{fig:thresh-lp_single} but with three different
+                     kernels $k_i$, each with a single kernel-specific
+                     threshold value $\thr=0.5\cdot\sigma_{\eta_i}$.
+                     \textbf{a}:~Examples of species-specific grasshopper
+                     songs.
+                     \textbf{Middle}:~Average value $\mu_{f_i}$ of each feature
+                     $f_i(t)$ over $\sca$ per species (averaged over songs and
+                     recordings, see appendix
+                     Figs.\,\ref{fig:app_thresh-lp_pure} and
+                     \ref{fig:app_thresh-lp_noise}). Different color shades
+                     indicate different kernels $k_i$. Dots indicate $95\,\%$
+                     curve span per $k_i$.
+                     \textbf{b}:~Noiseless case.
+                     \textbf{c}:~Noisy case.
+                     \textbf{Bottom}:~2D feature spaces spanned by each pair of
+                     $f_i(t)$. Each trajectory corresponds to a
+                     species-specific combination of $\mu_{f_i}$ that develops
+                     with $\sca$ (colorbars). Horizontal dashes in the colorbar
+                     indicate $5\,\%$ (dark grey) and $95\,\%$ (light grey)
+                     curve span of the norm across all three $\mu_{f_i}$ per
+                     species.
+                     \textbf{d}:~Noiseless case.
+                     \textbf{e}:~Noisy case. % TODO: finish sentence "Shaded areas ..."
                     }
-    \label{fig:inv_thresh-lp_species}
+    \label{fig:thresh-lp_species}
 \end{figure}
 \FloatBarrier

@@ -749,7 +808,7 @@ the signal for reliable song recognition.
    \caption{\textbf{Step-wise emergence of intensity invariant song
                     representation along the model pathway.}
                     }
-    \label{fig:inv_full}
+    \label{fig:pipeline_full}
 \end{figure}
 \FloatBarrier

@@ -759,7 +818,7 @@ the signal for reliable song recognition.
    \caption{\textbf{Step-wise emergence of intensity invariant song
                     representation along the model pathway.}
                     }
-    \label{fig:inv_short}
+    \label{fig:pipeline_short}
 \end{figure}
 \FloatBarrier

@@ -768,7 +827,7 @@ the signal for reliable song recognition.
    \includegraphics[width=\textwidth]{figures/fig_features_cross_species.pdf}
    \caption{\textbf{Inter- and intraspecific feature variability.}
                     }
-    \label{fig:cross_species}
+    \label{fig:feat_cross_species}
 \end{figure}
 \FloatBarrier

@@ -778,7 +837,7 @@ the signal for reliable song recognition.
    \caption{\textbf{Step-wise emergence of intensity invariant song
                     representation along the model pathway.}
                     }
-    \label{fig:inv_field}
+    \label{fig:pipeline_field}
 \end{figure}
 \FloatBarrier

@@ -936,7 +995,7 @@ initiation of one behavior over another is categorical (e.g. approach/stay)
    \includegraphics[width=\textwidth]{figures/fig_noise_env_sd_conversion_appendix.pdf}
    \caption{\textbf{}
                     }
-    \label{}
+    \label{fig:app_env-sd}
 \end{figure}
 \FloatBarrier

@@ -945,7 +1004,7 @@ initiation of one behavior over another is categorical (e.g. approach/stay)
    \includegraphics[width=\textwidth]{figures/fig_invariance_rect-lp_appendix.pdf}
    \caption{\textbf{}
                     }
-    \label{}
+    \label{fig:app_rect-lp}
 \end{figure}
 \FloatBarrier

@@ -954,7 +1013,7 @@ initiation of one behavior over another is categorical (e.g. approach/stay)
    \includegraphics[width=\textwidth]{figures/fig_invariance_log-hp_appendix.pdf}
    \caption{\textbf{}
                     }
-    \label{}
+    \label{fig:app_log-hp_curves}
 \end{figure}
 \FloatBarrier

@@ -963,7 +1022,7 @@ initiation of one behavior over another is categorical (e.g. approach/stay)
   \includegraphics[width=\textwidth]{figures/fig_saturation_log-hp_appendix.pdf}
   \caption{\textbf{}
                    }
-   \label{}
+   \label{fig:app_log-hp_saturation}
 \end{figure}
 \FloatBarrier

@@ -972,7 +1031,7 @@ initiation of one behavior over another is categorical (e.g. approach/stay)
    \includegraphics[width=\textwidth]{figures/fig_invariance_thresh-lp_pure_appendix.pdf}
    \caption{\textbf{}
                     }
-    \label{}
+    \label{fig:app_thresh-lp_pure}
 \end{figure}
 \FloatBarrier

@@ -981,7 +1040,7 @@ initiation of one behavior over another is categorical (e.g. approach/stay)
    \includegraphics[width=\textwidth]{figures/fig_invariance_thresh-lp_noise_appendix.pdf}
    \caption{\textbf{}
                     }
-    \label{}
+    \label{fig:app_thresh-lp_noise}
 \end{figure}
 \FloatBarrier

@@ -990,7 +1049,7 @@ initiation of one behavior over another is categorical (e.g. approach/stay)
    \includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_thresh_lp_appendix.pdf}
    \caption{\textbf{}
                     }
-    \label{}
+    \label{fig:app_thresh-lp_kern-sd}
 \end{figure}
 \FloatBarrier

@@ -999,7 +1058,7 @@ initiation of one behavior over another is categorical (e.g. approach/stay)
    \includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_full_appendix.pdf}
    \caption{\textbf{}
                     }
-    \label{}
+    \label{fig:app_full_kern-sd}
 \end{figure}
 \FloatBarrier

@@ -1008,7 +1067,7 @@ initiation of one behavior over another is categorical (e.g. approach/stay)
    \includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_short_appendix.pdf}
    \caption{\textbf{}
                     }
-    \label{}
+    \label{fig:app_short_kern-sd}
 \end{figure}
 \FloatBarrier

@@ -1017,7 +1076,7 @@ initiation of one behavior over another is categorical (e.g. approach/stay)
    \includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_field_appendix.pdf}
    \caption{\textbf{}
                     }
-    \label{}
+    \label{fig:app_field_kern-sd}
 \end{figure}
 \FloatBarrier

@@ -1027,7 +1086,7 @@ initiation of one behavior over another is categorical (e.g. approach/stay)
    \includegraphics[width=\textwidth]{figures/fig_invariance_cross_species_thresh_appendix.pdf}
    \caption{\textbf{}
                     }
-    \label{}
+    \label{fig:app_cross_species_thresh}
 \end{figure}
 \FloatBarrier