Captioned appendix figures.

Polished some figures.
Shortened existing figure captions.
This commit is contained in:
j-hartling
2026-05-21 18:21:33 +02:00
parent 7996a62bde
commit 59a37503ba
20 changed files with 438 additions and 375 deletions

444
main.tex
View File

@@ -228,6 +228,7 @@ intensity-invariant song representations, the interaction between these
mechanisms, the overall capacity for intensity invariance in the system, and
the ethological implications of our findings.
\newpage
\section{Methods}
% This maybe does not quite fit here, but it is the most general part of the
% methods and applies throughout the whole section, so I put it here for now.
@@ -289,8 +290,8 @@ the following sections.
representations~(boxes) and transformations~(arrows) along
the model pathway. All representations are time-varying.
1st half: Preprocessing stage~(one-dimensional
representation). 2nd half: Feature extraction
stage~(high-dimensional representation). }
representations). 2nd half: Feature extraction
stage~(high-dimensional representations). }
\label{fig:pathway}
\end{figure}
@@ -347,8 +348,8 @@ the following feature extraction stage.
\begin{figure}[!ht]
\centering
\includegraphics[width=\textwidth]{figures/fig_pre_stages.pdf}
\caption{\textbf{Representations of a song of \textit{O. rufipes} during
the preprocessing stage.}
\caption{\textbf{Song representations during the preprocessing stage.}
Example song of \textit{O. rufipes}.
\textbf{a}:~Bandpass filtered tympanal signal $\filt(t)$.
\textbf{b}:~Signal envelope $\env(t)$.
\textbf{c}:~Logarithmically compressed envelope $\db(t)$.
@@ -483,14 +484,15 @@ or a simple linear classifier.
\begin{figure}[!ht]
\centering
\includegraphics[width=\textwidth]{figures/fig_feat_stages.pdf}
\caption{\textbf{Representations of a song of \textit{O. rufipes} during
the feature extraction stage.}
\caption{\textbf{Song representations during the feature extraction stage.}
Example song of \textit{O. rufipes}.
Different color shades indicate different types of Gabor
kernels with specific lobe number $\kn$ and either $+$ or
$-$ sign, sorted (dark to light) first by increasing $\kn$
and then by sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then
$-$ for each $\kn$; two kernel widths $\kw$ of $4\,$ms and
$32\,$ms per type; 8 types, 16 kernels in total).
kernels with specific lobe number $\kni$ and either $+$ or
$-$ sign, sorted (dark to light) first by increasing
$\kni$ and then by sign~($1\,\leq\,\kni\,\leq\,4$; first
$+$, then $-$ for each $\kni$; two kernel widths $\kwi$ of
$4\,$ms and $32\,$ms per type; 8 types, 16 kernels in
total).
\textbf{a}:~Kernel-specific filter responses $c_i(t)$.
\textbf{b}:~Binary responses $b_i(t)$.
\textbf{c}:~Finalized features $f_i(t)$.}
@@ -653,12 +655,13 @@ which is a reasonable assumption for the raw $\soc(t)$ and $\noc(t)$. However,
the dependency of the ratio on $\sca$ is not necessarily the same for
representations that are transformed from $x(t)$ by nonlinear operations, since
these change the relationship of $\soc(t)$ and $\noc(t)$ in an unpredictable
fashion. Furthermore, the ratio is not a proper SNR of the representation
because it does not relate $\soc(t)$ to $\noc(t)$ within the representation but
rather the entire representation to $\noc(t)$ alone. However, it still provides
a useful measure of the relative intensity of a representation with and without
$\soc(t)$, which is the closest we can get to the SNR of the representation. As
such, the ratio of intensity measures is referred to as SNR in the following.
fashion~(see appendix Fig.\,\ref{fig:app_env-sd}). Furthermore, the ratio is
not a proper SNR of the representation because it does not relate $\soc(t)$ to
$\noc(t)$ within the representation but rather the entire representation to
$\noc(t)$ alone. However, it still provides a useful measure of the relative
intensity of a representation with and without $\soc(t)$, which is the closest
we can get to the SNR of the representation. As such, the ratio of intensity
measures is referred to as SNR in the following.
% Is this legal? "SNR" is much shorter than "ratio of intensity measure to the pure-noise reference measure".
% Haven't used it much yet, stuck to "ratio" in most cases.
@@ -694,9 +697,10 @@ $\thr$ has been specified as a multiple of the pure-noise reference standard
deviation $\sigma_{c_i}$ for input $x(t)=\noc(t)$. This ensures that $\thr$ as
well as the resulting $b_i(t)$ and $f_i(t)$ are comparable across different
$k_i(t)$ because each pure-noise $c_i(t)$ approximately follows a normal
distribution~(see appendix
distribution around zero~(see appendix
Figs.\,\ref{fig:app_thresh-lp_kern-sd}--\ref{fig:app_field_kern-sd}).
\newpage
\section{Results}
\subsection{Mechanisms driving the emergence of intensity invariance}
@@ -767,25 +771,23 @@ more robust input representation and higher input SNR.
\includegraphics[width=\textwidth]{figures/fig_invariance_rect_lp.pdf}
\caption{\textbf{Rectification and lowpass filtering improve SNR
but do not contribute to intensity invariance.}
Input $\raw(t)$ consists of song component $\soc(t)$ scaled by
$\sca$ with optional noise component $\noc(t)$ and is
successively transformed into tympanal signal $\filt(t)$ and
envelope $\env(t)$. Different line styles indicate different
cutoff frequencies $\fc$ of the lowpass filter extracting
$\env(t)$.
\textbf{Top}:~Example representations of $\filt(t)$ and
$\env(t)$ for different $\sca$.
Input $\raw(t)$ consists of $\soc(t)$ scaled by $\sca$ with
optional $\noc(t)$ and is successively transformed into
tympanal signal $\filt(t)$ and envelope $\env(t)$.
\textbf{Top}:~Examples of $\filt(t)$ and $\env(t)$ for
different $\sca$.
\textbf{a}:~Noiseless case.
\textbf{b}:~Noisy case.
\textbf{Bottom}:~Intensity measures over a range of $\sca$.
\textbf{c}:~Noiseless case: Standard deviations $\sigma_x$ of
$\filt(t)$ and $\env(t)$.
\textbf{d}:~Noisy case: Ratios of $\sigma_x$ of $\filt(t)$ and
$\env(t)$ to the respective reference standard deviation
$\sigma_{\eta}$ for input $\raw(t)=\noc(t)$.
\textbf{e}:~Ratios of $\sigma_x$ to $\sigma_{\eta}$ of
\textbf{Bottom}:~Intensity measures over $\sca$. Different
line styles indicate different cutoff frequencies $\fc$ of the
lowpass filter extracting $\env(t)$.
\textbf{c}:~Noiseless case: Standard deviation $\sigma_x$ of
$\filt(t)$ and $\env(t)$, respectively.
\textbf{d}:~Noisy case: Ratio of $\sigma_x$ to the respective
pure-noise reference $\sigma_{\eta}$ for $\sca=0$.
\textbf{e}:~Ratio of $\sigma_x$ to $\sigma_{\eta}$ of
$\env(t)$ as in \textbf{d} for different species (averaged
over songs and recordings, see appendix
over songs and recordings, appendix
Fig.\,\ref{fig:app_rect-lp}).
}
\label{fig:rect-lp}
@@ -907,28 +909,26 @@ is a recurring phenomenon that is further addressed in the following sections.
\caption{\textbf{Intensity invariance through logarithmic compression and
adaptation is restricted by the noise floor and decreases
SNR.}
Input $\filt(t)$ consists of song component $\soc(t)$
scaled by $\sca$ with optional noise component $\noc(t)$
Input $\filt(t)$ consists of $\soc(t)$
scaled by $\sca$ with optional $\noc(t)$
and is successively transformed into envelope $\env(t)$,
logarithmically compressed envelope $\db(t)$, and
intensity-adapted envelope $\adapt(t)$.
\textbf{Top}:~Example representations of $\env(t)$,
$\db(t)$, and $\adapt(t)$ for different $\sca$.
\textbf{Top}:~Examples of $\env(t)$, $\db(t)$, and
$\adapt(t)$ for different $\sca$.
\textbf{a}:~Noiseless case.
\textbf{b}:~Noisy case.
\textbf{Bottom}:~Intensity measures over a range of $\sca$.
\textbf{c}:~Noiseless case: Standard deviations $\sigma_x$
of $\env(t)$, $\db(t)$, and $\adapt(t)$.
\textbf{d}:~Noisy case: Ratios of $\sigma_x$ of $\env(t)$,
$\db(t)$, and $\adapt(t)$ to the respective reference
standard deviation $\sigma_{\eta}$ for input
$\filt(t)=\noc(t)$. Shaded areas indicate $5\,\%$ (dark
grey) and $95\,\%$ (light grey) curve span for
$\adapt(t)$.
\textbf{e}:~Ratios of $\sigma_x$ to $\sigma_{\eta}$ of
\textbf{Bottom}:~Intensity measures over $\sca$.
\textbf{c}:~Noiseless case: Standard deviation $\sigma_x$
of $\env(t)$, $\db(t)$, and $\adapt(t)$, respectively.
\textbf{d}:~Noisy case: Ratio of $\sigma_x$ to the
respective pure-noise reference $\sigma_{\eta}$ for
$\sca=0$. Shaded areas indicate $5\,\%$ (dark grey) and
$95\,\%$ (light grey) curve span for $\adapt(t)$.
\textbf{e}:~Ratio of $\sigma_x$ to $\sigma_{\eta}$ of
$\adapt(t)$ as in \textbf{d} for different species
(averaged over songs and recordings, see appendix
Fig\,\ref{fig:app_log-hp_curves}). Dots indicate $95\,\%$
(averaged over songs and recordings, appendix
Fig.\,\ref{fig:app_log-hp_curves}). Dots indicate $95\,\%$
curve span per species.
}
\label{fig:log-hp}
@@ -1043,33 +1043,32 @@ intensity invariance are further explored in a later section.
\caption{\textbf{Intensity invariance through thresholding and temporal
averaging is mediated by the interaction of threshold
value and noise floor.}
Input $\adapt(t)$ consists of song component $\soc(t)$
scaled by $\sca$ with optional noise component $\noc(t)$
and is transformed into single kernel response $c(t)$,
binary response $b(t)$, and feature $f(t)$. Different
color shades indicate different threshold values $\Theta$
(multiples of reference standard deviation $\sigma_{\eta}$
of $c(t)$ for input $\adapt(t)=\noc(t)$, with darker
colors for higher $\Theta$).
\textbf{Left}:~Noisy case: Example representations of
$\adapt(t)$ as well as $c(t)$, $b(t)$, and $f(t)$ for
different $\sca$.
Input $\adapt(t)$ consists of $\soc(t)$ scaled by $\sca$ with
optional $\noc(t)$ and is transformed into single kernel
response $c(t)$, binary response $b(t)$, and feature
$f(t)$. Different color shades indicate different
threshold values $\Theta$ (multiples of pure-noise
standard deviation $\sigma_{\eta}$ of $c(t)$ for $\sca=0$,
with darker colors for higher $\Theta$; see also appendix
Fig.\,\ref{fig:app_thresh-lp_kern-sd}).
\textbf{Left}:~Noisy case: Examples of $\adapt(t)$ as well
as $c(t)$, $b(t)$, and $f(t)$ for different $\sca$.
\textbf{a}:~$\adapt(t)$ with kernel $k(t)$ in black.
\textbf{b\,-\,d}: $c(t)$, $b(t)$, and $f(t)$ based on the
same $\adapt(t)$ from \textbf{a} but with different
same $\adapt(t)$ from \textbf{a} but for different
$\Theta$.
\textbf{Right}:~Average value $\mu_f$ of $f(t)$ for each
$\Theta$ from \textbf{b\,-\,d}. Dots indicate $95\,\%$
curve span (noisy case).
\textbf{e}:~$\mu_f$ over a range of $\sca$, once for the
noisy case (solid lines) and once for the noiseless case
(dotted lines).
\textbf{f}:~Noisy case: $\mu_f$ over the standard
deviation of input $\adapt$ corresponding to the values of
$\sca$ shown in \textbf{e}. Shaded area indicates standard
deviations that would be capped in the output $\adapt(t)$
of the previous transformation pair (see
Fig.\,\ref{fig:log-hp}cd).
\textbf{e}:~$\mu_f$ over $\sca$, once for the noisy case
(solid lines) and once for the noiseless case (dotted
lines).
\textbf{f}:~Noisy case: $\mu_f$ over standard deviation
$\sigma_{\text{adapt}}$ of input $\adapt(t)$ corresponding to
$\sca$ shown in \textbf{e}. Shaded area indicates values
of $\sigma_{\text{adapt}}$ that are capped in the output
$\adapt(t)$ of the previous transformation pair
(Fig.\,\ref{fig:log-hp}cd).
}
\label{fig:thresh-lp_single}
\end{figure}
@@ -1148,29 +1147,29 @@ point of $f_i(t)$ less relevant.
saturates at different points in feature space.}
Same input and processing as in
Fig.\,\ref{fig:thresh-lp_single} but with three different
kernels $k_i$, each with a single kernel-specific
threshold value $\thr=0.5\cdot\sigma_{\eta_i}$.
kernels $k_i$ and a single kernel-specific threshold value
$\thr=0.5\cdot\sigma_{\eta_i}$ (appendix
Fig.\,\ref{fig:app_thresh-lp_kern-sd}).
\textbf{a}:~Examples of species-specific grasshopper
songs.
\textbf{Middle}:~Average value $\mu_{f_i}$ of each feature
\textbf{Middle}:~Average value $\muf$ of each feature
$f_i(t)$ over $\sca$ per species (averaged over songs and
recordings, see appendix
Figs.\,\ref{fig:app_thresh-lp_pure} and
\ref{fig:app_thresh-lp_noise}). Different color shades
indicate different kernels $k_i$. Dots indicate $95\,\%$
curve span per $k_i$.
recordings, appendix Figs.\,\ref{fig:app_thresh-lp_pure}
and \ref{fig:app_thresh-lp_noise}). Different color shades
indicate different $k_i$. Dots indicate $95\,\%$ curve
span per $k_i$.
\textbf{b}:~Noiseless case.
\textbf{c}:~Noisy case.
\textbf{Bottom}:~2D feature spaces spanned by each pair of
$f_i(t)$. Each trajectory corresponds to a
species-specific combination of $\mu_{f_i}$ that develops
species-specific combination of $\muf$ that develops
with $\sca$ (colorbars). Horizontal dashes in the colorbar
indicate $5\,\%$ (dark grey) and $95\,\%$ (light grey)
curve span of the norm across all three $\mu_{f_i}$ per
curve span of the norm across all three $\muf$ per
species.
\textbf{d}:~Noiseless case.
\textbf{e}:~Noisy case. Shaded areas indicate the average
minimum $\mu_{f_i}$ across all species-specific trajectories.
minimum $\muf$ across all species-specific trajectories.
}
\label{fig:thresh-lp_species}
\end{figure}
@@ -1255,34 +1254,34 @@ in principle, work together towards an intensity-invariant song representation.
\includegraphics[width=\textwidth]{figures/fig_invariance_full_Omocestus_rufipes.pdf}
\caption{\textbf{Step-wise emergence of intensity-invariant song
representations along the model pathway.}
Input $\raw(t)$ consists of song component $\soc(t)$
scaled by $\sca$ with added noise component $\noc(t)$ and
is processed up to the feature set $f_i(t)$. Different
color shades indicate different types of Gabor kernels
with specific lobe number $\kn$ and either $+$ or $-$
sign, sorted (dark to light) first by increasing $\kn$ and
then by sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$
for each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8,
and $16\,$ms per type; 8 types, 40 kernels in total).
\textbf{a}:~Example representations of $\filt(t)$,
$\env(t)$, $\db(t)$, $\adapt(t)$, $c_i(t)$, and $f_i(t)$
for different $\sca$.
\textbf{b}:~Intensity measures over $\sca$. For $c_i(t)$
and $f_i(t)$, the median over kernels is shown. Dots
Input $\raw(t)$ consists of $\soc(t)$ scaled by $\sca$
with added $\noc(t)$ and is processed up to the feature
set $f_i(t)$ using kernel-specific threshold values
$\thr=2\cdot\sigma_{\eta_i}$ (appendix
Fig.\,\ref{fig:app_full_kern-sd}). Different color shades
indicate different types of Gabor kernels with specific
lobe number $\kn$ and either $+$ or $-$ sign, sorted (dark
to light) first by increasing $\kn$ and then by
sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$ for
each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8, and
$16\,$ms per type; 8 types, 40 $k_i(t)$ in total).
\textbf{a}:~Examples of $\filt(t)$, $\env(t)$, $\db(t)$,
$\adapt(t)$, $c_i(t)$, and $f_i(t)$ for different $\sca$.
\textbf{b}:~Intensity measures over $\sca$. The median
over $k_i(t)$ is shown for $c_i(t)$ and $f_i(t)$. Dots
indicate $95\,\%$ curve span for $\db(t)$, $\adapt(t)$,
$c_i(t)$, and $f_i(t)$.
\textbf{c}:~Average value $\mu_{f_i}$ of each feature
$f_i(t)$ over $\sca$.
\textbf{d}:~Ratios of intensity measures to the respective
reference value for input $\raw(t)=\noc(t)$. For $c_i(t)$
and $f_i(t)$, the median over kernel-specific ratios is
shown.
\textbf{e}:~Ratios of standard deviation $\sigma_{c_i}$ of
\textbf{c}:~Average value $\muf$ of each $f_i(t)$
over $\sca$.
\textbf{d}:~Ratio of intensity measures from \textbf{b} to
the respective pure-noise reference for $\sca=0$.
\textbf{e}:~Ratio of standard deviation $\sigma_{c_i}$ of
each $c_i(t)$.
\textbf{f}:~Ratios of $\mu_{f_i}$.
\textbf{f}:~Ratio of $\muf$.
\textbf{g}:~Distributions of kernel-specific $\sca$ that
correspond to $95\,\%$ curve span for $c_i(t)$ and
$f_i(t)$. Dots indicate the values from \textbf{b}.
$f_i(t)$. Dots indicate values based on the median from
\textbf{b}.
}
\label{fig:pipeline_full}
\end{figure}
@@ -1337,32 +1336,24 @@ guaranteed simply by disabling logarithmic compression.
\includegraphics[width=\textwidth]{figures/fig_invariance_short_Omocestus_rufipes.pdf}
\caption{\textbf{Effects of disabling logarithmic compression on intensity
invariance along the model pathway.}
Input $\raw(t)$ consists of song component $\soc(t)$
scaled by $\sca$ with added noise component $\noc(t)$ and
is processed up to the feature set $f_i(t)$, skipping
$\db(t)$. Different color shades indicate different types
of Gabor kernels with specific lobe number $\kn$ and
either $+$ or $-$ sign, sorted (dark to light) first by
increasing $\kn$ and then by
sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$ for
each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8, and
$16\,$ms per type; 8 types, 40 kernels in total).
\textbf{a}:~Example representations of $\filt(t)$,
$\env(t)$, $\adapt(t)$, $c_i(t)$, and $f_i(t)$ for
different $\sca$.
\textbf{b}:~Intensity measures over $\sca$. For $c_i(t)$
and $f_i(t)$, the median over kernels is shown. Dots
indicate $95\,\%$ curve span for $f_i(t)$.
\textbf{c}:~Average value $\mu_{f_i}$ of each feature
$f_i(t)$ over $\sca$.
\textbf{d}:~Ratios of intensity measures to the respective
reference value for input $\raw(t)=\noc(t)$. For $c_i(t)$
and $f_i(t)$, the median over kernel-specific ratios is
shown.
\textbf{e}:~Ratios of $\mu_{f_i}$.
Same input and processing as in
Fig.\,\ref{fig:pipeline_full}, using kernel-specific
threshold values $\thr=2\cdot\sigma_{\eta_i}$ (appendix
Fig.\,\ref{fig:app_short_kern-sd}), except that
logarithmic compression and hence $\db(t)$ are skipped.
\textbf{a}:~Examples of $\filt(t)$, $\env(t)$,
$\adapt(t)$, $c_i(t)$, and $f_i(t)$ for different $\sca$.
\textbf{b}:~Intensity measures over $\sca$. The median
over $k_i(t)$ is shown for $c_i(t)$ and $f_i(t)$. Dot
indicates $95\,\%$ curve span for $f_i(t)$.
\textbf{c}:~Average value $\muf$ of each $f_i(t)$
over $\sca$.
\textbf{d}:~Ratio of intensity measures from \textbf{b} to
the respective pure-noise reference for $\sca=0$.
\textbf{e}:~Ratio of $\muf$.
\textbf{f}:~Distribution of kernel-specific $\sca$ that
correspond to $95\,\%$ curve span for $f_i(t)$. Dots
indicate the value from \textbf{b}.
correspond to $95\,\%$ curve span for $f_i(t)$. Dot
indicates value based on the median from \textbf{b}.
}
\label{fig:pipeline_short}
\end{figure}
@@ -1426,26 +1417,27 @@ distances~(Fig.\,\ref{fig:pipeline_field}a, bottom row).
Input $\raw(t)$ consists of a song of \textit{P.
parallelus} recorded in the field at eight different
distances $d$ and is processed up to the feature set
$f_i(t)$. Different color shades indicate different types
of Gabor kernels with specific lobe number $\kn$ and
either $+$ or $-$ sign, sorted (dark to light) first by
increasing $\kn$ and then by
$f_i(t)$ using kernel-specific threshold values
$\thr=2\cdot\sigma_{\eta_i}$ (appendix
Fig.\,\ref{fig:app_field_kern-sd}). Different color shades
indicate different types of Gabor kernels with specific
lobe number $\kn$ and either $+$ or $-$ sign, sorted (dark
to light) first by increasing $\kn$ and then by
sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$ for
each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8, and
$16\,$ms per type; 8 types, 40 kernels in total).
$16\,$ms per type; 8 types, 40 $k_i(t)$ in total).
\textbf{a}:~$\filt(t)$, $\env(t)$, $\db(t)$, $\adapt(t)$,
$c_i(t)$, and $f_i(t)$ at each $d$. A noise segment from
the same recording is shown for reference.
\textbf{b}:~Intensity measures over $d$. For $c_i(t)$
and $f_i(t)$, the median over kernels is shown.
\textbf{c}:~Average value $\mu_{f_i}$ of each feature
$f_i(t)$ over $d$.
\textbf{d}:~Ratios of intensity measures to the respective
value obtained from the noise reference. For $c_i(t)$ and
$f_i(t)$, the median over kernel-specific ratios is shown.
\textbf{e}:~Ratios of standard deviation $\sigma_{c_i}$ of
\textbf{b}:~Intensity measures over $d$. The median over
$k_i(t)$ is shown for $c_i(t)$ and $f_i(t)$.
\textbf{c}:~Average value $\muf$ of each $f_i(t)$ over
$d$.
\textbf{d}:~Ratio of intensity measures from \textbf{b} to
the respective value obtained from the noise reference.
\textbf{e}:~Ratio of standard deviation $\sigma_{c_i}$ of
each $c_i(t)$.
\textbf{f}:~Ratios of $\mu_{f_i}$.
\textbf{f}:~Ratio of $\muf$.
}
\label{fig:pipeline_field}
\end{figure}
@@ -1470,8 +1462,8 @@ song, so that each dot within a subplot corresponds to a single feature
$f_i(t)$. For the intraspecific
comparisons~(Fig.\,\ref{fig:feat_cross_species}, upper triangular), the pairs
of $\muf$ are distributed closely around the diagonal, with a minimum
correlation coefficient of $\rho=0.85$, a maximum of $\rho=0.99$, and a median
of $\rho=0.92$. A given $f_i(t)$ thus tends to have a similar $\muf$ across
correlation coefficient of $\rho=0.82$, a maximum of $\rho=0.99$, and a median
of $\rho=0.91$. A given $f_i(t)$ thus tends to have a similar $\muf$ across
different songs of the same species. In contrast, the pairs of $\muf$ for the
interspecific comparisons~(Fig.\,\ref{fig:feat_cross_species}, lower
triangular) are distributed in a variety of different ways, most in broader
@@ -1479,7 +1471,7 @@ clouds (e.g. \textit{C. biguttulus} vs. \textit{C. mollis}) but some more
narrowly around the diagonal (e.g. \textit{P. parallelus} vs. \textit{C.
dispar}). The correlation coefficients $\rho$ vary widely between different
interspecific comparisons, with a minimum of $\rho=-0.1$, a maximum of
$\rho=0.92$, and a median of $\rho=0.53$. A given $f_i(t)$ therefore tends to
$\rho=0.91$, and a median of $\rho=0.40$. A given $f_i(t)$ therefore tends to
have a less similar $\muf$ across different species than within the same
species, although certain exceptions exist~(Fig.\,\ref{fig:feat_cross_species},
lower right). Accordingly, the feature representation that is generated by the
@@ -1498,18 +1490,17 @@ natural song variation.
\centering
\includegraphics[width=\textwidth]{figures/fig_features_cross_species.pdf}
\caption{\textbf{Interspecific and intraspecific feature variability.}
Average value $\mu_{f_i}$ of each feature $f_i(t)$ against
its counterpart from a 2nd feature set based on a
different input $\raw(t)$. Each dot within a subplot
represents a single feature $f_i(t)$. Different color
Average value $\muf$ of each feature $f_i(t)$ against its
counterpart from a 2nd feature set based on a different
input $\raw(t)$. Data is based on the saturated $\muf$
from Fig.\,\ref{fig:pipeline_full}. Each dot within a
subplot represents a single $f_i(t)$. Different color
shades indicate different types of Gabor kernels with
specific lobe number $\kn$ and either $+$ or $-$ sign,
sorted (dark to light) first by increasing $\kn$ and then
by sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$ for
each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8, and
$16\,$ms per type; 8 types, 40 kernels in total). Data is
based on the analysis underlying
Fig\,\ref{fig:pipeline_full}.
$16\,$ms per type; 8 types, 40 kernels in total).
\textbf{Lower triangular}:~Interspecific comparisons
between single songs of different species.
\textbf{Upper triangular}:~Intraspecific comparisons
@@ -1524,7 +1515,8 @@ natural song variation.
\end{figure}
\FloatBarrier
\section{Conclusions \& outlook}
\newpage
\section{Discussion}
% RIPPED FROM INTRODUCTION:
@@ -1677,105 +1669,189 @@ $\rightarrow$ Graded again but highly decorrelated from the acoustic stimulus\\
$\rightarrow$ Parameters of a behavioral response may be graded (e.g. approach speed),
initiation of one behavior over another is categorical (e.g. approach/stay)
\newpage
\section{Appendix}
% Not sure if we really need this one. Might raise more questions than it
% provides answers. The noise component is not stable throughout nonlinear
% transformations, that is all the reader needs to know, i believe.
\begin{figure}[!ht]
\centering
\includegraphics[width=\textwidth]{figures/fig_noise_env_sd_conversion_appendix.pdf}
\caption{\textbf{}
}
\caption{\textbf{Conversion of the noise component by envelope extraction.}
Standard deviation $\sigma_{\eta}$ of noise component
$\noc(t)$ within the signal envelope $\env(t)$ over scale
$\sca$. Based on input $\raw(t)$ with $\sigma_{\eta}=1$
(corresponding to the analysis underlying
Fig.\,\ref{fig:rect-lp}), using 100 realizations of
$\noc(t)$.}
\label{fig:app_env-sd}
\end{figure}
\end{figure}% Referenced.
\FloatBarrier
\begin{figure}[!ht]
\centering
\includegraphics[width=\textwidth]{figures/fig_invariance_rect-lp_appendix.pdf}
\caption{\textbf{}
}
\caption{\textbf{Species-specific data underlying Fig.\,\ref{fig:rect-lp}e.}
Ratio of the standard deviation $\sigma_{\text{env}}$ to
the pure-noise reference $\sigma_{\eta}$ of the signal
envelope $\env(t)$ over scale $\sca$ for different cutoff
frequencies $\fc$ of the lowpass filter extracting
$\env(t)$. Solid lines and shaded areas indicate mean
$\pm$ standard deviation across songs per recording.
Dashed lines indicate mean across recordings (shown in
Fig.\,\ref{fig:rect-lp}e).}
\label{fig:app_rect-lp}
\end{figure}
\end{figure}% Referenced.
\FloatBarrier
\begin{figure}[!ht]
\centering
\includegraphics[width=\textwidth]{figures/fig_invariance_log-hp_appendix.pdf}
\caption{\textbf{}
}
\caption{\textbf{Species-specific data underlying Fig.\,\ref{fig:log-hp}e.}
Ratio of the standard deviation $\sigma_{\text{adapt}}$ to
the pure-noise reference $\sigma_{\eta}$ of the
intensity-adapted envelope $\adapt(t)$ over scale $\sca$.
Solid lines and shaded areas indicate mean $\pm$ standard
deviation across songs per recording. Dashed lines
indicate mean across recordings (shown in
Fig.\,\ref{fig:log-hp}e).}
\label{fig:app_log-hp_curves}
\end{figure}
\end{figure}% Referenced.
\FloatBarrier
\begin{figure}[!ht]
\centering
\includegraphics[width=\textwidth]{figures/fig_saturation_log-hp_appendix.pdf}
\caption{\textbf{}
}
\caption{\textbf{Species-specific saturation points underlying
Fig.\,\ref{fig:log-hp}e.}
Distribution of saturation points ($95\,\%$ curve span) of
ratio $\sigma_{\text{adapt}} / \sigma_{\eta}$ of the
intensity-adapted envelope $\adapt(t)$ over scale $\sca$
across all available songs. Dots indicate the saturation
point of the mean curve across songs and recordings (shown
in Fig.\,\ref{fig:log-hp}e, see also appendix
Fig.\,\ref{fig:app_log-hp_curves}).}
\label{fig:app_log-hp_saturation}
\end{figure}
\end{figure}% Referenced.
\FloatBarrier
\begin{figure}[!ht]
\centering
\includegraphics[width=\textwidth]{figures/fig_invariance_thresh-lp_pure_appendix.pdf}
\caption{\textbf{}
}
\caption{\textbf{Species-specific data underlying Fig.\,\ref{fig:thresh-lp_species}bd.}
Average value $\muf$ of each of the three features
$f_i(t)$ over scale $\sca$ in the noiseless case. Solid
lines and shaded areas indicate mean $\pm$ standard
deviation across songs per recording. Dashed lines
indicate mean across recordings (shown in
Fig.\,\ref{fig:thresh-lp_species}bd).}
\label{fig:app_thresh-lp_pure}
\end{figure}
\end{figure}% Referenced.
\FloatBarrier
\begin{figure}[!ht]
\centering
\includegraphics[width=\textwidth]{figures/fig_invariance_thresh-lp_noise_appendix.pdf}
\caption{\textbf{}
}
\caption{\textbf{Species-specific data underlying Fig.\,\ref{fig:thresh-lp_species}ce.}
Average value $\muf$ of each of the three features
$f_i(t)$ over scale $\sca$ in the noisy case. Solid lines
and shaded areas indicate mean $\pm$ standard deviation
across songs per recording. Dashed lines indicate mean
across recordings (shown in
Fig.\,\ref{fig:thresh-lp_species}ce).}
\label{fig:app_thresh-lp_noise}
\end{figure}
\end{figure}% Referenced.
\FloatBarrier
\begin{figure}[!ht]
\centering
\includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_thresh_lp_appendix.pdf}
\caption{\textbf{}
\caption{\textbf{Relation between threshold value and pure-noise feature
value for Fig.\,\ref{fig:thresh-lp_single} and
Fig.\,\ref{fig:thresh-lp_species}.}
Proportion of pure-noise kernel response $c_i(t)$ that
exceeds threshold value $\thr$ --- which determines the
average value $\muf$ of feature $f_i(t)$ --- over $\thr$
in multiples of standard deviation $\sigma_{c_i}$.
Corresponds to a ``reverse'' cumulative distribution
function~(rCDF) of $c_i(t)$. Black solid lines indicate rCDF per
kernel $k_i(t)$. Red dashed line indicates rCDF for a
normal distribution with $\mu=0$ and $\sigma=1$.
}
\label{fig:app_thresh-lp_kern-sd}
\end{figure}
\end{figure}% Referenced.
\FloatBarrier
\begin{figure}[!ht]
\centering
\includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_full_appendix.pdf}
\caption{\textbf{}
\caption{\textbf{Relation between threshold value and pure-noise feature
value for Fig.\,\ref{fig:pipeline_full}.}
Proportion of pure-noise kernel response $c_i(t)$ that
exceeds threshold value $\thr$ --- which determines the
average value $\muf$ of feature $f_i(t)$ --- over $\thr$
in multiples of standard deviation $\sigma_{c_i}$.
Corresponds to a ``reverse'' cumulative distribution
function~(rCDF) of $c_i(t)$. Black solid lines indicate rCDF per
kernel $k_i(t)$. Red dashed line indicates rCDF for a
normal distribution with $\mu=0$ and $\sigma=1$.
}
\label{fig:app_full_kern-sd}
\end{figure}
\end{figure}% Referenced.
\FloatBarrier
\begin{figure}[!ht]
\centering
\includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_short_appendix.pdf}
\caption{\textbf{}
\caption{\textbf{Relation between threshold value and pure-noise feature
value for Fig.\,\ref{fig:pipeline_short}.}
Proportion of pure-noise kernel response $c_i(t)$ that
exceeds threshold value $\thr$ --- which determines the
average value $\muf$ of feature $f_i(t)$ --- over $\thr$
in multiples of standard deviation $\sigma_{c_i}$.
Corresponds to a ``reverse'' cumulative distribution
function~(rCDF) of $c_i(t)$. Black solid lines indicate rCDF per
kernel $k_i(t)$. Red dashed line indicates rCDF for a
normal distribution with $\mu=0$ and $\sigma=1$.
}
\label{fig:app_short_kern-sd}
\end{figure}
\end{figure}% Referenced.
\FloatBarrier
\begin{figure}[!ht]
\centering
\includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_field_appendix.pdf}
\caption{\textbf{}
\caption{\textbf{Relation between threshold value and pure-noise feature
value for Fig.\,\ref{fig:pipeline_field}.}
Proportion of pure-noise kernel response $c_i(t)$ that
exceeds threshold value $\thr$ --- which determines the
average value $\muf$ of feature $f_i(t)$ --- over $\thr$
in multiples of standard deviation $\sigma_{c_i}$.
Corresponds to a ``reverse'' cumulative distribution
function~(rCDF) of $c_i(t)$. Black solid lines indicate rCDF per
kernel $k_i(t)$. Red dashed line indicates rCDF for a
normal distribution with $\mu=0$ and $\sigma=1$.
}
\label{fig:app_field_kern-sd}
\end{figure}
\end{figure}% Referenced.
\FloatBarrier
\begin{figure}[!ht]
\centering
\includegraphics[width=\textwidth]{figures/fig_invariance_cross_species_thresh_appendix.pdf}
\caption{\textbf{}
\caption{\textbf{Threshold-dependent intensity invariance of
species-specific feature sets.}
Same input and processing as in
Fig.\,\ref{fig:pipeline_full}, using different
kernel-specific threshold values $\thr$ (multiples of
pure-noise standard deviation $\sigma_{\eta_i}$ of
$c_i(t)$ for $\sca=0$; see also appendix
Fig.\,\ref{fig:app_full_kern-sd}). Average value $\muf$ of
each feature $f_i(t)$ over $\sca$.
}
\label{fig:app_cross_species_thresh}
\end{figure}
\end{figure}% Reference this one!
\FloatBarrier
\end{document}