Captioned appendix figures.
Polished some figures. Shortened existing figure captions.
This commit is contained in:
444
main.tex
444
main.tex
@@ -228,6 +228,7 @@ intensity-invariant song representations, the interaction between these
|
||||
mechanisms, the overall capacity for intensity invariance in the system, and
|
||||
the ethological implications of our findings.
|
||||
|
||||
\newpage
|
||||
\section{Methods}
|
||||
% This maybe does not quite fit here, but it is the most general part of the
|
||||
% methods and applies throughout the whole section, so I put it here for now.
|
||||
@@ -289,8 +290,8 @@ the following sections.
|
||||
representations~(boxes) and transformations~(arrows) along
|
||||
the model pathway. All representations are time-varying.
|
||||
1st half: Preprocessing stage~(one-dimensional
|
||||
representation). 2nd half: Feature extraction
|
||||
stage~(high-dimensional representation). }
|
||||
representations). 2nd half: Feature extraction
|
||||
stage~(high-dimensional representations). }
|
||||
\label{fig:pathway}
|
||||
\end{figure}
|
||||
|
||||
@@ -347,8 +348,8 @@ the following feature extraction stage.
|
||||
\begin{figure}[!ht]
|
||||
\centering
|
||||
\includegraphics[width=\textwidth]{figures/fig_pre_stages.pdf}
|
||||
\caption{\textbf{Representations of a song of \textit{O. rufipes} during
|
||||
the preprocessing stage.}
|
||||
\caption{\textbf{Song representations during the preprocessing stage.}
|
||||
Example song of \textit{O. rufipes}.
|
||||
\textbf{a}:~Bandpass filtered tympanal signal $\filt(t)$.
|
||||
\textbf{b}:~Signal envelope $\env(t)$.
|
||||
\textbf{c}:~Logarithmically compressed envelope $\db(t)$.
|
||||
@@ -483,14 +484,15 @@ or a simple linear classifier.
|
||||
\begin{figure}[!ht]
|
||||
\centering
|
||||
\includegraphics[width=\textwidth]{figures/fig_feat_stages.pdf}
|
||||
\caption{\textbf{Representations of a song of \textit{O. rufipes} during
|
||||
the feature extraction stage.}
|
||||
\caption{\textbf{Song representations during the feature extraction stage.}
|
||||
Example song of \textit{O. rufipes}.
|
||||
Different color shades indicate different types of Gabor
|
||||
kernels with specific lobe number $\kn$ and either $+$ or
|
||||
$-$ sign, sorted (dark to light) first by increasing $\kn$
|
||||
and then by sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then
|
||||
$-$ for each $\kn$; two kernel widths $\kw$ of $4\,$ms and
|
||||
$32\,$ms per type; 8 types, 16 kernels in total).
|
||||
kernels with specific lobe number $\kni$ and either $+$ or
|
||||
$-$ sign, sorted (dark to light) first by increasing
|
||||
$\kni$ and then by sign~($1\,\leq\,\kni\,\leq\,4$; first
|
||||
$+$, then $-$ for each $\kni$; two kernel widths $\kwi$ of
|
||||
$4\,$ms and $32\,$ms per type; 8 types, 16 kernels in
|
||||
total).
|
||||
\textbf{a}:~Kernel-specific filter responses $c_i(t)$.
|
||||
\textbf{b}:~Binary responses $b_i(t)$.
|
||||
\textbf{c}:~Finalized features $f_i(t)$.}
|
||||
@@ -653,12 +655,13 @@ which is a reasonable assumption for the raw $\soc(t)$ and $\noc(t)$. However,
|
||||
the dependency of the ratio on $\sca$ is not necessarily the same for
|
||||
representations that are transformed from $x(t)$ by nonlinear operations, since
|
||||
these change the relationship of $\soc(t)$ and $\noc(t)$ in an unpredictable
|
||||
fashion. Furthermore, the ratio is not a proper SNR of the representation
|
||||
because it does not relate $\soc(t)$ to $\noc(t)$ within the representation but
|
||||
rather the entire representation to $\noc(t)$ alone. However, it still provides
|
||||
a useful measure of the relative intensity of a representation with and without
|
||||
$\soc(t)$, which is the closest we can get to the SNR of the representation. As
|
||||
such, the ratio of intensity measures is referred to as SNR in the following.
|
||||
fashion~(see appendix Fig.\,\ref{fig:app_env-sd}). Furthermore, the ratio is
|
||||
not a proper SNR of the representation because it does not relate $\soc(t)$ to
|
||||
$\noc(t)$ within the representation but rather the entire representation to
|
||||
$\noc(t)$ alone. However, it still provides a useful measure of the relative
|
||||
intensity of a representation with and without $\soc(t)$, which is the closest
|
||||
we can get to the SNR of the representation. As such, the ratio of intensity
|
||||
measures is referred to as SNR in the following.
|
||||
% Is this legal? "SNR" is much shorter than "ratio of intensity measure to the pure-noise reference measure".
|
||||
% Haven't used it much yet, stuck to "ratio" in most cases.
|
||||
|
||||
@@ -694,9 +697,10 @@ $\thr$ has been specified as a multiple of the pure-noise reference standard
|
||||
deviation $\sigma_{c_i}$ for input $x(t)=\noc(t)$. This ensures that $\thr$ as
|
||||
well as the resulting $b_i(t)$ and $f_i(t)$ are comparable across different
|
||||
$k_i(t)$ because each pure-noise $c_i(t)$ approximately follows a normal
|
||||
distribution~(see appendix
|
||||
distribution around zero~(see appendix
|
||||
Figs.\,\ref{fig:app_thresh-lp_kern-sd}--\ref{fig:app_field_kern-sd}).
|
||||
|
||||
\newpage
|
||||
\section{Results}
|
||||
|
||||
\subsection{Mechanisms driving the emergence of intensity invariance}
|
||||
@@ -767,25 +771,23 @@ more robust input representation and higher input SNR.
|
||||
\includegraphics[width=\textwidth]{figures/fig_invariance_rect_lp.pdf}
|
||||
\caption{\textbf{Rectification and lowpass filtering improve SNR
|
||||
but do not contribute to intensity invariance.}
|
||||
Input $\raw(t)$ consists of song component $\soc(t)$ scaled by
|
||||
$\sca$ with optional noise component $\noc(t)$ and is
|
||||
successively transformed into tympanal signal $\filt(t)$ and
|
||||
envelope $\env(t)$. Different line styles indicate different
|
||||
cutoff frequencies $\fc$ of the lowpass filter extracting
|
||||
$\env(t)$.
|
||||
\textbf{Top}:~Example representations of $\filt(t)$ and
|
||||
$\env(t)$ for different $\sca$.
|
||||
Input $\raw(t)$ consists of $\soc(t)$ scaled by $\sca$ with
|
||||
optional $\noc(t)$ and is successively transformed into
|
||||
tympanal signal $\filt(t)$ and envelope $\env(t)$.
|
||||
\textbf{Top}:~Examples of $\filt(t)$ and $\env(t)$ for
|
||||
different $\sca$.
|
||||
\textbf{a}:~Noiseless case.
|
||||
\textbf{b}:~Noisy case.
|
||||
\textbf{Bottom}:~Intensity measures over a range of $\sca$.
|
||||
\textbf{c}:~Noiseless case: Standard deviations $\sigma_x$ of
|
||||
$\filt(t)$ and $\env(t)$.
|
||||
\textbf{d}:~Noisy case: Ratios of $\sigma_x$ of $\filt(t)$ and
|
||||
$\env(t)$ to the respective reference standard deviation
|
||||
$\sigma_{\eta}$ for input $\raw(t)=\noc(t)$.
|
||||
\textbf{e}:~Ratios of $\sigma_x$ to $\sigma_{\eta}$ of
|
||||
\textbf{Bottom}:~Intensity measures over $\sca$. Different
|
||||
line styles indicate different cutoff frequencies $\fc$ of the
|
||||
lowpass filter extracting $\env(t)$.
|
||||
\textbf{c}:~Noiseless case: Standard deviation $\sigma_x$ of
|
||||
$\filt(t)$ and $\env(t)$, respectively.
|
||||
\textbf{d}:~Noisy case: Ratio of $\sigma_x$ to the respective
|
||||
pure-noise reference $\sigma_{\eta}$ for $\sca=0$.
|
||||
\textbf{e}:~Ratio of $\sigma_x$ to $\sigma_{\eta}$ of
|
||||
$\env(t)$ as in \textbf{d} for different species (averaged
|
||||
over songs and recordings, see appendix
|
||||
over songs and recordings, appendix
|
||||
Fig.\,\ref{fig:app_rect-lp}).
|
||||
}
|
||||
\label{fig:rect-lp}
|
||||
@@ -907,28 +909,26 @@ is a recurring phenomenon that is further addressed in the following sections.
|
||||
\caption{\textbf{Intensity invariance through logarithmic compression and
|
||||
adaptation is restricted by the noise floor and decreases
|
||||
SNR.}
|
||||
Input $\filt(t)$ consists of song component $\soc(t)$
|
||||
scaled by $\sca$ with optional noise component $\noc(t)$
|
||||
Input $\filt(t)$ consists of $\soc(t)$
|
||||
scaled by $\sca$ with optional $\noc(t)$
|
||||
and is successively transformed into envelope $\env(t)$,
|
||||
logarithmically compressed envelope $\db(t)$, and
|
||||
intensity-adapted envelope $\adapt(t)$.
|
||||
\textbf{Top}:~Example representations of $\env(t)$,
|
||||
$\db(t)$, and $\adapt(t)$ for different $\sca$.
|
||||
\textbf{Top}:~Examples of $\env(t)$, $\db(t)$, and
|
||||
$\adapt(t)$ for different $\sca$.
|
||||
\textbf{a}:~Noiseless case.
|
||||
\textbf{b}:~Noisy case.
|
||||
\textbf{Bottom}:~Intensity measures over a range of $\sca$.
|
||||
\textbf{c}:~Noiseless case: Standard deviations $\sigma_x$
|
||||
of $\env(t)$, $\db(t)$, and $\adapt(t)$.
|
||||
\textbf{d}:~Noisy case: Ratios of $\sigma_x$ of $\env(t)$,
|
||||
$\db(t)$, and $\adapt(t)$ to the respective reference
|
||||
standard deviation $\sigma_{\eta}$ for input
|
||||
$\filt(t)=\noc(t)$. Shaded areas indicate $5\,\%$ (dark
|
||||
grey) and $95\,\%$ (light grey) curve span for
|
||||
$\adapt(t)$.
|
||||
\textbf{e}:~Ratios of $\sigma_x$ to $\sigma_{\eta}$ of
|
||||
\textbf{Bottom}:~Intensity measures over $\sca$.
|
||||
\textbf{c}:~Noiseless case: Standard deviation $\sigma_x$
|
||||
of $\env(t)$, $\db(t)$, and $\adapt(t)$, respectively.
|
||||
\textbf{d}:~Noisy case: Ratio of $\sigma_x$ to the
|
||||
respective pure-noise reference $\sigma_{\eta}$ for
|
||||
$\sca=0$. Shaded areas indicate $5\,\%$ (dark grey) and
|
||||
$95\,\%$ (light grey) curve span for $\adapt(t)$.
|
||||
\textbf{e}:~Ratio of $\sigma_x$ to $\sigma_{\eta}$ of
|
||||
$\adapt(t)$ as in \textbf{d} for different species
|
||||
(averaged over songs and recordings, see appendix
|
||||
Fig\,\ref{fig:app_log-hp_curves}). Dots indicate $95\,\%$
|
||||
(averaged over songs and recordings, appendix
|
||||
Fig.\,\ref{fig:app_log-hp_curves}). Dots indicate $95\,\%$
|
||||
curve span per species.
|
||||
}
|
||||
\label{fig:log-hp}
|
||||
@@ -1043,33 +1043,32 @@ intensity invariance are further explored in a later section.
|
||||
\caption{\textbf{Intensity invariance through thresholding and temporal
|
||||
averaging is mediated by the interaction of threshold
|
||||
value and noise floor.}
|
||||
Input $\adapt(t)$ consists of song component $\soc(t)$
|
||||
scaled by $\sca$ with optional noise component $\noc(t)$
|
||||
and is transformed into single kernel response $c(t)$,
|
||||
binary response $b(t)$, and feature $f(t)$. Different
|
||||
color shades indicate different threshold values $\Theta$
|
||||
(multiples of reference standard deviation $\sigma_{\eta}$
|
||||
of $c(t)$ for input $\adapt(t)=\noc(t)$, with darker
|
||||
colors for higher $\Theta$).
|
||||
\textbf{Left}:~Noisy case: Example representations of
|
||||
$\adapt(t)$ as well as $c(t)$, $b(t)$, and $f(t)$ for
|
||||
different $\sca$.
|
||||
Input $\adapt(t)$ consists of $\soc(t)$ scaled by $\sca$ with
|
||||
optional $\noc(t)$ and is transformed into single kernel
|
||||
response $c(t)$, binary response $b(t)$, and feature
|
||||
$f(t)$. Different color shades indicate different
|
||||
threshold values $\Theta$ (multiples of pure-noise
|
||||
standard deviation $\sigma_{\eta}$ of $c(t)$ for $\sca=0$,
|
||||
with darker colors for higher $\Theta$. See also appendix
|
||||
Fig.\,\ref{fig:app_thresh-lp_kern-sd}).
|
||||
\textbf{Left}:~Noisy case: Examples of $\adapt(t)$ as well
|
||||
as $c(t)$, $b(t)$, and $f(t)$ for different $\sca$.
|
||||
\textbf{a}:~$\adapt(t)$ with kernel $k(t)$ in black.
|
||||
\textbf{b\,-\,d}:~$c(t)$, $b(t)$, and $f(t)$ based on the
|
||||
same $\adapt(t)$ from \textbf{a} but with different
|
||||
same $\adapt(t)$ from \textbf{a} but for different
|
||||
$\Theta$.
|
||||
\textbf{Right}:~Average value $\mu_f$ of $f(t)$ for each
|
||||
$\Theta$ from \textbf{b\,-\,d}. Dots indicate $95\,\%$
|
||||
curve span (noisy case).
|
||||
\textbf{e}:~$\mu_f$ over a range of $\sca$, once for the
|
||||
noisy case (solid lines) and once for the noiseless case
|
||||
(dotted lines).
|
||||
\textbf{f}:~Noisy case: $\mu_f$ over the standard
|
||||
deviation of input $\adapt$ corresponding to the values of
|
||||
$\sca$ shown in \textbf{e}. Shaded area indicates standard
|
||||
deviations that would be capped in the output $\adapt(t)$
|
||||
of the previous transformation pair (see
|
||||
Fig.\,\ref{fig:log-hp}cd).
|
||||
\textbf{e}:~$\mu_f$ over $\sca$, once for the noisy case
|
||||
(solid lines) and once for the noiseless case (dotted
|
||||
lines).
|
||||
\textbf{f}:~Noisy case: $\mu_f$ over standard deviation
|
||||
$\sigma_{\text{adapt}}$ of input $\adapt(t)$ corresponding to
|
||||
$\sca$ shown in \textbf{e}. Shaded area indicates values
|
||||
of $\sigma_{\text{adapt}}$ that are capped in the output
|
||||
$\adapt(t)$ of the previous transformation pair
|
||||
(Fig.\,\ref{fig:log-hp}cd).
|
||||
}
|
||||
\label{fig:thresh-lp_single}
|
||||
\end{figure}
|
||||
@@ -1148,29 +1147,29 @@ point of $f_i(t)$ less relevant.
|
||||
saturates at different points in feature space.}
|
||||
Same input and processing as in
|
||||
Fig.\,\ref{fig:thresh-lp_single} but with three different
|
||||
kernels $k_i$, each with a single kernel-specific
|
||||
threshold value $\thr=0.5\cdot\sigma_{\eta_i}$.
|
||||
kernels $k_i$ and a single kernel-specific threshold value
|
||||
$\thr=0.5\cdot\sigma_{\eta_i}$ (appendix
|
||||
Fig.\,\ref{fig:app_thresh-lp_kern-sd}).
|
||||
\textbf{a}:~Examples of species-specific grasshopper
|
||||
songs.
|
||||
\textbf{Middle}:~Average value $\mu_{f_i}$ of each feature
|
||||
\textbf{Middle}:~Average value $\muf$ of each feature
|
||||
$f_i(t)$ over $\sca$ per species (averaged over songs and
|
||||
recordings, see appendix
|
||||
Figs.\,\ref{fig:app_thresh-lp_pure} and
|
||||
\ref{fig:app_thresh-lp_noise}). Different color shades
|
||||
indicate different kernels $k_i$. Dots indicate $95\,\%$
|
||||
curve span per $k_i$.
|
||||
recordings, appendix Figs.\,\ref{fig:app_thresh-lp_pure}
|
||||
and \ref{fig:app_thresh-lp_noise}). Different color shades
|
||||
indicate different $k_i$. Dots indicate $95\,\%$ curve
|
||||
span per $k_i$.
|
||||
\textbf{b}:~Noiseless case.
|
||||
\textbf{c}:~Noisy case.
|
||||
\textbf{Bottom}:~2D feature spaces spanned by each pair of
|
||||
$f_i(t)$. Each trajectory corresponds to a
|
||||
species-specific combination of $\mu_{f_i}$ that develops
|
||||
species-specific combination of $\muf$ that develops
|
||||
with $\sca$ (colorbars). Horizontal dashes in the colorbar
|
||||
indicate $5\,\%$ (dark grey) and $95\,\%$ (light grey)
|
||||
curve span of the norm across all three $\mu_{f_i}$ per
|
||||
curve span of the norm across all three $\muf$ per
|
||||
species.
|
||||
\textbf{d}:~Noiseless case.
|
||||
\textbf{e}:~Noisy case. Shaded areas indicate the average
|
||||
minimum $\mu_{f_i}$ across all species-specific trajectories.
|
||||
minimum $\muf$ across all species-specific trajectories.
|
||||
}
|
||||
\label{fig:thresh-lp_species}
|
||||
\end{figure}
|
||||
@@ -1255,34 +1254,34 @@ in principle, work together towards an intensity-invariant song representation.
|
||||
\includegraphics[width=\textwidth]{figures/fig_invariance_full_Omocestus_rufipes.pdf}
|
||||
\caption{\textbf{Step-wise emergence of intensity-invariant song
|
||||
representations along the model pathway.}
|
||||
Input $\raw(t)$ consists of song component $\soc(t)$
|
||||
scaled by $\sca$ with added noise component $\noc(t)$ and
|
||||
is processed up to the feature set $f_i(t)$. Different
|
||||
color shades indicate different types of Gabor kernels
|
||||
with specific lobe number $\kn$ and either $+$ or $-$
|
||||
sign, sorted (dark to light) first by increasing $\kn$ and
|
||||
then by sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$
|
||||
for each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8,
|
||||
and $16\,$ms per type; 8 types, 40 kernels in total).
|
||||
\textbf{a}:~Example representations of $\filt(t)$,
|
||||
$\env(t)$, $\db(t)$, $\adapt(t)$, $c_i(t)$, and $f_i(t)$
|
||||
for different $\sca$.
|
||||
\textbf{b}:~Intensity measures over $\sca$. For $c_i(t)$
|
||||
and $f_i(t)$, the median over kernels is shown. Dots
|
||||
Input $\raw(t)$ consists of $\soc(t)$ scaled by $\sca$
|
||||
with added $\noc(t)$ and is processed up to the feature
|
||||
set $f_i(t)$ using kernel-specific threshold values
|
||||
$\thr=2\cdot\sigma_{\eta_i}$ (appendix
|
||||
Fig.\,\ref{fig:app_full_kern-sd}). Different color shades
|
||||
indicate different types of Gabor kernels with specific
|
||||
lobe number $\kn$ and either $+$ or $-$ sign, sorted (dark
|
||||
to light) first by increasing $\kn$ and then by
|
||||
sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$ for
|
||||
each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8, and
|
||||
$16\,$ms per type; 8 types, 40 $k_i(t)$ in total).
|
||||
\textbf{a}:~Examples of $\filt(t)$, $\env(t)$, $\db(t)$,
|
||||
$\adapt(t)$, $c_i(t)$, and $f_i(t)$ for different $\sca$.
|
||||
\textbf{b}:~Intensity measures over $\sca$. The median
|
||||
over $k_i(t)$ is shown for $c_i(t)$ and $f_i(t)$. Dots
|
||||
indicate $95\,\%$ curve span for $\db(t)$, $\adapt(t)$,
|
||||
$c_i(t)$, and $f_i(t)$.
|
||||
\textbf{c}:~Average value $\mu_{f_i}$ of each feature
|
||||
$f_i(t)$ over $\sca$.
|
||||
\textbf{d}:~Ratios of intensity measures to the respective
|
||||
reference value for input $\raw(t)=\noc(t)$. For $c_i(t)$
|
||||
and $f_i(t)$, the median over kernel-specific ratios is
|
||||
shown.
|
||||
\textbf{e}:~Ratios of standard deviation $\sigma_{c_i}$ of
|
||||
\textbf{c}:~Average value $\muf$ of each $f_i(t)$
|
||||
over $\sca$.
|
||||
\textbf{d}:~Ratio of intensity measures from \textbf{b} to
|
||||
the respective pure-noise reference for $\sca=0$.
|
||||
\textbf{e}:~Ratio of standard deviation $\sigma_{c_i}$ of
|
||||
each $c_i(t)$.
|
||||
\textbf{f}:~Ratios of $\mu_{f_i}$.
|
||||
\textbf{f}:~Ratio of $\muf$.
|
||||
\textbf{g}:~Distributions of kernel-specific $\sca$ that
|
||||
correspond to $95\,\%$ curve span for $c_i(t)$ and
|
||||
$f_i(t)$. Dots indicate the values from \textbf{b}.
|
||||
$f_i(t)$. Dots indicate values based on the median from
|
||||
\textbf{b}.
|
||||
}
|
||||
\label{fig:pipeline_full}
|
||||
\end{figure}
|
||||
@@ -1337,32 +1336,24 @@ guaranteed simply by disabling logarithmic compression.
|
||||
\includegraphics[width=\textwidth]{figures/fig_invariance_short_Omocestus_rufipes.pdf}
|
||||
\caption{\textbf{Effects of disabling logarithmic compression on intensity
|
||||
invariance along the model pathway.}
|
||||
Input $\raw(t)$ consists of song component $\soc(t)$
|
||||
scaled by $\sca$ with added noise component $\noc(t)$ and
|
||||
is processed up to the feature set $f_i(t)$, skipping
|
||||
$\db(t)$. Different color shades indicate different types
|
||||
of Gabor kernels with specific lobe number $\kn$ and
|
||||
either $+$ or $-$ sign, sorted (dark to light) first by
|
||||
increasing $\kn$ and then by
|
||||
sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$ for
|
||||
each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8, and
|
||||
$16\,$ms per type; 8 types, 40 kernels in total).
|
||||
\textbf{a}:~Example representations of $\filt(t)$,
|
||||
$\env(t)$, $\adapt(t)$, $c_i(t)$, and $f_i(t)$ for
|
||||
different $\sca$.
|
||||
\textbf{b}:~Intensity measures over $\sca$. For $c_i(t)$
|
||||
and $f_i(t)$, the median over kernels is shown. Dots
|
||||
indicate $95\,\%$ curve span for $f_i(t)$.
|
||||
\textbf{c}:~Average value $\mu_{f_i}$ of each feature
|
||||
$f_i(t)$ over $\sca$.
|
||||
\textbf{d}:~Ratios of intensity measures to the respective
|
||||
reference value for input $\raw(t)=\noc(t)$. For $c_i(t)$
|
||||
and $f_i(t)$, the median over kernel-specific ratios is
|
||||
shown.
|
||||
\textbf{e}:~Ratios of $\mu_{f_i}$.
|
||||
Same input and processing as in
|
||||
Fig.\,\ref{fig:pipeline_full}, using kernel-specific
|
||||
threshold values $\thr=2\cdot\sigma_{\eta_i}$ (appendix
|
||||
Fig.\,\ref{fig:app_short_kern-sd}), except that
|
||||
logarithmic compression and hence $\db(t)$ are skipped.
|
||||
\textbf{a}:~Examples of $\filt(t)$, $\env(t)$,
|
||||
$\adapt(t)$, $c_i(t)$, and $f_i(t)$ for different $\sca$.
|
||||
\textbf{b}:~Intensity measures over $\sca$. The median
|
||||
over $k_i(t)$ is shown for $c_i(t)$ and $f_i(t)$. Dot
|
||||
indicates $95\,\%$ curve span for $f_i(t)$.
|
||||
\textbf{c}:~Average value $\muf$ of each $f_i(t)$
|
||||
over $\sca$.
|
||||
\textbf{d}:~Ratio of intensity measures from \textbf{b} to
|
||||
the respective pure-noise reference for $\sca=0$.
|
||||
\textbf{e}:~Ratio of $\muf$.
|
||||
\textbf{f}:~Distribution of kernel-specific $\sca$ that
|
||||
correspond to $95\,\%$ curve span for $f_i(t)$. Dots
|
||||
indicate the value from \textbf{b}.
|
||||
correspond to $95\,\%$ curve span for $f_i(t)$. Dot
|
||||
indicates value based on the median from \textbf{b}.
|
||||
}
|
||||
\label{fig:pipeline_short}
|
||||
\end{figure}
|
||||
@@ -1426,26 +1417,27 @@ distances~(Fig.\,\ref{fig:pipeline_field}a, bottom row).
|
||||
Input $\raw(t)$ consists of a song of \textit{P.
|
||||
parallelus} recorded in the field at eight different
|
||||
distances $d$ and is processed up to the feature set
|
||||
$f_i(t)$. Different color shades indicate different types
|
||||
of Gabor kernels with specific lobe number $\kn$ and
|
||||
either $+$ or $-$ sign, sorted (dark to light) first by
|
||||
increasing $\kn$ and then by
|
||||
$f_i(t)$ using kernel-specific threshold values
|
||||
$\thr=2\cdot\sigma_{\eta_i}$ (appendix
|
||||
Fig.\,\ref{fig:app_field_kern-sd}). Different color shades
|
||||
indicate different types of Gabor kernels with specific
|
||||
lobe number $\kn$ and either $+$ or $-$ sign, sorted (dark
|
||||
to light) first by increasing $\kn$ and then by
|
||||
sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$ for
|
||||
each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8, and
|
||||
$16\,$ms per type; 8 types, 40 kernels in total).
|
||||
$16\,$ms per type; 8 types, 40 $k_i(t)$ in total).
|
||||
\textbf{a}:~$\filt(t)$, $\env(t)$, $\db(t)$, $\adapt(t)$,
|
||||
$c_i(t)$, and $f_i(t)$ at each $d$. A noise segment from
|
||||
the same recording is shown for reference.
|
||||
\textbf{b}:~Intensity measures over $d$. For $c_i(t)$
|
||||
and $f_i(t)$, the median over kernels is shown.
|
||||
\textbf{c}:~Average value $\mu_{f_i}$ of each feature
|
||||
$f_i(t)$ over $d$.
|
||||
\textbf{d}:~Ratios of intensity measures to the respective
|
||||
value obtained from the noise reference. For $c_i(t)$ and
|
||||
$f_i(t)$, the median over kernel-specific ratios is shown.
|
||||
\textbf{e}:~Ratios of standard deviation $\sigma_{c_i}$ of
|
||||
\textbf{b}:~Intensity measures over $d$. The median over
|
||||
$k_i(t)$ is shown for $c_i(t)$ and $f_i(t)$.
|
||||
\textbf{c}:~Average value $\muf$ of each $f_i(t)$ over
|
||||
$d$.
|
||||
\textbf{d}:~Ratio of intensity measures from \textbf{b} to
|
||||
the respective value obtained from the noise reference.
|
||||
\textbf{e}:~Ratio of standard deviation $\sigma_{c_i}$ of
|
||||
each $c_i(t)$.
|
||||
\textbf{f}:~Ratios of $\mu_{f_i}$.
|
||||
\textbf{f}:~Ratio of $\muf$.
|
||||
}
|
||||
\label{fig:pipeline_field}
|
||||
\end{figure}
|
||||
@@ -1470,8 +1462,8 @@ song, so that each dot within a subplot corresponds to a single feature
|
||||
$f_i(t)$. For the intraspecific
|
||||
comparisons~(Fig.\,\ref{fig:feat_cross_species}, upper triangular), the pairs
|
||||
of $\muf$ are distributed closely around the diagonal, with a minimum
|
||||
correlation coefficient of $\rho=0.85$, a maximum of $\rho=0.99$, and a median
|
||||
of $\rho=0.92$. A given $f_i(t)$ thus tends to have a similar $\muf$ across
|
||||
correlation coefficient of $\rho=0.82$, a maximum of $\rho=0.99$, and a median
|
||||
of $\rho=0.91$. A given $f_i(t)$ thus tends to have a similar $\muf$ across
|
||||
different songs of the same species. In contrast, the pairs of $\muf$ for the
|
||||
interspecific comparisons~(Fig.\,\ref{fig:feat_cross_species}, lower
|
||||
triangular) are distributed in a variety of different ways, most in broader
|
||||
@@ -1479,7 +1471,7 @@ clouds (e.g. \textit{C. biguttulus} vs. \textit{C. mollis}) but some more
|
||||
narrowly around the diagonal (e.g. \textit{P. parallelus} vs. \textit{C.
|
||||
dispar}). The correlation coefficients $\rho$ vary widely between different
|
||||
interspecific comparisons, with a minimum of $\rho=-0.1$, a maximum of
|
||||
$\rho=0.92$, and a median of $\rho=0.53$. A given $f_i(t)$ therefore tends to
|
||||
$\rho=0.91$, and a median of $\rho=0.40$. A given $f_i(t)$ therefore tends to
|
||||
have a less similar $\muf$ across different species than within the same
|
||||
species, although certain exceptions exist~(Fig.\,\ref{fig:feat_cross_species},
|
||||
lower right). Accordingly, the feature representation that is generated by the
|
||||
@@ -1498,18 +1490,17 @@ natural song variation.
|
||||
\centering
|
||||
\includegraphics[width=\textwidth]{figures/fig_features_cross_species.pdf}
|
||||
\caption{\textbf{Interspecific and intraspecific feature variability.}
|
||||
Average value $\mu_{f_i}$ of each feature $f_i(t)$ against
|
||||
its counterpart from a 2nd feature set based on a
|
||||
different input $\raw(t)$. Each dot within a subplot
|
||||
represents a single feature $f_i(t)$. Different color
|
||||
Average value $\muf$ of each feature $f_i(t)$ against its
|
||||
counterpart from a 2nd feature set based on a different
|
||||
input $\raw(t)$. Data is based on the saturated $\muf$
|
||||
from Fig.\,\ref{fig:pipeline_full}. Each dot within a
|
||||
subplot represents a single $f_i(t)$. Different color
|
||||
shades indicate different types of Gabor kernels with
|
||||
specific lobe number $\kn$ and either $+$ or $-$ sign,
|
||||
sorted (dark to light) first by increasing $\kn$ and then
|
||||
by sign~($1\,\leq\,\kn\,\leq\,4$; first $+$, then $-$ for
|
||||
each $\kn$; five kernel widths $\kw$ of 1, 2, 4, 8, and
|
||||
$16\,$ms per type; 8 types, 40 kernels in total). Data is
|
||||
based on the analysis underlying
|
||||
Fig\,\ref{fig:pipeline_full}.
|
||||
$16\,$ms per type; 8 types, 40 kernels in total).
|
||||
\textbf{Lower triangular}:~Interspecific comparisons
|
||||
between single songs of different species.
|
||||
\textbf{Upper triangular}:~Intraspecific comparisons
|
||||
@@ -1524,7 +1515,8 @@ natural song variation.
|
||||
\end{figure}
|
||||
\FloatBarrier
|
||||
|
||||
\section{Conclusions \& outlook}
|
||||
\newpage
|
||||
\section{Discussion}
|
||||
|
||||
% RIPPED FROM INTRODUCTION:
|
||||
|
||||
@@ -1677,105 +1669,189 @@ $\rightarrow$ Graded again but highly decorrelated from the acoustic stimulus\\
|
||||
$\rightarrow$ Parameters of a behavioral response may be graded (e.g. approach speed),
|
||||
initiation of one behavior over another is categorical (e.g. approach/stay)
|
||||
|
||||
\newpage
|
||||
\section{Appendix}
|
||||
|
||||
% Not sure if we really need this one. Might raise more questions than it
|
||||
% provides answers. The noise component is not stable throughout nonlinear
|
||||
% transformations; that is all the reader needs to know, I believe.
|
||||
\begin{figure}[!ht]
|
||||
\centering
|
||||
\includegraphics[width=\textwidth]{figures/fig_noise_env_sd_conversion_appendix.pdf}
|
||||
\caption{\textbf{}
|
||||
}
|
||||
\caption{\textbf{Conversion of the noise component by envelope extraction.}
|
||||
Standard deviation $\sigma_{\eta}$ of noise component
|
||||
$\noc(t)$ within the signal envelope $\env(t)$ over scale
|
||||
$\sca$. Based on input $\raw(t)$ with $\sigma_{\eta}=1$
|
||||
(corresponding to the analysis underlying
|
||||
Fig.\,\ref{fig:rect-lp}), using 100 realizations of
|
||||
$\noc(t)$.}
|
||||
\label{fig:app_env-sd}
|
||||
\end{figure}
|
||||
\end{figure}% Referenced.
|
||||
\FloatBarrier
|
||||
|
||||
\begin{figure}[!ht]
|
||||
\centering
|
||||
\includegraphics[width=\textwidth]{figures/fig_invariance_rect-lp_appendix.pdf}
|
||||
\caption{\textbf{}
|
||||
}
|
||||
\caption{\textbf{Species-specific data underlying Fig.\,\ref{fig:rect-lp}e.}
|
||||
Ratio of the standard deviation $\sigma_{\text{env}}$ to
|
||||
the pure-noise reference $\sigma_{\eta}$ of the signal
|
||||
envelope $\env(t)$ over scale $\sca$ for different cutoff
|
||||
frequencies $\fc$ of the lowpass filter extracting
|
||||
$\env(t)$. Solid lines and shaded areas indicate mean
|
||||
$\pm$ standard deviation across songs per recording.
|
||||
Dashed lines indicate mean across recordings (shown in
|
||||
Fig.\,\ref{fig:rect-lp}e).}
|
||||
\label{fig:app_rect-lp}
|
||||
\end{figure}
|
||||
\end{figure}% Referenced.
|
||||
\FloatBarrier
|
||||
|
||||
\begin{figure}[!ht]
|
||||
\centering
|
||||
\includegraphics[width=\textwidth]{figures/fig_invariance_log-hp_appendix.pdf}
|
||||
\caption{\textbf{}
|
||||
}
|
||||
\caption{\textbf{Species-specific data underlying Fig.\,\ref{fig:log-hp}e.}
|
||||
Ratio of the standard deviation $\sigma_{\text{adapt}}$ to
|
||||
the pure-noise reference $\sigma_{\eta}$ of the
|
||||
intensity-adapted envelope $\adapt(t)$ over scale $\sca$.
|
||||
Solid lines and shaded areas indicate mean $\pm$ standard
|
||||
deviation across songs per recording. Dashed lines
|
||||
indicate mean across recordings (shown in
|
||||
Fig.\,\ref{fig:log-hp}e).}
|
||||
\label{fig:app_log-hp_curves}
|
||||
\end{figure}
|
||||
\end{figure}% Referenced.
|
||||
\FloatBarrier
|
||||
|
||||
\begin{figure}[!ht]
|
||||
\centering
|
||||
\includegraphics[width=\textwidth]{figures/fig_saturation_log-hp_appendix.pdf}
|
||||
\caption{\textbf{}
|
||||
}
|
||||
\caption{\textbf{Species-specific saturation points underlying
|
||||
Fig.\,\ref{fig:log-hp}e.}
|
||||
Distribution of saturation points ($95\,\%$ curve span) of
|
||||
ratio $\sigma_{\text{adapt}} / \sigma_{\eta}$ of the
|
||||
intensity-adapted envelope $\adapt(t)$ over scale $\sca$
|
||||
across all available songs. Dots indicate the saturation
|
||||
point of the mean curve across songs and recordings (shown
|
||||
in Fig.\,\ref{fig:log-hp}e, see also appendix
|
||||
Fig.\,\ref{fig:app_log-hp_curves}).}
|
||||
\label{fig:app_log-hp_saturation}
|
||||
\end{figure}
|
||||
\end{figure}% Referenced.
|
||||
\FloatBarrier
|
||||
|
||||
\begin{figure}[!ht]
|
||||
\centering
|
||||
\includegraphics[width=\textwidth]{figures/fig_invariance_thresh-lp_pure_appendix.pdf}
|
||||
\caption{\textbf{}
|
||||
}
|
||||
\caption{\textbf{Species-specific data underlying Fig.\,\ref{fig:thresh-lp_species}bd.}
|
||||
Average value $\muf$ of each of the three features
|
||||
$f_i(t)$ over scale $\sca$ in the noiseless case. Solid
|
||||
lines and shaded areas indicate mean $\pm$ standard
|
||||
deviation across songs per recording. Dashed lines
|
||||
indicate mean across recordings (shown in
|
||||
Fig.\,\ref{fig:thresh-lp_species}bd).}
|
||||
\label{fig:app_thresh-lp_pure}
|
||||
\end{figure}
|
||||
\end{figure}% Referenced.
|
||||
\FloatBarrier
|
||||
|
||||
\begin{figure}[!ht]
|
||||
\centering
|
||||
\includegraphics[width=\textwidth]{figures/fig_invariance_thresh-lp_noise_appendix.pdf}
|
||||
\caption{\textbf{}
|
||||
}
|
||||
\caption{\textbf{Species-specific data underlying Fig.\,\ref{fig:thresh-lp_species}ce.}
|
||||
Average value $\muf$ of each of the three features
|
||||
$f_i(t)$ over scale $\sca$ in the noisy case. Solid lines
|
||||
and shaded areas indicate mean $\pm$ standard deviation
|
||||
across songs per recording. Dashed lines indicate mean
|
||||
across recordings (shown in
|
||||
Fig.\,\ref{fig:thresh-lp_species}ce).}
|
||||
\label{fig:app_thresh-lp_noise}
|
||||
\end{figure}
|
||||
\end{figure}% Referenced.
|
||||
\FloatBarrier
|
||||
|
||||
\begin{figure}[!ht]
|
||||
\centering
|
||||
\includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_thresh_lp_appendix.pdf}
|
||||
\caption{\textbf{}
|
||||
\caption{\textbf{Relation between threshold value and pure-noise feature
|
||||
value for Fig.\,\ref{fig:thresh-lp_single} and
|
||||
Fig.\,\ref{fig:thresh-lp_species}.}
|
||||
Proportion of pure-noise kernel response $c_i(t)$ that
|
||||
exceeds threshold value $\thr$ --- which determines the
|
||||
average value $\muf$ of feature $f_i(t)$ --- over $\thr$
|
||||
in multiples of standard deviation $\sigma_{c_i}$.
|
||||
Corresponds to a ``reverse'' cumulative distribution
|
||||
function~(rCDF) of $c_i(t)$. Black solid lines indicate rCDF per
|
||||
kernel $k_i(t)$. Red dashed line indicates rCDF for a
|
||||
normal distribution with $\mu=0$ and $\sigma=1$.
|
||||
}
|
||||
\label{fig:app_thresh-lp_kern-sd}
|
||||
\end{figure}
|
||||
\end{figure}% Referenced.
|
||||
\FloatBarrier
|
||||
|
||||
\begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_full_appendix.pdf}
    \caption{\textbf{Relation between threshold value and pure-noise feature
             value for Fig.\,\ref{fig:pipeline_full}.}
             Proportion of pure-noise kernel response $c_i(t)$ that
             exceeds threshold value $\thr$ --- which determines the
             average value $\muf$ of feature $f_i(t)$ --- over $\thr$
             in multiples of standard deviation $\sigma_{c_i}$.
             Corresponds to a ``reverse'' cumulative distribution
             function~(rCDF) of $c_i(t)$. Black solid lines indicate
             rCDF per kernel $k_i(t)$. Red dashed line indicates rCDF
             for a normal distribution with $\mu=0$ and $\sigma=1$.
             }
    \label{fig:app_full_kern-sd}
\end{figure}% Referenced.
\FloatBarrier

\begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_short_appendix.pdf}
    \caption{\textbf{Relation between threshold value and pure-noise feature
             value for Fig.\,\ref{fig:pipeline_short}.}
             Proportion of pure-noise kernel response $c_i(t)$ that
             exceeds threshold value $\thr$ --- which determines the
             average value $\muf$ of feature $f_i(t)$ --- over $\thr$
             in multiples of standard deviation $\sigma_{c_i}$.
             Corresponds to a ``reverse'' cumulative distribution
             function~(rCDF) of $c_i(t)$. Black solid lines indicate
             rCDF per kernel $k_i(t)$. Red dashed line indicates rCDF
             for a normal distribution with $\mu=0$ and $\sigma=1$.
             }
    \label{fig:app_short_kern-sd}
\end{figure}% Referenced.
\FloatBarrier

\begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_kernel_sd_perc_field_appendix.pdf}
    \caption{\textbf{Relation between threshold value and pure-noise feature
             value for Fig.\,\ref{fig:pipeline_field}.}
             Proportion of pure-noise kernel response $c_i(t)$ that
             exceeds threshold value $\thr$ --- which determines the
             average value $\muf$ of feature $f_i(t)$ --- over $\thr$
             in multiples of standard deviation $\sigma_{c_i}$.
             Corresponds to a ``reverse'' cumulative distribution
             function~(rCDF) of $c_i(t)$. Black solid lines indicate
             rCDF per kernel $k_i(t)$. Red dashed line indicates rCDF
             for a normal distribution with $\mu=0$ and $\sigma=1$.
             }
    \label{fig:app_field_kern-sd}
\end{figure}% Referenced.
\FloatBarrier


\begin{figure}[!ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_invariance_cross_species_thresh_appendix.pdf}
    \caption{\textbf{Threshold-dependent intensity invariance of
             species-specific feature sets.}
             Same input and processing as in
             Fig.\,\ref{fig:pipeline_full}, using different
             kernel-specific threshold values $\thr$ (multiples of
             the pure-noise standard deviation $\sigma_{\eta_i}$ of
             $c_i(t)$ for $\sca=0$; see also appendix
             Fig.\,\ref{fig:app_full_kern-sd}). Average value $\muf$ of
             each feature $f_i(t)$ over $\sca$.
             }
    \label{fig:app_cross_species_thresh}
\end{figure}% Reference this one!
\FloatBarrier

\end{document}