Finished the simulation part of the method sections.

This commit is contained in:
j-hartling
2026-05-13 17:07:51 +02:00
parent 5ef09bef6c
commit 688f153bef
10 changed files with 1349 additions and 1161 deletions

168
main.tex
View File

@@ -103,6 +103,9 @@
\newcommand{\xvar}{\sigma_{x}^{2}} % Variance of synthetic mixture
\newcommand{\svar}{\sigma_{\text{s}}^{2}} % Song component variance
\newcommand{\nvar}{\sigma_{\eta}^{2}} % Noise component variance
\newcommand{\xsig}{\sigma_x} % Standard deviation of synthetic mixture
\newcommand{\ssig}{\sigma_{\text{s}}} % Song component standard deviation
\newcommand{\nsig}{\sigma_{\eta}} % Noise component standard deviation
\newcommand{\pc}{p(c,\,T)} % Probability density (general interval)
\newcommand{\pclp}{p(c,\,\tlp)} % Probability density (lowpass interval)
\newcommand{\muf}{\mu_{f_i}} % Average feature value
@@ -558,7 +561,170 @@ can be read out by a simple linear classifier.
\end{figure}
\FloatBarrier
\subsubsection{Simulation-based analysis of the model pathway}
\subsection{Simulation-based analysis of the model pathway}
\subsubsection{Data sourcing}
All simulations were based on a dataset that was assembled from five different
sources, each of which is an established reference for the identification of
European grasshopper species. The dataset was limited to six species from the
species-rich \textit{Gomphocerinae} sub-family that are known to be common
throughout Central and Southern Europe. All recordings were converted to
standard~\textit{.wav}~format with a sampling rate of~44.1\,kHz and an
amplitude scale in arbitrary units. Individual songs were then cut from each
recording. The dataset includes a total of 31 recordings across species, which
amounts to a total of 153 isolated songs. However, the number of available
species-specific songs varies greatly across species, with a maximum of 48
songs for \textit{C. biguttulus} and a minimum of 6 songs for \textit{C.
mollis}~(Tab.\,\ref{tab:species_list}).
\begin{itemize}
\item ``Heuschrecken beobachten, bestimmen'' by~Heiko~Bellmann\\
1\textsuperscript{st}\,edition, 1993, Naturbuch, Augsburg
\item ``Gesänge der heimischen Heuschrecken. Akustisch-optische
Bestimmungshilfe.''\\
by~Karl-Heinz~Garberding, Deutscher Jugendbund für Naturbeobachtung\\
1\textsuperscript{st}\,edition, 2001, DJN, Göttingen
\item ``Heuschrecken -- Die Stimmen von 61 heimischen Arten''
by~Heiko~Bellmann\\
1\textsuperscript{st}\,edition, 2004, AMPLE, Germering
\item ``Fauna d'Italia XLVIII -- Orthoptera'' by~Bruno~Massa, Paolo~Fontana,
Filippo~M.~Buzzetti, Roy~M.J.C.~Kleukers, Baudewijn~Odé\\
1\textsuperscript{st}\,edition, 2012, edagricola, Milano
\item ``Singing Orthoptera of Slovenia'' by~Stanislav~Gomboc, Blaz~Segula\\
1\textsuperscript{st}\,edition, 2014, EGEA, Ljubljana
\end{itemize}
\begin{table}[!ht]
\centering
\captionsetup{width=.75\textwidth}
\caption{Overview of the six grasshopper species from the
\textit{Gomphocerinae} sub-family, the number of sources per species, the
number of available recordings across sources, and the number of isolated
songs across recordings.}
\begin{tabular}{|lccc|}
\hline
\textbf{Species} & \textbf{Sources} & \textbf{Recordings} & \textbf{Songs}\\
\hline
\textit{Chorthippus biguttulus} & 5 & 6 & 48\\
\textit{Chorthippus mollis} & 3 & 3 & 6\\
\textit{Chrysochraon dispar} & 4 & 5 & 45\\
\textit{Gomphocerippus rufus} & 4 & 8 & 16\\
\textit{Omocestus rufipes} & 4 & 5 & 14\\
\textit{Pseudochorthippus parallelus} & 4 & 4 & 24\\
\hline
\end{tabular}
\label{tab:species_list}
\end{table}
\subsubsection{Generating synthetic input signals}
Different processing steps along the model pathway were tested for intensity
invariance by generating synthetic input signals $x(t)$ of varying intensity,
transforming them through the respective processing steps, and comparing the
resulting signal representations. Inputs were generated for two distinct cases.
In the idealized, noiseless case, $x(t)$ consists of a song component $\soc(t)$
with $\ssig=1$ and a multiplicative scale $\sca$:
\begin{equation}
x(t)\,=\,\sca\,\cdot\,\soc(t), \qquad \sca\,\geq\,0
\label{eq:noiseless}
\end{equation}
In the noiseless case, $x(t)$ is hence only a scaled version of $\soc(t)$ with
$\xsig=\sca$. In the more realistic, noisy case, $x(t)$ consists of the same
song component $\soc(t)$ scaled by $\sca$ and an additive noise component
$\noc(t)$ with $\nsig=1$:
\begin{equation}
x(t)\,=\,\sca\,\cdot\,\soc(t)\,+\,\noc(t), \qquad \sca\,\geq\,0
\label{eq:noisy}
\end{equation}
Accordingly, the SNR of input $x(t)$ in the noisy case equals the squared
$\sca$ value:
% Make sure that SNR = signal-to-noise ratio is introduced somewhere!
\begin{equation}
\text{SNR}_x(\sca)\,=\,\frac{(\sca\,\cdot\,\ssig)^2}{\nsig^2}\,=\,\sca^2, \qquad \ssig\,=\,\nsig\,=\,1
\label{eq:input_snr}
\end{equation}
For most analyses, it would be sufficient if input $x(t)$ corresponds to the
signal representation immediately before the first of the tested
transformations. For instance, when testing the effects of logarithmic
compression~(Eq.\,\ref{eq:log}), $x(t)$ would correspond to the signal envelope
$\env(t)$. However, in this particular case, $\env(t)$ results from a nonlinear
transformation~(Eq.\,\ref{eq:env}), which cannot be synthesized as an additive
mixture of $\soc(t)$ and $\noc(t)$. For this reason, any input $x(t)$ across
all analyses corresponds not to the representation immediately before the
tested transformations but to its predecessor representation instead. Therefore,
when testing logarithmic compression, $x(t)$ corresponds to the tympanal signal
$\filt(t)$ instead of $\env(t)$.
The raw $\soc(t)$ was drawn from the dataset of isolated species-specific song
recordings, whereas the raw $\noc(t)$ consists of a segment of normally
distributed white noise. Both $\soc(t)$ and $\noc(t)$ were normalized to unit
standard deviation. These can be used without further processing for all
analyses where input $x(t)$ corresponds to $\raw(t)$. For analyses where $x(t)$
corresponds to a later representation, $\soc(t)$ and $\noc(t)$ were first
processed along the model pathway up to the required representation, again
normalized to unit standard deviation, and then used to generate $x(t)$
according to either Eq.\,\ref{eq:noiseless} in the noiseless case or
Eq.\,\ref{eq:noisy} in the noisy case.
\subsubsection{Quantifying signal intensity across representations}
All intensity measures were calculated over a manually labeled segment within
each song. Segments always excluded the first and last few syllables to allow
slowly changing representations such as $f_i(t)$ to stabilize. The duration of
each segment and the number of contained syllables depend on the duration of
the species-specific song. Care was taken to ensure that the segment contained
a sufficient number of syllables to obtain a reliable estimate of the intensity
measures.
The standard deviation $\sigma$ was used as a measure of intensity for all
representations resulting from the transformation of input $x(t)$ up to and
including the kernel responses $c_i(t)$, for which individual $\sigma_{c_i}$
were used as kernel-specific intensity measures. The binary responses $b_i(t)$
were deemed too similar to the features $f_i(t)$ to warrant their own intensity
measure and were hence omitted from all related analyses. For $f_i(t)$,
$\sigma$ is not an appropriate intensity measure because each $f_i(t)$ is
ideally constant with $\sigma=0$ for the duration of a song. Therefore, the
average value $\muf$ of each $f_i(t)$ was used as a kernel-specific intensity
measure instead.
It is arguably not ideal to quantify the intensity of $c_i(t)$ and $f_i(t)$
separately for each kernel. Overall, these representations are not separate
signals bundled together but rather a set that acts as a unit with a single
intensity measure. However, there is no straightforward way to quantify the
intensity of $c_i(t)$ or $f_i(t)$ as a whole that would not entail a certain
ambiguity, e.\,g.\ by averaging across kernels. In this sense, we opted for the
kernel-specific approach because it allows us to assess differences in the
dependency on $\sca$ between individual members of either $c_i(t)$ or
$f_i(t)$.
The absolute intensity measures allow comparing the intensity of a
representation across different $\sca$ values. Additionally, ratios were
calculated between the intensity measures for $\sca>0$ and the respective
pure-noise reference measure for $\sca=0$ to better compare the intensities of
different representations. This is only possible in the noisy case, where input
$x(t)=\noc(t)$ for $\sca=0$, whereas $x(t)=0$ for $\sca=0$ in the noiseless
case. At the level of input $x(t)$, the ratio of intensity measures equals the
square root of $\sca^2+1$:
\begin{equation}
\frac{\xsig}{\nsig}\,=\,\sqrt{\frac{\xsig^2}{\nsig^2}}\,=\,\sqrt{\frac{(\sca\,\cdot\,\ssig)^2\,+\,\nsig^2}{\nsig^2}}\,=\,\sqrt{\sca^2\,+\,1}, \qquad \ssig\,=\,\nsig\,=\,1
\label{eq:input_ratio}
\end{equation}
This holds only if $\soc(t)\perp\noc(t)$, so that $\xsig^2=\sca^2\,\ssig^2+\nsig^2$,
which is a reasonable assumption for the raw $\soc(t)$ and $\noc(t)$. However,
the dependency of the ratio on $\sca$ is not necessarily the same for
representations that are transformed from $x(t)$ by nonlinear operations, since
these change the relationship of $\soc(t)$ and $\noc(t)$ in an unpredictable
fashion. Furthermore, the ratio is not a proper SNR of the representation
because it does not relate $\soc(t)$ to $\noc(t)$ within the representation but
rather the entire representation to $\noc(t)$ alone. However, it still provides
a useful measure of the relative intensity of a representation with and without
$\soc(t)$, which is the closest we can get to the SNR of the representation. As
such, the ratio of intensity measures is referred to as SNR in the following.
% Is this legal? "SNR" is much shorter than "ratio of intensity measure to the pure-noise reference measure".
% Haven't used it much yet, stuck to "ratio" in most cases.
\subsection{Field data-based analysis of the model pathway}
\section{Results}