Added and described two single-kernel Thresh-LP invariance figures in main.tex.

This commit is contained in:
j-hartling
2026-03-11 15:21:22 +01:00
parent 4494bc7783
commit 4f5054c8fd
16 changed files with 4958 additions and 274 deletions

View File

@@ -625,11 +625,11 @@ the signal for reliable song recognition.
\includegraphics[width=\textwidth]{figures/fig_invariance_log_hp.pdf}
\caption{\textbf{Intensity invariance by logarithmic compression and
adaptation is restricted by the noise floor.}
Synthetic envelope $\env(t)$ is transformed into
logarithmically compressed envelope $\db(t)$ and further
into intensity-adapted envelope $\adapt(t)$. Indicated
time scale is $5\,$s for both \textbf{a} and \textbf{b}
(black bars).
Envelope $\env(t)$ is transformed into logarithmically
compressed envelope $\db(t)$ and further into
intensity-adapted envelope $\adapt(t)$. Indicated time
scale is $5\,$s for both \textbf{a} and \textbf{b} (black
bars).
\textbf{a}:~Ideally, if $\env(t)$ consists only of song
component $\soc(t)$ rescaled by $\sca$, then $\adapt(t)$
is fully intensity-invariant across all $\sca$.
@@ -649,6 +649,51 @@ the signal for reliable song recognition.
\subsection{Thresholding nonlinearity \& temporal averaging}
\begin{figure}[!ht]
\centering
\includegraphics[width=\textwidth]{figures/fig_invariance_thresh_lp_single.pdf}
\caption{\textbf{Intensity invariance by thresholding and temporal
averaging depends on the threshold value.}
Kernel response $c_i(t)$ is rescaled by $\sca$ and
transformed into binary response $b_i(t)$ and further into
feature $f_i(t)$. Threshold value $\thr$ is set to
different percentiles of the distribution of $c_i(t)$
at $\sca=1$. Darker colors indicate higher values of
$\thr$. Indicated time scale of $500\,$ms is the same for
\textbf{a}--\textbf{c} (black bar).
\textbf{a}:~50th percentile.
\textbf{b}:~75th percentile.
\textbf{c}:~100th percentile.
\textbf{d}:~Average value of $f_i(t)$ during the song for
the different $\thr$ in \textbf{a}--\textbf{c}.
}
\label{fig:inv_thresh-lp_single}
\end{figure}
\FloatBarrier
\begin{figure}[!ht]
\centering
\includegraphics[width=\textwidth]{figures/fig_invariance_thresh_lp_single_noise.pdf}
\caption{\textbf{Intensity invariance by thresholding and temporal
averaging depends on noise.}
Kernel response $c_i(t)$ is rescaled by $\sca$, mixed with
fixed-scale noise component $\noc(t)$, and transformed
into binary response $b_i(t)$ and further into feature
$f_i(t)$. Threshold value $\thr$ is set to different
percentiles of the distribution of $c_i(t)$ at
$\sca=1$. Darker colors indicate higher values of $\thr$.
Indicated time scale of $500\,$ms is the same for
\textbf{a}--\textbf{c} (black bar).
\textbf{a}:~50th percentile.
\textbf{b}:~75th percentile.
\textbf{c}:~100th percentile.
\textbf{d}:~Average value of $f_i(t)$ during the song for
the different $\thr$ in \textbf{a}--\textbf{c}.
}
\label{fig:inv_thresh-lp_single_noise}
\end{figure}
\FloatBarrier
The second key mechanism for the emergence of intensity invariance along the
model pathway takes place during the transformation of the kernel responses
$c_i(t)$ via the binary responses $b_i(t)$ into the finalized features