Added and described two single-kernel Thresh-LP invariance figures in main.tex.

2026-03-11 15:21:22 +01:00
parent 4494bc7783
commit 4f5054c8fd
16 changed files with 4958 additions and 274 deletions
--- a/main.tex
+++ b/main.tex
@@ -625,11 +625,11 @@ the signal for reliable song recognition.
    \includegraphics[width=\textwidth]{figures/fig_invariance_log_hp.pdf}
    \caption{\textbf{Intensity invariance by logarithmic compression and
                     adaptation is restricted by the noise floor.}
-                     Synthetic envelope $\env(t)$ is transformed into
-                     logarihmically compressed envelope $\db(t)$ and further
-                     into intensity-adapted envelope $\adapt(t)$. Indicated
-                     time scale is $5\,$s for both \textbf{a} and \textbf{b}
-                     (black bars).
+                     Envelope $\env(t)$ is transformed into logarithmically
+                     compressed envelope $\db(t)$ and further into
+                     intensity-adapted envelope $\adapt(t)$. Indicated time
+                     scale is $5\,$s for both \textbf{a} and \textbf{b} (black
+                     bars).
                     \textbf{a}:~Ideally, if $\env(t)$ consists only of song
                     component $\soc(t)$ rescaled by $\sca$, then $\adapt(t)$
                     is fully intensity-invariant across all $\sca$.
@@ -649,6 +649,51 @@ the signal for reliable song recognition.

 \subsection{Thresholding nonlinearity \& temporal averaging}

+\begin{figure}[!ht]
+    \centering
+    \includegraphics[width=\textwidth]{figures/fig_invariance_thresh_lp_single.pdf}
+    \caption{\textbf{Intensity invariance by thresholding and temporal
+                     averaging depends on the threshold value.}
+                     Kernel response $c_i(t)$ is rescaled by $\sca$ and
+                     transformed into binary response $b_i(t)$ and further into
+                     feature $f_i(t)$. Threshold value $\thr$ is set to
+                     different percentiles of the distribution of $c_i(t)$
+                     at $\sca=1$. Darker colors indicate higher values of
+                     $\thr$. Indicated time scale of $500\,$ms is the same for
+                     \textbf{a}--\textbf{c} (black bar).
+                     \textbf{a}:~50th percentile.
+                     \textbf{b}:~75th percentile.
+                     \textbf{c}:~100th percentile.
+                     \textbf{d}:~Average value of $f_i(t)$ during the song for
+                     the different $\thr$ in \textbf{a}--\textbf{c}.
+                     }
+    \label{fig:inv_thresh-lp_single}
+\end{figure}
+\FloatBarrier
+
+\begin{figure}[!ht]
+    \centering
+    \includegraphics[width=\textwidth]{figures/fig_invariance_thresh_lp_single_noise.pdf}
+    \caption{\textbf{Intensity invariance by thresholding and temporal
+                     averaging depends on noise.}
+                     Kernel response $c_i(t)$ is rescaled by $\sca$, mixed with
+                     fixed-scale noise component $\noc(t)$, and transformed
+                     into binary response $b_i(t)$ and further into feature
+                     $f_i(t)$. Threshold value $\thr$ is set to different
+                     percentiles of the distribution of $c_i(t)$ at
+                     $\sca=1$. Darker colors indicate higher values of $\thr$.
+                     Indicated time scale of $500\,$ms is the same for
+                     \textbf{a}--\textbf{c} (black bar).
+                     \textbf{a}:~50th percentile.
+                     \textbf{b}:~75th percentile.
+                     \textbf{c}:~100th percentile.
+                     \textbf{d}:~Average value of $f_i(t)$ during the song for
+                     the different $\thr$ in \textbf{a}--\textbf{c}.
+                     }
+    \label{fig:inv_thresh-lp_single_noise}
+\end{figure}
+\FloatBarrier
+
 The second key mechanism for the emergence of intensity invariance along the
 model pathway takes place during the transformation of the kernel responses
 $c_i(t)$ over the binary responses $b_i(t)$ into the finalized features