Done bullet-pointing LHP invariance, nearly done with THP invariance.

2025-11-13 16:52:22 +01:00
parent 53e43f61f5
commit 2b9f411a06
6 changed files with 113 additions and 48 deletions
--- a/main.tex
+++ b/main.tex
@@ -29,8 +29,8 @@ style=authoryear,
 \newcommand{\adapt}{\raw_{\text{adapt}}} % Adapted signal

 \newcommand{\dec}{\log_{10}} % Logarithm base 10
-\newcommand{\sigs}{\sigma_{\text{s}}} % Song standard deviation
-\newcommand{\sign}{\sigma_{\eta}} % Noise standard deviation
+\newcommand{\svar}{\sigma_{\text{s}}^{2}} % Song signal variance
+\newcommand{\nvar}{\sigma_{\eta}^{2}} % Noise signal variance
 \newcommand{\infint}{\int_{-\infty}^{+\infty}} % Improper integral over the entire real line
 \newcommand{\thr}{\Theta_i} % Step function threshold value
 \newcommand{\nl}{H(c_i\,-\,\thr)} % Shifted Heaviside step function
@@ -105,33 +105,36 @@ Initial: Continuous acoustic input signal $x(t)$

 Filtering of behaviorally relevant frequencies by tympanal membrane\\
 $\rightarrow$ Bandpass filter 5-30 kHz
+%
 \begin{equation}
    \filt(t)\,=\,\raw(t)\,*\,\bp, \qquad \fc\,=\,5\,\text{kHz},\,30\,\text{kHz}
    \label{eq:bandpass}
 \end{equation}
-
+%
 Extraction of signal envelope (AM encoding) by receptor population\\
 $\rightarrow$ Full-wave rectification, then lowpass filter 500 Hz
+%
 \begin{equation}
    \env(t)\,=\,|\filt(t)|\,*\,\lp, \qquad \fc\,=\,500\,\text{Hz}
    \label{eq:env}
 \end{equation}
-
+%
 Logarithmically compressed intensity tuning curve of receptors\\
 $\rightarrow$ Decibel transformation
+%
 \begin{equation}
    \db(t)\,=\,10\,\cdot\,\dec \frac{\env(t)}{\dbref}, \qquad \dbref\,=\,\max[\env(t)]
    \label{eq:log}
 \end{equation}
-
+%
 Spike-frequency adaptation in receptor and interneuron populations\\
 $\rightarrow$ Highpass filter 10 Hz
+%
 \begin{equation}
    \adapt(t)\,=\,\db(t)\,*\,\hp, \qquad \fc\,=\,10\,\text{Hz}
    \label{eq:highpass}
 \end{equation}
-
-
+%
 \subsection{Feature extraction by individual neurons}

 ``Post-split portion'' of the auditory pathway:\\
@@ -147,20 +150,24 @@ $\rightarrow$ Individual neuron-specific response traces from this stage onwards
 Template matching by individual ANs\\
 - Filter base (STA approximations): Set of Gabor kernels\\
 - Gabor parameters: $\sigma, \phi, f$ $\rightarrow$ Determines kernel sign and lobe number
+%
 \begin{equation}
    k(t)\,=\,e^{-\frac{t^{2}}{2\sigma^{2}}}\,\cdot\,\sin(2\pi f t\,+\,\phi)
    \label{eq:gabor}
 \end{equation}
+%
 $\rightarrow$ Separate convolution with each member of the kernel set
+%
 \begin{equation}
    c_i(t)\,=\,\adapt(t)\,*\,k_i(t)
    = \infint \adapt(\tau)\,\cdot\,k_i(t\,-\,\tau)\,d\tau
    \label{eq:conv}
 \end{equation}
-
+%
 Thresholding nonlinearity in ascending neurons (or further downstream)\\
 - Binarization of AN response traces into ``relevant'' vs.\ ``irrelevant''\\
 $\rightarrow$ Shifted Heaviside step-function $\nl$ (or steep sigmoid threshold?)
+%
 \begin{equation}
    \bi(t)\,=\,\begin{cases}
        \;1, \quad c_i(t)\,>\,\thr\\
@@ -168,17 +175,18 @@ $\rightarrow$ Shifted Heaviside step-function $\nl$ (or steep sigmoid threshold?
    \end{cases}
    \label{eq:binary}
 \end{equation}
-
+%
 Temporal averaging by neurons of the central brain\\
 - Finalized set of slowly changing kernel-specific features (one per AN)\\
 - Different species-specific song patterns are characterized by a distinct combination
 of feature values $\rightarrow$ Clusters in high-dimensional feature space\\
 $\rightarrow$ Lowpass filter 1 Hz
+%
 \begin{equation}
    \feat(t)\,=\,\bi(t)\,*\,\lp, \qquad \fc\,=\,1\,\text{Hz}
    \label{eq:lowpass}
 \end{equation}
-
+%
 \section{Two mechanisms driving the emergence of intensity-invariant song representation}

 \subsection{Logarithmic scaling \& spike-frequency adaptation}
@@ -186,38 +194,69 @@ $\rightarrow$ Lowpass filter 1 Hz
 Envelope $\env(t)$ $\xrightarrow{\text{dB}}$ Logarithmic $\db(t)$ $\xrightarrow{\hp}$ Adapted $\adapt(t)$

 - Rewrite signal envelope $\env(t)$ (Eq.\,\ref{eq:env}) as a synthetic mixture:\\
-1) Song signal $s(t)$ ($\sigs=1$) with variable multiplicative scale $\alpha\geq0$\\
-2) Fixed-scale additive noise $\eta(t)$ ($\sign=1$)
+1) Song signal $s(t)$ ($\svar=1$) with variable multiplicative scale $\alpha\geq0$\\
+2) Fixed-scale additive noise $\eta(t)$ ($\nvar=1$)
+%
 \begin{equation}
    \env(t)\,=\,\alpha\,\cdot\,s(t)\,+\,\eta(t),\qquad \env(t)\,>\,0\enspace\forall\enspace t\,\in\,\mathbb{R}
    \label{eq:toy_env}
 \end{equation}
-
+%
+- Signal-to-noise ratio (SNR): Ratio of variances of synthetic mixture
+$\env(t)$ with ($\alpha>0$) and without ($\alpha=0$) song signal $s(t)$, assuming $s(t)\perp\eta(t)$
+%
+\begin{equation}
+    \text{SNR}\,=\,\frac{\sigma_{s+\eta}^{2}}{\nvar}\,=\,\frac{\alpha^{2}\,\cdot\,\svar\,+\,\nvar}{\nvar}\,=\,\alpha^{2}\,+\,1
+    \label{eq:toy_snr}
+\end{equation}
+%
 \textbf{Logarithmic component:}\\
- Apply decibel transformation (Eq.\,\ref{eq:log}) to synthetic $\env(t)$\\
+- Simplify decibel transformation (Eq.\,\ref{eq:log}) and apply to synthetic $\env(t)$\\
 - Isolate scale $\alpha$ and reference $\dbref$ using logarithm product/quotient laws
+%
 \begin{equation}
    \begin{split}
-        \db(t)\,&=\,10\,\cdot\,\dec \frac{\alpha\,\cdot\,s(t)\,+\,\eta(t)}{\dbref}\\
-        &=\,10\,\cdot\,\big(\dec \frac{\alpha}{\dbref}\,+\,\dec[s(t)\,+\,\frac{\eta(t)}{\alpha}]\big)
+        \db(t)\,&=\,\log \frac{\alpha\,\cdot\,s(t)\,+\,\eta(t)}{\dbref}\\
+        &=\,\log \frac{\alpha}{\dbref}\,+\,\log \big[s(t)\,+\,\frac{\eta(t)}{\alpha}\big]
    \end{split}
    \label{eq:toy_log}
 \end{equation}
+%
 $\rightarrow$ In log-space, a multiplicative scaling factor becomes additive\\
 $\rightarrow$ Allows for the separation of song signal $s(t)$ and its scale $\alpha$\\
 $\rightarrow$ Introduces scaling of noise term $\eta(t)$ by the inverse of $\alpha$\\
 $\rightarrow$ Normalization by $\dbref$ applies equally to all terms (no individual effects)

 \textbf{Adaptation component:}\\
- Highpass filter over logarithmically scaled $\db(t)$ (Eq.\,\ref{eq:highpass}) can
-be approximated as subtraction of the signal offset (DC removal) within a suitable
-time interval $\thp$ ($0 < \thp < \frac{1}{\fc}$)\\
+- Highpass filter over $\db(t)$ (Eq.\,\ref{eq:highpass}) can
+be approximated as subtraction of the local signal offset within a suitable time
+interval $\thp$ ($0 \ll \thp < \frac{1}{\fc}$)
+%
 \begin{equation}
    \begin{split}
-    \adapt(t)\,\approx\,\db(t)\,-\,\dec \frac{\alpha}{\dbref}\,=\,\dec{[s(t)\,+\,\frac{\eta(t)}{\alpha}]}
+    \adapt(t)\,\approx\,\db(t)\,-\,\log \frac{\alpha}{\dbref}\,=\,\log \big[s(t)\,+\,\frac{\eta(t)}{\alpha}\big]
    \end{split}
    \label{eq:toy_highpass}
 \end{equation}
+%
+\textbf{Implication for intensity invariance:}\\
+- Logarithmic scaling is essential for equalizing different song intensities\\
+$\rightarrow$ Intensity information can be manipulated more easily when in form
+of a signal offset in log-space than a multiplicative scale in linear space
+
+- Scale $\alpha$ can only be redistributed, not entirely eliminated from $\adapt(t)$\\
+$\rightarrow$ Turn initial scaling of song $s(t)$ by $\alpha$ into scaling of noise $\eta(t)$ by $\frac{1}{\alpha}$
+
+- Capability to compensate for intensity variations, i.e. selective amplification
+of output $\adapt(t)$ relative to input $\env(t)$, is limited by input SNR (Eq.\,\ref{eq:toy_snr}):\\
+$\alpha\gg1$: Attenuation of $\eta(t)$ term $\rightarrow$ $s(t)$ dominates $\adapt(t)$\\
+$\alpha\approx1$: Negligible effect on $\eta(t)$ term $\rightarrow$ $\adapt(t)=\log[s(t)+\eta(t)]$\\
+$\alpha\ll1$: Amplification of $\eta(t)$ term $\rightarrow$ $\eta(t)$ dominates $\adapt(t)$\\
+$\rightarrow$ Ability to equalize between different sufficiently large scales of $s(t)$\\
+$\rightarrow$ Inability to recover $s(t)$ when initially masked by noise floor $\eta(t)$
+
+- Logarithmic scaling emphasizes small amplitudes (song onsets, noise floor) \\
+$\rightarrow$ Recurring trade-off: Equalizing signal intensity vs.\ preserving initial SNR

 \subsection{Threshold nonlinearity \& temporal averaging}

@@ -227,35 +266,42 @@ Convolved $c_i(t)$ $\xrightarrow{\nl}$ Binary $\bi(t)$ $\xrightarrow{\lp}$ Featu
 - Within an observed time interval $T$, $c_i(t)$ follows probability density $\pc$\\
 - Within $T$, $c_i(t)$ exceeds threshold value $\thr$ for time $T_1$ ($T_1+T_0=T$)\\
 - Threshold $\nl$ splits $\pc$ around $\thr$ in two complementary parts
+%
 \begin{equation}
    \int_{\thr}^{+\infty} p(c_i,T)\,dc_i\,=\,1\,-\,\int_{-\infty}^{\thr} p(c_i,T)\,dc_i\,=\,\frac{T_1}{T}
    \label{eq:pdf_split}
 \end{equation}
+%
 $\rightarrow$ Semi-infinite integral over right-sided portion of split $\pc$ gives ratio
 of time $T_1$ where $c_i(t)>\thr$ to total time $T$ due to normalization of $\pc$
+%
 \begin{equation}
    \infint \pc\,dc_i\,=\,1
    \label{eq:pdf}
 \end{equation}
-
+%
 \textbf{Averaging component:}\\
 - Lowpass filter over binary response $\bi(t)$ (Eq.\,\ref{eq:lowpass}) can be
 approximated as temporal averaging over a suitable time interval $\tlp$ ($\tlp > \frac{1}{\fc}$)\\
 - Within $\tlp$, $\bi(t)$ takes a value of 1 ($c_i(t)>\thr$) for time $T_1$ ($T_1+T_0=\tlp$)
+%
 \begin{equation}
    \feat(t)\,\approx\,\frac{1}{\tlp} \int_{t}^{t\,+\,\tlp} \bi(\tau)\,d\tau\,=\,\frac{T_1}{\tlp}
    \label{eq:feat_avg}
 \end{equation}
+%
 $\rightarrow$ Temporal averaging over $\bi(t)\in\{0,1\}$ (Eq.\,\ref{eq:binary}) gives
 ratio of time $T_1$ where $c_i(t)>\thr$ to total averaging interval $\tlp$\\
 $\rightarrow$ Feature $\feat(t)$ approximately represents supra-threshold fraction of $\tlp$

 \textbf{Combined result:}\\
 - Feature $\feat(t)$ can be linked to the distribution of $c_i(t)$ using Eqs.\,\ref{eq:pdf_split} \& \ref{eq:feat_avg}
+%
 \begin{equation}
    \feat(t)\,\approx\,\int_{\thr}^{+\infty} \pclp\,dc_i\,=\,P(c_i\,>\,\thr,\,\tlp)
    \label{eq:feat_prop}
 \end{equation}
+%
 $\rightarrow$ Because the integral over a probability density is a cumulative
 probability, the value of feature $\feat(t)$ (temporal compression of $\bi(t)$)
 at every time point $t$ signifies the probability that convolution output
@@ -264,7 +310,20 @@ interval $\tlp$

 \textbf{Implication for intensity invariance:}\\
 - Convolution output $c_i(t)$ = amplitude-based quantity\\
-$\rightarrow$ Values indicate how well template waveform $k_i(t)$ matches signal $x(t)$\\
+$\rightarrow$ Values indicate correspondence between template waveform and signal
+
+$\rightarrow$ Values indicate how well a template waveform $k_i(t)$ matches
+the waveform of the pre-processed signal $\adapt(t)$ at a given time point $t$
+
+- Feature $\feat(t)$ = duty cycle-based quantity\\
+$\rightarrow$ Values indicate the ratio of two temporal quantities\\
+$\rightarrow$ Values indicate the ratio of time, or probability, that $c_i(t)$
+exceeds threshold value $\thr$ within averaging interval $\tlp$
+
+
+
+
 - Feature $\feat(t)$ = duty cycle-based quantity\\
 $\rightarrow$ Values indicate how often $c_i(t)$ exceeds threshold value $\thr$