Done bullet-pointing LHP invariance, nearly done with THP invariance.

This commit is contained in:
j-hartling
2025-11-13 16:52:22 +01:00
parent 53e43f61f5
commit 2b9f411a06
6 changed files with 113 additions and 48 deletions

103
main.tex
View File

@@ -29,8 +29,8 @@ style=authoryear,
\newcommand{\adapt}{\raw_{\text{adapt}}} % Adapted signal
\newcommand{\dec}{\log_{10}} % Logarithm base 10
\newcommand{\sigs}{\sigma_{\text{s}}} % Song standard deviation
\newcommand{\sign}{\sigma_{\eta}} % Noise standard deviation
\newcommand{\svar}{\sigma_{\text{s}}^{2}} % Song signal variance
\newcommand{\nvar}{\sigma_{\eta}^{2}} % Noise signal variance
\newcommand{\infint}{\int_{-\infty}^{+\infty}} % Improper integral over the entire real line
\newcommand{\thr}{\Theta_i} % Step function threshold value
\newcommand{\nl}{H(c_i\,-\,\thr)} % Shifted Heaviside step function
@@ -105,33 +105,36 @@ Initial: Continuous acoustic input signal $x(t)$
Filtering of behaviorally relevant frequencies by tympanal membrane\\
$\rightarrow$ Bandpass filter 5-30 kHz
%
\begin{equation}
\filt(t)\,=\,\raw(t)\,*\,\bp, \qquad \fc\,=\,5\,\text{kHz},\,30\,\text{kHz}
\label{eq:bandpass}
\end{equation}
%
Extraction of signal envelope (AM encoding) by receptor population\\
$\rightarrow$ Full-wave rectification, then lowpass filter 500 Hz
%
\begin{equation}
\env(t)\,=\,|\filt(t)|\,*\,\lp, \qquad \fc\,=\,500\,\text{Hz}
\label{eq:env}
\end{equation}
%
Logarithmically compressed intensity tuning curve of receptors\\
$\rightarrow$ Decibel transformation
%
\begin{equation}
\db(t)\,=\,10\,\cdot\,\dec \frac{\env(t)}{\dbref}, \qquad \dbref\,=\,\max[\env(t)]
\label{eq:log}
\end{equation}
%
Spike-frequency adaptation in receptor and interneuron populations\\
$\rightarrow$ Highpass filter 10 Hz
%
\begin{equation}
\adapt(t)\,=\,\db(t)\,*\,\hp, \qquad \fc\,=\,10\,\text{Hz}
\label{eq:highpass}
\end{equation}
%
\subsection{Feature extraction by individual neurons}
``Post-split portion'' of the auditory pathway:\\
@@ -147,20 +150,24 @@ $\rightarrow$ Individual neuron-specific response traces from this stage onwards
Template matching by individual ANs\\
- Filter base (STA approximations): Set of Gabor kernels\\
- Gabor parameters: $\sigma, \phi, f$ $\rightarrow$ Determines kernel sign and lobe number
%
\begin{equation}
k(t)\,=\,e^{-\frac{t^{2}}{2\sigma^{2}}}\,\cdot\,\sin(2\pi f t\,+\,\phi)
\label{eq:gabor}
\end{equation}
%
$\rightarrow$ Separate convolution with each member of the kernel set
%
\begin{equation}
c_i(t)\,=\,\adapt(t)\,*\,k_i(t)
= \infint \adapt(\tau)\,\cdot\,k_i(t\,-\,\tau)\,d\tau
\label{eq:conv}
\end{equation}
%
Thresholding nonlinearity in ascending neurons (or further downstream)\\
- Binarization of AN response traces into ``relevant'' vs.\ ``irrelevant''\\
$\rightarrow$ Shifted Heaviside step-function $\nl$ (or steep sigmoid threshold?)
%
\begin{equation}
\bi(t)\,=\,\begin{cases}
\;1, \quad c_i(t)\,>\,\thr\\
@@ -168,17 +175,18 @@ $\rightarrow$ Shifted Heaviside step-function $\nl$ (or steep sigmoid threshold?
\end{cases}
\label{eq:binary}
\end{equation}
%
Temporal averaging by neurons of the central brain\\
- Finalized set of slowly changing kernel-specific features (one per AN)\\
- Different species-specific song patterns are characterized by a distinct combination
of feature values $\rightarrow$ Clusters in high-dimensional feature space\\
$\rightarrow$ Lowpass filter 1 Hz
%
\begin{equation}
\feat(t)\,=\,\bi(t)\,*\,\lp, \qquad \fc\,=\,1\,\text{Hz}
\label{eq:lowpass}
\end{equation}
%
\section{Two mechanisms driving the emergence of intensity-invariant song representation}
\subsection{Logarithmic scaling \& spike-frequency adaptation}
@@ -186,38 +194,69 @@ $\rightarrow$ Lowpass filter 1 Hz
Envelope $\env(t)$ $\xrightarrow{\text{dB}}$ Logarithmic $\db(t)$ $\xrightarrow{\hp}$ Adapted $\adapt(t)$
- Rewrite signal envelope $\env(t)$ (Eq.\,\ref{eq:env}) as a synthetic mixture:\\
1) Song signal $s(t)$ ($\sigs=1$) with variable multiplicative scale $\alpha\geq0$\\
2) Fixed-scale additive noise $\eta(t)$ ($\sign=1$)
1) Song signal $s(t)$ ($\svar=1$) with variable multiplicative scale $\alpha\geq0$\\
2) Fixed-scale additive noise $\eta(t)$ ($\nvar=1$)
%
\begin{equation}
\env(t)\,=\,\alpha\,\cdot\,s(t)\,+\,\eta(t),\qquad \env(t)\,>\,0\enspace\forall\enspace t\,\in\,\mathbb{R}
\label{eq:toy_env}
\end{equation}
%
- Signal-to-noise ratio (SNR): Ratio of variances of synthetic mixture
$\env(t)$ with ($\alpha>0$) and without ($\alpha=0$) song signal $s(t)$, assuming $s(t)\perp\eta(t)$
%
\begin{equation}
\text{SNR}\,=\,\frac{\sigma_{s+\eta}^{2}}{\nvar}\,=\,\frac{\alpha^{2}\,\cdot\,\svar\,+\,\nvar}{\nvar}\,=\,\alpha^{2}\,+\,1
\label{eq:toy_snr}
\end{equation}
%
\textbf{Logarithmic component:}\\
- Apply decibel transformation (Eq.\,\ref{eq:log}) to synthetic $\env(t)$\\
- Simplify decibel transformation (Eq.\,\ref{eq:log}) and apply to synthetic $\env(t)$\\
- Isolate scale $\alpha$ and reference $\dbref$ using logarithm product/quotient laws
%
\begin{equation}
\begin{split}
\db(t)\,&=\,10\,\cdot\,\dec \frac{\alpha\,\cdot\,s(t)\,+\,\eta(t)}{\dbref}\\
&=\,10\,\cdot\,\big(\dec \frac{\alpha}{\dbref}\,+\,\dec[s(t)\,+\,\frac{\eta(t)}{\alpha}]\big)
\db(t)\,&=\,\log \frac{\alpha\,\cdot\,s(t)\,+\,\eta(t)}{\dbref}\\
&=\,\log \frac{\alpha}{\dbref}\,+\,\log \big[s(t)\,+\,\frac{\eta(t)}{\alpha}\big]
\end{split}
\label{eq:toy_log}
\end{equation}
%
$\rightarrow$ In log-space, a multiplicative scaling factor becomes additive\\
$\rightarrow$ Allows for the separation of song signal $s(t)$ and its scale $\alpha$\\
$\rightarrow$ Introduces scaling of noise term $\eta(t)$ by the inverse of $\alpha$\\
$\rightarrow$ Normalization by $\dbref$ applies equally to all terms (no individual effects)
\textbf{Adaptation component:}\\
- Highpass filter over logarithmically scaled $\db(t)$ (Eq.\,\ref{eq:highpass}) can
be approximated as subtraction of the signal offset (DC removal) within a suitable
time interval $\thp$ ($0 < \thp < \frac{1}{\fc}$)\\
- Highpass filter over $\db(t)$ (Eq.\,\ref{eq:highpass}) can
be approximated as subtraction of the local signal offset within a suitable time
interval $\thp$ ($0 \ll \thp < \frac{1}{\fc}$)
%
\begin{equation}
\begin{split}
\adapt(t)\,\approx\,\db(t)\,-\,\dec \frac{\alpha}{\dbref}\,=\,\dec{[s(t)\,+\,\frac{\eta(t)}{\alpha}]}
\adapt(t)\,\approx\,\db(t)\,-\,\log \frac{\alpha}{\dbref}\,=\,\log \big[s(t)\,+\,\frac{\eta(t)}{\alpha}\big]
\end{split}
\label{eq:toy_highpass}
\end{equation}
%
\textbf{Implication for intensity invariance:}\\
- Logarithmic scaling is essential for equalizing different song intensities\\
$\rightarrow$ Intensity information can be manipulated more easily when in form
of a signal offset in log-space than a multiplicative scale in linear space
- Scale $\alpha$ can only be redistributed, not entirely eliminated from $\adapt(t)$\\
$\rightarrow$ Turn initial scaling of song $s(t)$ by $\alpha$ into scaling of noise $\eta(t)$ by $\frac{1}{\alpha}$
- Capability to compensate for intensity variations, i.e. selective amplification
of output $\adapt(t)$ relative to input $\env(t)$, is limited by input SNR (Eq.\,\ref{eq:toy_snr}):\\
$\alpha\gg1$: Attenuation of $\eta(t)$ term $\rightarrow$ $s(t)$ dominates $\adapt(t)$\\
$\alpha\approx1$: Negligible effect on $\eta(t)$ term $\rightarrow$ $\adapt(t)=\log[s(t)+\eta(t)]$\\
$\alpha\ll1$: Amplification of $\eta(t)$ term $\rightarrow$ $\eta(t)$ dominates $\adapt(t)$\\
$\rightarrow$ Ability to equalize between different sufficiently large scales of $s(t)$\\
$\rightarrow$ Inability to recover $s(t)$ when initially masked by noise floor $\eta(t)$
- Logarithmic scaling emphasizes small amplitudes (song onsets, noise floor) \\
$\rightarrow$ Recurring trade-off: Equalizing signal intensity vs.\ preserving initial SNR
\subsection{Threshold nonlinearity \& temporal averaging}
@@ -227,35 +266,42 @@ Convolved $c_i(t)$ $\xrightarrow{\nl}$ Binary $\bi(t)$ $\xrightarrow{\lp}$ Featu
- Within an observed time interval $T$, $c_i(t)$ follows probability density $\pc$\\
- Within $T$, $c_i(t)$ exceeds threshold value $\thr$ for time $T_1$ ($T_1+T_0=T$)\\
- Threshold $\nl$ splits $\pc$ around $\thr$ in two complementary parts
%
\begin{equation}
\int_{\thr}^{+\infty} p(c_i,T)\,dc_i\,=\,1\,-\,\int_{-\infty}^{\thr} p(c_i,T)\,dc_i\,=\,\frac{T_1}{T}
\label{eq:pdf_split}
\end{equation}
%
$\rightarrow$ Improper integral over right-sided portion of split $\pc$ gives ratio
of time $T_1$ where $c_i(t)>\thr$ to total time $T$ due to normalization of $\pc$
%
\begin{equation}
\infint \pc\,dc_i\,=\,1
\label{eq:pdf}
\end{equation}
%
\textbf{Averaging component:}\\
- Lowpass filter over binary response $\bi(t)$ (Eq.\,\ref{eq:lowpass}) can be
approximated as temporal averaging over a suitable time interval $\tlp$ ($\tlp > \frac{1}{\fc}$)\\
- Within $\tlp$, $\bi(t)$ takes a value of 1 ($c_i(t)>\thr$) for time $T_1$ ($T_1+T_0=\tlp$)
%
\begin{equation}
\feat(t)\,\approx\,\frac{1}{\tlp} \int_{t}^{t\,+\,\tlp} \bi(\tau)\,d\tau\,=\,\frac{T_1}{\tlp}
\label{eq:feat_avg}
\end{equation}
%
$\rightarrow$ Temporal averaging over $\bi(t)\in\{0,1\}$ (Eq.\,\ref{eq:binary}) gives
ratio of time $T_1$ where $c_i(t)>\thr$ to total averaging interval $\tlp$\\
$\rightarrow$ Feature $\feat(t)$ approximately represents supra-threshold fraction of $\tlp$
\textbf{Combined result:}\\
- Feature $\feat(t)$ can be linked to the distribution of $c_i(t)$ using Eqs.\,\ref{eq:pdf_split} \& \ref{eq:feat_avg}
%
\begin{equation}
\feat(t)\,\approx\,\int_{\thr}^{+\infty} \pclp\,dc_i\,=\,P(c_i\,>\,\thr,\,\tlp)
\label{eq:feat_prop}
\end{equation}
%
$\rightarrow$ Because the integral over a probability density is a cumulative
probability, the value of feature $\feat(t)$ (temporal compression of $\bi(t)$)
at every time point $t$ signifies the probability that convolution output
@@ -264,7 +310,20 @@ interval $\tlp$
\textbf{Implication for intensity invariance:}\\
- Convolution output $c_i(t)$ = amplitude-based quantity\\
$\rightarrow$ Values indicate how well template waveform $k_i(t)$ matches signal $x(t)$\\
$\rightarrow$ Values indicate correspondence between template waveform and signal\\
$\rightarrow$ Values indicate how well a template waveform $k_i(t)$
matches
the waveform of the pre-processed signal $\adapt(t)$ at a given time point $t$
- Feature $\feat(t)$ = duty cycle-based quantity\\
$\rightarrow$ Values indicate the ratio of two temporal quantities\\
$\rightarrow$ Values indicate the ratio of time, or probability, that $c_i(t)$
exceeds threshold value $\thr$ within averaging interval $\tlp$
- Feature $\feat(t)$ = duty cycle-based quantity\\
$\rightarrow$ Values indicate how often $c_i(t)$ exceeds threshold value $\thr$