Done bullet-pointing LHP invariance, nearly done with THP invariance.
This commit is contained in:
103
main.tex
103
main.tex
@@ -29,8 +29,8 @@ style=authoryear,
|
||||
\newcommand{\adapt}{\raw_{\text{adapt}}} % Adapted signal
|
||||
|
||||
\newcommand{\dec}{\log_{10}} % Logarithm base 10
|
||||
\newcommand{\sigs}{\sigma_{\text{s}}} % Song standard deviation
|
||||
\newcommand{\sign}{\sigma_{\eta}} % Noise standard deviation
|
||||
\newcommand{\svar}{\sigma_{\text{s}}^{2}} % Song signal variance
|
||||
\newcommand{\nvar}{\sigma_{\eta}^{2}} % Noise signal variance
|
||||
\newcommand{\infint}{\int_{-\infty}^{+\infty}} % Improper integral over the entire real line
|
||||
\newcommand{\thr}{\Theta_i} % Step function threshold value
|
||||
\newcommand{\nl}{H(c_i\,-\,\thr)} % Shifted Heaviside step function
|
||||
@@ -105,33 +105,36 @@ Initial: Continuous acoustic input signal $x(t)$
|
||||
|
||||
Filtering of behaviorally relevant frequencies by tympanal membrane\\
|
||||
$\rightarrow$ Bandpass filter 5--30 kHz
|
||||
%
|
||||
\begin{equation}
|
||||
\filt(t)\,=\,\raw(t)\,*\,\bp, \qquad \fc\,=\,5\,\text{kHz},\,30\,\text{kHz}
|
||||
\label{eq:bandpass}
|
||||
\end{equation}
|
||||
|
||||
%
|
||||
Extraction of signal envelope (AM encoding) by receptor population\\
|
||||
$\rightarrow$ Full-wave rectification, then lowpass filter 500 Hz
|
||||
%
|
||||
\begin{equation}
|
||||
\env(t)\,=\,|\filt(t)|\,*\,\lp, \qquad \fc\,=\,500\,\text{Hz}
|
||||
\label{eq:env}
|
||||
\end{equation}
|
||||
|
||||
%
|
||||
Logarithmically compressed intensity tuning curve of receptors\\
|
||||
$\rightarrow$ Decibel transformation
|
||||
%
|
||||
\begin{equation}
|
||||
\db(t)\,=\,10\,\cdot\,\dec \frac{\env(t)}{\dbref}, \qquad \dbref\,=\,\max[\env(t)]
|
||||
\label{eq:log}
|
||||
\end{equation}
|
||||
|
||||
%
|
||||
Spike-frequency adaptation in receptor and interneuron populations\\
|
||||
$\rightarrow$ Highpass filter 10 Hz
|
||||
%
|
||||
\begin{equation}
|
||||
\adapt(t)\,=\,\db(t)\,*\,\hp, \qquad \fc\,=\,10\,\text{Hz}
|
||||
\label{eq:highpass}
|
||||
\end{equation}
|
||||
|
||||
|
||||
%
|
||||
\subsection{Feature extraction by individual neurons}
|
||||
|
||||
"Post-split portion" of the auditory pathway:\\
|
||||
@@ -147,20 +150,24 @@ $\rightarrow$ Individual neuron-specific response traces from this stage onwards
|
||||
Template matching by individual ANs\\
|
||||
- Filter base (STA approximations): Set of Gabor kernels\\
|
||||
- Gabor parameters: $\sigma, \phi, f$ $\rightarrow$ Determines kernel sign and lobe number
|
||||
%
|
||||
\begin{equation}
|
||||
k(t)\,=\,e^{-\frac{t^{2}}{2\sigma^{2}}}\,\cdot\,\sin(2\pi f t\,+\,\phi)
|
||||
\label{eq:gabor}
|
||||
\end{equation}
|
||||
%
|
||||
$\rightarrow$ Separate convolution with each member of the kernel set
|
||||
%
|
||||
\begin{equation}
|
||||
c_i(t)\,=\,\adapt(t)\,*\,k_i(t)
|
||||
= \infint \adapt(\tau)\,\cdot\,k_i(t\,-\,\tau)\,d\tau
|
||||
\label{eq:conv}
|
||||
\end{equation}
|
||||
|
||||
%
|
||||
Thresholding nonlinearity in ascending neurons (or further downstream)\\
|
||||
- Binarization of AN response traces into ``relevant'' vs.\ ``irrelevant''\\
|
||||
$\rightarrow$ Shifted Heaviside step-function $\nl$ (or steep sigmoid threshold?)
|
||||
%
|
||||
\begin{equation}
|
||||
\bi(t)\,=\,\begin{cases}
|
||||
\;1, \quad c_i(t)\,>\,\thr\\
|
||||
@@ -168,17 +175,18 @@ $\rightarrow$ Shifted Heaviside step-function $\nl$ (or steep sigmoid threshold?
|
||||
\end{cases}
|
||||
\label{eq:binary}
|
||||
\end{equation}
|
||||
|
||||
%
|
||||
Temporal averaging by neurons of the central brain\\
|
||||
- Finalized set of slowly changing kernel-specific features (one per AN)\\
|
||||
- Different species-specific song patterns are characterized by a distinct combination
|
||||
of feature values $\rightarrow$ Clusters in high-dimensional feature space\\
|
||||
$\rightarrow$ Lowpass filter 1 Hz
|
||||
%
|
||||
\begin{equation}
|
||||
\feat(t)\,=\,\bi(t)\,*\,\lp, \qquad \fc\,=\,1\,\text{Hz}
|
||||
\label{eq:lowpass}
|
||||
\end{equation}
|
||||
|
||||
%
|
||||
\section{Two mechanisms driving the emergence of intensity-invariant song representation}
|
||||
|
||||
\subsection{Logarithmic scaling \& spike-frequency adaptation}
|
||||
@@ -186,38 +194,69 @@ $\rightarrow$ Lowpass filter 1 Hz
|
||||
Envelope $\env(t)$ $\xrightarrow{\text{dB}}$ Logarithmic $\db(t)$ $\xrightarrow{\hp}$ Adapted $\adapt(t)$
|
||||
|
||||
- Rewrite signal envelope $\env(t)$ (Eq.\,\ref{eq:env}) as a synthetic mixture:\\
|
||||
1) Song signal $s(t)$ ($\sigs=1$) with variable multiplicative scale $\alpha\geq0$\\
|
||||
2) Fixed-scale additive noise $\eta(t)$ ($\sign=1$)
|
||||
1) Song signal $s(t)$ ($\svar=1$) with variable multiplicative scale $\alpha\geq0$\\
|
||||
2) Fixed-scale additive noise $\eta(t)$ ($\nvar=1$)
|
||||
%
|
||||
\begin{equation}
|
||||
\env(t)\,=\,\alpha\,\cdot\,s(t)\,+\,\eta(t),\qquad \env(t)\,>\,0\enspace\forall\enspace t\,\in\,\mathbb{R}
|
||||
\label{eq:toy_env}
|
||||
\end{equation}
|
||||
|
||||
%
|
||||
- Signal-to-noise ratio (SNR): Ratio of variances of synthetic mixture
|
||||
$\env(t)$ with ($\alpha>0$) and without ($\alpha=0$) song signal $s(t)$, assuming $s(t)\perp\eta(t)$
|
||||
%
|
||||
\begin{equation}
|
||||
\text{SNR}\,=\,\frac{\sigma_{s+\eta}^{2}}{\nvar}\,=\,\frac{\alpha^{2}\,\cdot\,\svar\,+\,\nvar}{\nvar}\,=\,\alpha^{2}\,+\,1
|
||||
\label{eq:toy_snr}
|
||||
\end{equation}
|
||||
%
|
||||
\textbf{Logarithmic component:}\\
|
||||
- Apply decibel transformation (Eq.\,\ref{eq:log}) to synthetic $\env(t)$\\
|
||||
- Simplify decibel transformation (Eq.\,\ref{eq:log}) and apply to synthetic $\env(t)$\\
|
||||
- Isolate scale $\alpha$ and reference $\dbref$ using logarithm product/quotient laws
|
||||
%
|
||||
\begin{equation}
|
||||
\begin{split}
|
||||
\db(t)\,&=\,10\,\cdot\,\dec \frac{\alpha\,\cdot\,s(t)\,+\,\eta(t)}{\dbref}\\
|
||||
&=\,10\,\cdot\,\big(\dec \frac{\alpha}{\dbref}\,+\,\dec[s(t)\,+\,\frac{\eta(t)}{\alpha}]\big)
|
||||
\db(t)\,&=\,\log \frac{\alpha\,\cdot\,s(t)\,+\,\eta(t)}{\dbref}\\
|
||||
&=\,\log \frac{\alpha}{\dbref}\,+\,\log \big[s(t)\,+\,\frac{\eta(t)}{\alpha}\big]
|
||||
\end{split}
|
||||
\label{eq:toy_log}
|
||||
\end{equation}
|
||||
%
|
||||
$\rightarrow$ In log-space, a multiplicative scaling factor becomes additive\\
|
||||
$\rightarrow$ Allows for the separation of song signal $s(t)$ and its scale $\alpha$\\
|
||||
$\rightarrow$ Introduces scaling of noise term $\eta(t)$ by the inverse of $\alpha$\\
|
||||
$\rightarrow$ Normalization by $\dbref$ applies equally to all terms (no individual effects)
|
||||
|
||||
\textbf{Adaptation component:}\\
|
||||
- Highpass filter over logarithmically scaled $\db(t)$ (Eq.\,\ref{eq:highpass}) can
|
||||
be approximated as subtraction of the signal offset (DC removal) within a suitable
|
||||
time interval $\thp$ ($0 < \thp < \frac{1}{\fc}$)\\
|
||||
- Highpass filter over $\db(t)$ (Eq.\,\ref{eq:highpass}) can
|
||||
be approximated as subtraction of the local signal offset within a suitable time
|
||||
interval $\thp$ ($0 \ll \thp < \frac{1}{\fc}$)
|
||||
%
|
||||
\begin{equation}
|
||||
\begin{split}
|
||||
\adapt(t)\,\approx\,\db(t)\,-\,\dec \frac{\alpha}{\dbref}\,=\,\dec{[s(t)\,+\,\frac{\eta(t)}{\alpha}]}
|
||||
\adapt(t)\,\approx\,\db(t)\,-\,\log \frac{\alpha}{\dbref}\,=\,\log \big[s(t)\,+\,\frac{\eta(t)}{\alpha}\big]
|
||||
\end{split}
|
||||
\label{eq:toy_highpass}
|
||||
\end{equation}
|
||||
%
|
||||
\textbf{Implication for intensity invariance:}\\
|
||||
- Logarithmic scaling is essential for equalizing different song intensities\\
|
||||
$\rightarrow$ Intensity information can be manipulated more easily when in form
|
||||
of a signal offset in log-space than a multiplicative scale in linear space
|
||||
|
||||
- Scale $\alpha$ can only be redistributed, not entirely eliminated from $\adapt(t)$\\
|
||||
$\rightarrow$ Turn initial scaling of song $s(t)$ by $\alpha$ into scaling of noise $\eta(t)$ by $\frac{1}{\alpha}$
|
||||
|
||||
- Capability to compensate for intensity variations, i.e. selective amplification
|
||||
of output $\adapt(t)$ relative to input $\env(t)$, is limited by input SNR (Eq.\,\ref{eq:toy_snr}):\\
|
||||
$\alpha\gg1$: Attenuation of $\eta(t)$ term $\rightarrow$ $s(t)$ dominates $\adapt(t)$\\
|
||||
$\alpha\approx1$: Negligible effect on $\eta(t)$ term $\rightarrow$ $\adapt(t)=\log[s(t)+\eta(t)]$\\
|
||||
$\alpha\ll1$: Amplification of $\eta(t)$ term $\rightarrow$ $\eta(t)$ dominates $\adapt(t)$\\
|
||||
$\rightarrow$ Ability to equalize between different sufficiently large scales of $s(t)$\\
|
||||
$\rightarrow$ Inability to recover $s(t)$ when initially masked by noise floor $\eta(t)$
|
||||
|
||||
- Logarithmic scaling emphasizes small amplitudes (song onsets, noise floor) \\
|
||||
$\rightarrow$ Recurring trade-off: Equalizing signal intensity vs.\ preserving initial SNR
|
||||
|
||||
\subsection{Threshold nonlinearity \& temporal averaging}
|
||||
|
||||
@@ -227,35 +266,42 @@ Convolved $c_i(t)$ $\xrightarrow{\nl}$ Binary $\bi(t)$ $\xrightarrow{\lp}$ Featu
|
||||
- Within an observed time interval $T$, $c_i(t)$ follows probability density $\pc$\\
|
||||
- Within $T$, $c_i(t)$ exceeds threshold value $\thr$ for time $T_1$ ($T_1+T_0=T$)\\
|
||||
- Threshold $\nl$ splits $\pc$ around $\thr$ in two complementary parts
|
||||
%
|
||||
\begin{equation}
|
||||
\int_{\thr}^{+\infty} p(c_i,T)\,dc_i\,=\,1\,-\,\int_{-\infty}^{\thr} p(c_i,T)\,dc_i\,=\,\frac{T_1}{T}
|
||||
\label{eq:pdf_split}
|
||||
\end{equation}
|
||||
%
|
||||
$\rightarrow$ Improper (semi-infinite) integral over right-sided portion of split $\pc$ gives ratio
|
||||
of time $T_1$ where $c_i(t)>\thr$ to total time $T$ due to normalization of $\pc$
|
||||
%
|
||||
\begin{equation}
|
||||
\infint \pc\,dc_i\,=\,1
|
||||
\label{eq:pdf}
|
||||
\end{equation}
|
||||
|
||||
%
|
||||
\textbf{Averaging component:}\\
|
||||
- Lowpass filter over binary response $\bi(t)$ (Eq.\,\ref{eq:lowpass}) can be
|
||||
approximated as temporal averaging over a suitable time interval $\tlp$ ($\tlp > \frac{1}{\fc}$)\\
|
||||
- Within $\tlp$, $\bi(t)$ takes a value of 1 ($c_i(t)>\thr$) for time $T_1$ ($T_1+T_0=\tlp$)
|
||||
%
|
||||
\begin{equation}
|
||||
\feat(t)\,\approx\,\frac{1}{\tlp} \int_{t}^{t\,+\,\tlp} \bi(\tau)\,d\tau\,=\,\frac{T_1}{\tlp}
|
||||
\label{eq:feat_avg}
|
||||
\end{equation}
|
||||
%
|
||||
$\rightarrow$ Temporal averaging over $\bi(t)\in\{0,1\}$ (Eq.\,\ref{eq:binary}) gives
|
||||
ratio of time $T_1$ where $c_i(t)>\thr$ to total averaging interval $\tlp$\\
|
||||
$\rightarrow$ Feature $\feat(t)$ approximately represents supra-threshold fraction of $\tlp$
|
||||
|
||||
\textbf{Combined result:}\\
|
||||
- Feature $\feat(t)$ can be linked to the distribution of $c_i(t)$ using Eqs.\,\ref{eq:pdf_split} \& \ref{eq:feat_avg}
|
||||
%
|
||||
\begin{equation}
|
||||
\feat(t)\,\approx\,\int_{\thr}^{+\infty} \pclp\,dc_i\,=\,P(c_i\,>\,\thr,\,\tlp)
|
||||
\label{eq:feat_prop}
|
||||
\end{equation}
|
||||
%
|
||||
$\rightarrow$ Because the integral over a probability density is a cumulative
|
||||
probability, the value of feature $\feat(t)$ (temporal compression of $\bi(t)$)
|
||||
at every time point $t$ signifies the probability that convolution output
|
||||
@@ -264,7 +310,20 @@ interval $\tlp$
|
||||
|
||||
\textbf{Implication for intensity invariance:}\\
|
||||
- Convolution output $c_i(t)$ = amplitude-based quantity\\
|
||||
$\rightarrow$ Values indicate how well template waveform $k_i(t)$ matches signal $x(t)$\\
|
||||
$\rightarrow$ Values indicate correspondence between template waveform and signal\\
|
||||
|
||||
$\rightarrow$ Values indicate correspondence between a template waveform $k_i(t)$
|
||||
and
|
||||
the waveform of the pre-processed signal $\adapt(t)$ at a given time point $t$
|
||||
|
||||
- Feature $\feat(t)$ = duty cycle-based quantity\\
|
||||
$\rightarrow$ Values indicate the ratio of two temporal quantities\\
|
||||
$\rightarrow$ Values indicate the ratio of time, or probability, that $c_i(t)$
|
||||
exceeds threshold value $\thr$ within the averaging interval $\tlp$
|
||||
|
||||
|
||||
|
||||
|
||||
- Feature $\feat(t)$ = duty cycle-based quantity\\
|
||||
$\rightarrow$ Values indicate how often $c_i(t)$ exceeds threshold value $\thr$
|
||||
|
||||
|
||||
Reference in New Issue
Block a user