diff --git a/main.aux b/main.aux index 46425a5..b04ffb0 100644 --- a/main.aux +++ b/main.aux @@ -17,17 +17,18 @@ \newlabel{eq:gabor}{{5}{3}{}{}{}} \newlabel{eq:conv}{{6}{3}{}{}{}} \newlabel{eq:binary}{{7}{3}{}{}{}} -\newlabel{eq:lowpass}{{8}{4}{}{}{}} +\newlabel{eq:lowpass}{{8}{3}{}{}{}} \@writefile{toc}{\contentsline {section}{\numberline {3}Two mechanisms driving the emergence of intensity-invariant song representation}{4}{}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Logarithmic scaling \& spike-frequency adaptation}{4}{}\protected@file@percent } \newlabel{eq:toy_env}{{9}{4}{}{}{}} -\newlabel{eq:toy_log}{{10}{4}{}{}{}} -\newlabel{eq:toy_highpass}{{11}{4}{}{}{}} +\newlabel{eq:toy_snr}{{10}{4}{}{}{}} +\newlabel{eq:toy_log}{{11}{4}{}{}{}} +\newlabel{eq:toy_highpass}{{12}{4}{}{}{}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Threshold nonlinearity \& temporal averaging}{5}{}\protected@file@percent } -\newlabel{eq:pdf_split}{{12}{5}{}{}{}} -\newlabel{eq:pdf}{{13}{5}{}{}{}} -\newlabel{eq:feat_avg}{{14}{5}{}{}{}} -\newlabel{eq:feat_prop}{{15}{5}{}{}{}} +\newlabel{eq:pdf_split}{{13}{5}{}{}{}} +\newlabel{eq:pdf}{{14}{5}{}{}{}} +\newlabel{eq:feat_avg}{{15}{5}{}{}{}} +\newlabel{eq:feat_prop}{{16}{6}{}{}{}} \@writefile{toc}{\contentsline {section}{\numberline {4}Discriminating species-specific song\\patterns in feature space}{6}{}\protected@file@percent } \@writefile{toc}{\contentsline {section}{\numberline {5}Conclusions \& outlook}{6}{}\protected@file@percent } \abx@aux@read@bbl@mdfivesum{D9585839A1478809D4C6E9E5BFFC8FE4} diff --git a/main.fdb_latexmk b/main.fdb_latexmk index afa36c3..cda1bd5 100644 --- a/main.fdb_latexmk +++ b/main.fdb_latexmk @@ -1,14 +1,14 @@ # Fdb version 4 -["biber main"] 0 "main.bcf" "main.bbl" "main" 1762872619.20857 -1 +["biber main"] 0 "main.bcf" "main.bbl" "main" 1763049091.30626 -1 "cite.bib" 1762424399.63955 252 7fc49109df64576e199ce34111f01817 "" - "main.bcf" 1762872619.1766 107531 
bb4289418b1a6604c7c781cbf02f8242 "pdflatex" + "main.bcf" 1763049091.2738 107531 bb4289418b1a6604c7c781cbf02f8242 "pdflatex" (generated) "main.bbl" "main.blg" (rewritten before read) -["pdflatex"] 1762872618.80069 "/home/hartling/phd/paper/paper_2025/main.tex" "main.pdf" "main" 1762872619.20877 0 +["pdflatex"] 1763049090.88497 "/home/hartling/phd/paper/paper_2025/main.tex" "main.pdf" "main" 1763049091.30646 0 "/etc/texmf/web2c/texmf.cnf" 1761560044.43676 475 c0e671620eb5563b2130f56340a5fde8 "" - "/home/hartling/phd/paper/paper_2025/main.tex" 1762872618.31261 12714 174d8417d0722be3e50c61997690350b "" + "/home/hartling/phd/paper/paper_2025/main.tex" 1763049090.80581 14802 bc1953ed33dca011873a94b8381b353a "" "/usr/share/texlive/texmf-dist/fonts/map/fontname/texfonts.map" 1577235249 3524 cb3e574dea2d1052e39280babc910dc8 "" "/usr/share/texlive/texmf-dist/fonts/tfm/public/amsfonts/cmextra/cmex7.tfm" 1246382020 1004 54797486969f23fa377b128694d548df "" "/usr/share/texlive/texmf-dist/fonts/tfm/public/amsfonts/cmextra/cmex8.tfm" 1246382020 988 bdf658c3bfc2d96d3c8b02cfc1c94c20 "" @@ -81,10 +81,10 @@ "/usr/share/texmf/web2c/texmf.cnf" 1707919699 40399 f2c302f7d2af602abb742093540a5834 "" "/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map" 1761648465.84635 5472669 54eaf61a88b6b7896ebd0dac973cb29c "" "/var/lib/texmf/web2c/pdftex/pdflatex.fmt" 1761648508 8213325 7fd20752ab46ff9aa583e4973d7433df "" - "main.aux" 1762872619.1756 2095 e1147be531e57b160e5a1f27b419d862 "pdflatex" - "main.bbl" 1762858512.77528 2032 d9585839a1478809d4c6e9e5bffc8fe4 "biber main" - "main.run.xml" 1762872619.1776 2305 d0067550f22ade5b49f22d40cea22838 "pdflatex" - "main.tex" 1762872618.31261 12714 174d8417d0722be3e50c61997690350b "" + "main.aux" 1763049091.2728 2132 0170975d330d9f141156fd1cd9431731 "pdflatex" + "main.bbl" 1762945661.89497 2032 d9585839a1478809d4c6e9e5bffc8fe4 "biber main" + "main.run.xml" 1763049091.2738 2305 d0067550f22ade5b49f22d40cea22838 "pdflatex" + "main.tex" 1763049090.80581 14802 
bc1953ed33dca011873a94b8381b353a "" (generated) "main.aux" "main.bcf" diff --git a/main.log b/main.log index b4e785d..5df91e6 100644 --- a/main.log +++ b/main.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023/Debian) (preloaded format=pdflatex 2025.10.28) 11 NOV 2025 15:50 +This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023/Debian) (preloaded format=pdflatex 2025.10.28) 13 NOV 2025 16:51 entering extended mode restricted \write18 enabled. file:line:error style messages enabled. @@ -398,17 +398,22 @@ Overfull \hbox (3.81674pt too wide) in paragraph at lines 70--77 [1 -{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map}] [2] [3] -Underfull \hbox (badness 10000) in paragraph at lines 211--215 +{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map}] [2] [3] [4] [5] +Underfull \hbox (badness 10000) in paragraph at lines 311--314 [] -[4] -Underfull \hbox (badness 10000) in paragraph at lines 271--273 + +Overfull \hbox (2.79701pt too wide) in paragraph at lines 315--318 +[]$\OMS/cmsy/m/n/12 !$ \OT1/cmr/m/n/12 Val-ues in-di-cate cor-re-spon-dence be-tween a tem-plate wave-form $\OML/cmm/m/it/12 k[]\OT1/cmr/m/n/12 (\OML/cmm/m/it/12 t\OT1/cmr/m/n/12 )$ matches + [] + + +Underfull \hbox (badness 10000) in paragraph at lines 330--332 [] -[5] [6] (./main.aux) +[6] (./main.aux) *********** LaTeX2e <2023-11-01> patch level 1 L3 programming layer <2024-01-22> @@ -418,15 +423,15 @@ Package logreq Info: Writing requests to 'main.run.xml'. 
) Here is how much of TeX's memory you used: - 9882 strings out of 474222 - 184270 string characters out of 5748732 - 1935975 words of memory out of 5000000 - 32077 multiletter control sequences out of 15000+600000 + 9883 strings out of 474222 + 184282 string characters out of 5748732 + 1936975 words of memory out of 5000000 + 32078 multiletter control sequences out of 15000+600000 566832 words of font info for 70 fonts, out of 8000000 for 9000 1141 hyphenation exceptions out of 8191 66i,19n,81p,713b,973s stack positions out of 10000i,1000n,20000p,200000b,200000s -Output written on main.pdf (6 pages, 146175 bytes). +Output written on main.pdf (6 pages, 148721 bytes). PDF statistics: 83 PDF objects out of 1000 (max. 8388607) 50 compressed objects within 1 object stream diff --git a/main.pdf b/main.pdf index 761a502..e916f20 100644 Binary files a/main.pdf and b/main.pdf differ diff --git a/main.synctex.gz b/main.synctex.gz index f5944b5..7ded71d 100644 Binary files a/main.synctex.gz and b/main.synctex.gz differ diff --git a/main.tex b/main.tex index c14e171..33b0910 100644 --- a/main.tex +++ b/main.tex @@ -29,8 +29,8 @@ style=authoryear, \newcommand{\adapt}{\raw_{\text{adapt}}} % Adapted signal \newcommand{\dec}{\log_{10}} % Logarithm base 10 -\newcommand{\sigs}{\sigma_{\text{s}}} % Song standard deviation -\newcommand{\sign}{\sigma_{\eta}} % Noise standard deviation +\newcommand{\svar}{\sigma_{\text{s}}^{2}} % Song signal variance +\newcommand{\nvar}{\sigma_{\eta}^{2}} % Noise signal variance \newcommand{\infint}{\int_{-\infty}^{+\infty}} % Indefinite integral \newcommand{\thr}{\Theta_i} % Step function threshold value \newcommand{\nl}{H(c_i\,-\,\thr)} % Shifted Heaviside step function @@ -105,33 +105,36 @@ Initial: Continuous acoustic input signal $x(t)$ Filtering of behaviorally relevant frequencies by tympanal membrane\\ $\rightarrow$ Bandpass filter 5-30 kHz +% \begin{equation} \filt(t)\,=\,\raw(t)\,*\,\bp, \qquad \fc\,=\,5\,\text{kHz},\,30\,\text{kHz} 
\label{eq:bandpass} \end{equation} - +% Extraction of signal envelope (AM encoding) by receptor population\\ $\rightarrow$ Full-wave rectification, then lowpass filter 500 Hz +% \begin{equation} \env(t)\,=\,|\filt(t)|\,*\,\lp, \qquad \fc\,=\,500\,\text{Hz} \label{eq:env} \end{equation} - +% Logarithmically compressed intensity tuning curve of receptors\\ $\rightarrow$ Decibel transformation +% \begin{equation} \db(t)\,=\,10\,\cdot\,\dec \frac{\env(t)}{\dbref}, \qquad \dbref\,=\,\max[\env(t)] \label{eq:log} \end{equation} - +% Spike-frequency adaptation in receptor and interneuron populations\\ $\rightarrow$ Highpass filter 10 Hz +% \begin{equation} \adapt(t)\,=\,\db(t)\,*\,\hp, \qquad \fc\,=\,10\,\text{Hz} \label{eq:highpass} \end{equation} - - +% \subsection{Feature extraction by individual neurons} "Post-split portion" of the auditory pathway:\\ @@ -147,20 +150,24 @@ $\rightarrow$ Individual neuron-specific response traces from this stage onwards Template matching by individual ANs\\ - Filter base (STA approximations): Set of Gabor kernels\\ - Gabor parameters: $\sigma, \phi, f$ $\rightarrow$ Determines kernel sign and lobe number +% \begin{equation} k(t)\,=\,e^{-\frac{t^{2}}{2\sigma^{2}}}\,\cdot\,\sin(2\pi f t\,+\,\phi) \label{eq:gabor} \end{equation} +% $\rightarrow$ Separate convolution with each member of the kernel set +% \begin{equation} c_i(t)\,=\,\adapt(t)\,*\,k_i(t) = \infint \adapt(\tau)\,\cdot\,k_i(t\,-\,\tau)\,d\tau \label{eq:conv} \end{equation} - +% Thresholding nonlinearity in ascending neurons (or further downstream)\\ - Binarization of AN response traces into "relevant" vs. "irrelevant"\\ $\rightarrow$ Shifted Heaviside step-function $\nl$ (or steep sigmoid threshold?) +% \begin{equation} \bi(t)\,=\,\begin{cases} \;1, \quad c_i(t)\,>\,\thr\\ @@ -168,17 +175,18 @@ $\rightarrow$ Shifted Heaviside step-function $\nl$ (or steep sigmoid threshold? 
\end{cases} \label{eq:binary} \end{equation} - +% Temporal averaging by neurons of the central brain\\ - Finalized set of slowly changing kernel-specific features (one per AN)\\ - Different species-specific song patterns are characterized by a distinct combination of feature values $\rightarrow$ Clusters in high-dimensional feature space\\ $\rightarrow$ Lowpass filter 1 Hz +% \begin{equation} \feat(t)\,=\,\bi(t)\,*\,\lp, \qquad \fc\,=\,1\,\text{Hz} \label{eq:lowpass} \end{equation} - +% \section{Two mechanisms driving the emergence of intensity-invariant song representation} \subsection{Logarithmic scaling \& spike-frequency adaptation} @@ -186,38 +194,69 @@ $\rightarrow$ Lowpass filter 1 Hz Envelope $\env(t)$ $\xrightarrow{\text{dB}}$ Logarithmic $\db(t)$ $\xrightarrow{\hp}$ Adapted $\adapt(t)$ - Rewrite signal envelope $\env(t)$ (Eq.\,\ref{eq:env}) as a synthetic mixture:\\ -1) Song signal $s(t)$ ($\sigs=1$) with variable multiplicative scale $\alpha\geq0$\\ -2) Fixed-scale additive noise $\eta(t)$ ($\sign=1$) +1) Song signal $s(t)$ ($\svar=1$) with variable multiplicative scale $\alpha\geq0$\\ +2) Fixed-scale additive noise $\eta(t)$ ($\nvar=1$) +% \begin{equation} \env(t)\,=\,\alpha\,\cdot\,s(t)\,+\,\eta(t),\qquad \env(t)\,>\,0\enspace\forall\enspace t\,\in\,\mathbb{R} \label{eq:toy_env} \end{equation} - +% +- Signal-to-noise ratio (SNR): Ratio of variances of synthetic mixture +$\env(t)$ with ($\alpha>0$) and without ($\alpha=0$) song signal $s(t)$, assuming $s(t)\perp\eta(t)$ +% +\begin{equation} + \text{SNR}\,=\,\frac{\sigma_{s+\eta}^{2}}{\nvar}\,=\,\frac{\alpha^{2}\,\cdot\,\svar\,+\,\nvar}{\nvar}\,=\,\alpha^{2}\,+\,1 + \label{eq:toy_snr} +\end{equation} +% \textbf{Logarithmic component:}\\ -- Apply decibel transformation (Eq.\,\ref{eq:log}) to synthetic $\env(t)$\\ +- Simplify decibel transformation (Eq.\,\ref{eq:log}) and apply to synthetic $\env(t)$\\ - Isolate scale $\alpha$ and reference $\dbref$ using logarithm product/quotient laws +% \begin{equation} 
\begin{split} - \db(t)\,&=\,10\,\cdot\,\dec \frac{\alpha\,\cdot\,s(t)\,+\,\eta(t)}{\dbref}\\ - &=\,10\,\cdot\,\big(\dec \frac{\alpha}{\dbref}\,+\,\dec[s(t)\,+\,\frac{\eta(t)}{\alpha}]\big) + \db(t)\,&=\,\log \frac{\alpha\,\cdot\,s(t)\,+\,\eta(t)}{\dbref}\\ + &=\,\log \frac{\alpha}{\dbref}\,+\,\log \big[s(t)\,+\,\frac{\eta(t)}{\alpha}\big] \end{split} \label{eq:toy_log} \end{equation} +% $\rightarrow$ In log-space, a multiplicative scaling factor becomes additive\\ $\rightarrow$ Allows for the separation of song signal $s(t)$ and its scale $\alpha$\\ $\rightarrow$ Introduces scaling of noise term $\eta(t)$ by the inverse of $\alpha$\\ $\rightarrow$ Normalization by $\dbref$ applies equally to all terms (no individual effects) \textbf{Adaptation component:}\\ -- Highpass filter over logarithmically scaled $\db(t)$ (Eq.\,\ref{eq:highpass}) can -be approximated as subtraction of the signal offset (DC removal) within a suitable -time interval $\thp$ ($0 < \thp < \frac{1}{\fc}$)\\ +- Highpass filter over $\db(t)$ (Eq.\,\ref{eq:highpass}) can +be approximated as subtraction of the local signal offset within a suitable time +interval $\thp$ ($0 \ll \thp < \frac{1}{\fc}$) +% \begin{equation} \begin{split} - \adapt(t)\,\approx\,\db(t)\,-\,\dec \frac{\alpha}{\dbref}\,=\,\dec{[s(t)\,+\,\frac{\eta(t)}{\alpha}]} + \adapt(t)\,\approx\,\db(t)\,-\,\log \frac{\alpha}{\dbref}\,=\,\log \big[s(t)\,+\,\frac{\eta(t)}{\alpha}\big] \end{split} \label{eq:toy_highpass} \end{equation} +% +\textbf{Implication for intensity invariance:}\\ +- Logarithmic scaling is essential for equalizing different song intensities\\ +$\rightarrow$ Intensity information can be manipulated more easily when in form +of a signal offset in log-space than a multiplicative scale in linear space + +- Scale $\alpha$ can only be redistributed, not entirely eliminated from $\adapt(t)$\\ +$\rightarrow$ Turn initial scaling of song $s(t)$ by $\alpha$ into scaling of noise $\eta(t)$ by $\frac{1}{\alpha}$ + +- Capability to 
compensate for intensity variations, i.e. selective amplification +of output $\adapt(t)$ relative to input $\env(t)$, is limited by input SNR (Eq.\,\ref{eq:toy_snr}):\\ +$\alpha\gg1$: Attenuation of $\eta(t)$ term $\rightarrow$ $s(t)$ dominates $\adapt(t)$\\ +$\alpha\approx1$: Negligible effect on $\eta(t)$ term $\rightarrow$ $\adapt(t)=\log[s(t)+\eta(t)]$\\ +$\alpha\ll1$: Amplification of $\eta(t)$ term $\rightarrow$ $\eta(t)$ dominates $\adapt(t)$\\ +$\rightarrow$ Ability to equalize between different sufficiently large scales of $s(t)$\\ +$\rightarrow$ Inability to recover $s(t)$ when initially masked by noise floor $\eta(t)$ + +- Logarithmic scaling emphasizes small amplitudes (song onsets, noise floor) \\ +$\rightarrow$ Recurring trade-off: Equalizing signal intensity vs.\ preserving initial SNR \subsection{Threshold nonlinearity \& temporal averaging} @@ -227,35 +266,42 @@ Convolved $c_i(t)$ $\xrightarrow{\nl}$ Binary $\bi(t)$ $\xrightarrow{\lp}$ Featu - Within an observed time interval $T$, $c_i(t)$ follows probability density $\pc$\\ - Within $T$, $c_i(t)$ exceeds threshold value $\thr$ for time $T_1$ ($T_1+T_0=T$)\\ - Threshold $\nl$ splits $\pc$ around $\thr$ in two complementary parts +% \begin{equation} \int_{\thr}^{+\infty} p(c_i,T)\,dc_i\,=\,1\,-\,\int_{-\infty}^{\thr} p(c_i,T)\,dc_i\,=\,\frac{T_1}{T} \label{eq:pdf_split} \end{equation} +% $\rightarrow$ Semi-definite integral over right-sided portion of split $\pc$ gives ratio of time $T_1$ where $c_i(t)>\thr$ to total time $T$ due to normalization of $\pc$ +% \begin{equation} \infint \pc\,dc_i\,=\,1 \label{eq:pdf} \end{equation} - +% \textbf{Averaging component:}\\ - Lowpass filter over binary response $\bi(t)$ (Eq.\,\ref{eq:lowpass}) can be approximated as temporal averaging over a suitable time interval $\tlp$ ($\tlp > \frac{1}{\fc}$)\\ - Within $\tlp$, $\bi(t)$ takes a value of 1 ($c_i(t)>\thr$) for time $T_1$ ($T_1+T_0=\tlp$) +% \begin{equation} \feat(t)\,\approx\,\frac{1}{\tlp} 
\int_{t}^{t\,+\,\tlp} \bi(\tau)\,d\tau\,=\,\frac{T_1}{\tlp} \label{eq:feat_avg} \end{equation} +% $\rightarrow$ Temporal averaging over $\bi(t)\in\{0,1\}$ (Eq.\,\ref{eq:binary}) gives ratio of time $T_1$ where $c_i(t)>\thr$ to total averaging interval $\tlp$\\ $\rightarrow$ Feature $\feat(t)$ approximately represents supra-threshold fraction of $\tlp$ \textbf{Combined result:}\\ - Feature $\feat(t)$ can be linked to the distribution of $c_i(t)$ using Eqs.\,\ref{eq:pdf_split} \& \ref{eq:feat_avg} +% \begin{equation} \feat(t)\,\approx\,\int_{\thr}^{+\infty} \pclp\,dc_i\,=\,P(c_i\,>\,\thr,\,\tlp) \label{eq:feat_prop} \end{equation} +% $\rightarrow$ Because the integral over a probability density is a cumulative probability, the value of feature $\feat(t)$ (temporal compression of $\bi(t)$) at every time point $t$ signifies the probability that convolution output @@ -264,7 +310,20 @@ interval $\tlp$ \textbf{Implication for intensity invariance:}\\ - Convolution output $c_i(t)$ = amplitude-based quantity\\ -$\rightarrow$ Values indicate how well template waveform $k_i(t)$ matches signal $x(t)$\\ +$\rightarrow$ Values indicate correspondence between template waveform and signal\\ + +$\rightarrow$ Values indicate how well a template waveform $k_i(t)$ +matches +the waveform of the pre-processed signal $\adapt(t)$ at a given time point $t$ + +- Feature $\feat(t)$ = duty cycle-based quantity\\ +$\rightarrow$ Values indicate the ratio of two temporal quantities\\ +$\rightarrow$ Values indicate the ratio of time, or probability, that $c_i(t)$ +exceeds threshold value $\thr$ within averaging interval $\tlp$ + + + + - Feature $\feat(t)$ = duty cycle-based quantity\\ $\rightarrow$ Values indicate how often $c_i(t)$ exceeds threshold value $\thr$