Fetched a bunch of species-specific song snippets.

Worked those into the LogHP analysis. Worked the results into fig_invariance_log-hp.pdf. Put the details into the new fig_invariance_log-hp_species.pdf (appendix).
2026-04-14 17:30:58 +02:00
parent 0b9264b1e1
commit 36ac504efa
17 changed files with 490 additions and 205 deletions
--- a/python/condense_inv_data_log-hp.py
+++ b/python/condense_inv_data_log-hp.py
@@ -0,0 +1,136 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from thunderhopper.filetools import search_files, crop_paths
+from thunderhopper.modeltools import load_data, save_data
+from misc_functions import shorten_species
+from IPython import embed
+
+# GENERAL SETTINGS (species to condense, source tags expected in file paths, data locations):
+target_species = [  # one overview column and one saved archive per entry
+    'Chorthippus_biguttulus',
+    'Chorthippus_mollis',
+    'Chrysochraon_dispar',
+    'Euchorthippus_declivus',
+    'Gomphocerippus_rufus',
+    'Omocestus_rufipes',
+    'Pseudochorthippus_parallelus',
+]
+sources = [  # tags matched as substrings of the song file paths to group files by source
+    'BM04',
+    'BM93',
+    'DJN',
+    'GBC',
+    'FTN'
+]
+search_path = '../data/inv/log_hp/'  # directory handed to search_files() when fetching song files
+ref_path = '../data/inv/log_hp/ref_measures.npz'  # archive whose 'inv' entry is the reference measure
+save_path = '../data/inv/log_hp/condensed/'  # prefix of the per-species output archives
+
+# ANALYSIS SETTINGS:
+compute_ratios = True  # divide every loaded measure by the reference measure
+plot_overview = True  # draw the 3-row overview figure (songs / recordings / total)
+
+# PREPARATION (load reference measure and set up overview figure, each only if enabled):
+if compute_ratios:
+    ref_measure = np.load(ref_path)['inv']
+if plot_overview:
+    fig, axes = plt.subplots(3, len(target_species), figsize=(16, 9),
+                             sharex=True, sharey=True, layout='constrained')
+    axes[0, 0].set_ylabel('songs')
+    axes[1, 0].set_ylabel('recordings\n(mean ± SD)')
+    axes[2, 0].set_ylabel('total\n(mean ± SEM)')  # NOTE(review): bottom row is drawn from rec_mean.std(), i.e. SD across recordings, not SEM - confirm label
+
+# EXECUTION (per species: group song files by recording, condense them, save the result):
+for i, species in enumerate(target_species):
+    print(f'Processing {species}')
+    if plot_overview:
+        axes[0, i].set_title(shorten_species(species))
+
+    # Fetch all species-specific song files:
+    all_paths = search_files(species, incl='noise', ext='npz', dir=search_path)
+
+    # Separate by source (dict: source tag -> list of per-recording path lists):
+    sorted_paths = {}
+    for source in sources:
+
+        # Check for any source-specific song files (tag is matched against the full path, not the cropped name):
+        source_paths = [path for path in all_paths if source in path]
+        if not source_paths:
+            continue
+
+        # Separate by recording:
+        sorted_paths[source] = [[]]  # slot 0 always exists, even if it ends up empty
+        for path, name in zip(source_paths, crop_paths(source_paths)):
+
+            # Find numerical ID behind source tag (the +1 skips one separator character after the tag):
+            id_ind = name.find(source) + len(source) + 1
+            # Check if ID is followed by sub-ID (second '-'-separated field; IndexError if the rest of the name has no '-'):
+            sub_id = name[id_ind:].split('-')[1]
+            if 's' in sub_id:
+                # Single (time stamp in next spot); all such files of one source share slot 0:
+                sorted_paths[source][0].append(path)
+                continue
+            sub_id = int(sub_id)
+            # Multiple (sub-ID in next spot); NOTE(review): the leading ID itself is not used for grouping - confirm intended:
+            if sub_id > len(sorted_paths[source]):
+                # Open new recording-specific slot (only one per file, so sub-IDs must arrive ascending without gaps, else IndexError below):
+                sorted_paths[source].append([])
+            sorted_paths[source][sub_id - 1].append(path)
+    
+    # Flatten into a single list of per-recording path lists (discarding source separation):
+    sorted_paths = [path for paths in sorted_paths.values() for path in paths]
+    
+    # Condense across song files per recording:
+    for j, rec_paths in enumerate(sorted_paths):
+        for k, path in enumerate(rec_paths):
+
+            # Load invariance data:
+            data, _ = load_data(path, ['scales', 'measure_inv'])
+            scales, measure = data['scales'], data['measure_inv']
+
+            # Relate to noise (in-place division by the reference measure):
+            if compute_ratios:
+                measure /= ref_measure
+
+            if k == 0:
+                # Prepare song file-specific storage (allocated lazily because scales.size is only known after loading):
+                file_data = np.zeros((scales.size, len(rec_paths)), dtype=float)
+                if j == 0:
+                    # Prepare recording-specific storage (one column per recording of this species):
+                    rec_mean = np.zeros((scales.size, len(sorted_paths)), dtype=float)
+                    rec_sd = np.zeros((scales.size, len(sorted_paths)), dtype=float)
+
+            # Log song file data (assumes every file shares the same scales.size - TODO confirm):
+            file_data[:, k] = measure
+
+            if plot_overview:
+                axes[0, i].plot(scales, measure, c='k', alpha=0.5)
+
+        # Get recording statistics (NOTE(review): an empty rec_paths would reuse file_data of the previous recording - confirm no slot stays empty):
+        rec_mean[:, j] = file_data.mean(axis=1)
+        rec_sd[:, j] = file_data.std(axis=1)  # SD across the song files of this recording
+
+        if plot_overview:
+            axes[1, i].plot(scales, rec_mean[:, j], c='k')
+            axes[1, i].fill_between(scales, rec_mean[:, j] - rec_sd[:, j],
+                                    rec_mean[:, j] + rec_sd[:, j], color='k', alpha=0.2)
+
+    # Save condensed recording data for current species (mean and sd hold one column per recording):
+    np.savez(save_path + species, scales=scales, mean=rec_mean, sd=rec_sd)
+
+    if plot_overview:
+        spec_mean = rec_mean.mean(axis=1)  # mean across recordings
+        spec_sd = rec_mean.std(axis=1)  # SD across recordings (the axis label of this row says SEM)
+        axes[2, i].plot(scales, spec_mean, c='k')
+        axes[2, i].fill_between(scales, spec_mean - spec_sd, spec_mean + spec_sd,
+                                color='k', alpha=0.2)
+
+print('Done.')
+
+if plot_overview:  # panels were created with sharex/sharey, so only the first one is configured
+    axes[0, 0].set_xlim(scales[0], scales[-1])  # uses scales of the last file loaded
+    axes[0, 0].set_xscale('log')
+    axes[0, 0].set_yscale('log')
+plt.show()  # called even if plot_overview is False
+
+            
--- a/python/fig_invariance_log-hp.py
+++ b/python/fig_invariance_log-hp.py
@@ -27,58 +27,29 @@ def plot_snippets(axes, time, snippets, ymin=None, ymax=None, **kwargs):
        handles.extend(plot_line(ax, time, snippet, ymin=ymin, ymax=ymax, **kwargs))
    return handles

-def plot_dist_shifted(ax, data, axis, pdf=None, sigma=0.1, which='x',
-                      base=None, cap=None, add_pdf=False, shifted=False, **kwargs):
-    if pdf is None:
-        pdf, axis = get_kde(data, sigma, axis)
-    if base is None:
-        base = pdf.min()
-    if cap is None:
-        cap = pdf.max()
-    pdf = (pdf - pdf.min()) / (pdf.max() - pdf.min()) * (cap - base) + base
-
-    if which == 'x':
-        transform = ax.get_xaxis_transform()
-    elif which == 'y':
-        transform = ax.get_yaxis_transform()
-    else:
-        transform = ax.transData
-
-    rng = np.random.default_rng()
-    handles = []
-    for value in data:
-        ind = np.nonzero(axis == value)[0][0]
-        offset = base if not shifted else rng.uniform(base, pdf[ind])
-        variables = (offset, value) if which=='y' else (value, offset)
-        handles.extend(ax.plot(*variables, transform=transform, **kwargs))
-    if add_pdf:
-        variables = (pdf, axis) if which=='y' else (axis, pdf)
-        pdf_handle = ax.plot(*variables, transform=transform, c='k', lw=1)
-        return handles, pdf_handle
-    return handles
-
-def zalpha(handles, background='w', down=1):
-    twins = []
-    for handle in handles:
-        twin = handle.copy()
-        twin.set(color=background, alpha=1)
-        twin.set_zorder(handle.get_zorder() - down)
-        twins.append(twin)
-    return twins
+# def zalpha(handles, background='w', down=1):
+#     twins = []
+#     for handle in handles:
+#         twin = handle.copy()
+#         twin.set(color=background, alpha=1)
+#         twin.set_zorder(handle.get_zorder() - down)
+#         twins.append(twin)
+#     return twins

 # GENERAL SETTINGS:
-target = 'Omocestus_rufipes'
+target = 'Omocestus_rufipes_DJN_32-40s724ms-48s779ms'
 data_paths = search_files(target, excl='noise', dir='../data/inv/log_hp/')
 ref_path = '../data/inv/log_hp/ref_measures.npz'
 save_path = '../figures/fig_invariance_log_hp.pdf'
 target_species = [
-    'Omocestus_rufipes',
    'Chorthippus_biguttulus',
    'Chorthippus_mollis',
    'Chrysochraon_dispar',
+    'Euchorthippus_declivus',
    'Gomphocerippus_rufus',
+    'Omocestus_rufipes',
    'Pseudochorthippus_parallelus',
-    ]
+]
 stages = ['env', 'log', 'inv']
 load_kwargs = dict(
    files=stages,
@@ -159,12 +130,13 @@ fs = dict(
 )
 colors = load_colors('../data/stage_colors.npz')
 species_colors = load_colors('../data/species_colors.npz')
-noise_colors = [(0.5, 0.5, 0.5), (0.7, 0.7, 0.7)]
+noise_colors = [(0.6,) * 3, (0.8,) * 3]
 lw = dict(
    snip=1,
    big=4,
    spec=2,
-    plateau=1,
+    plateau=1.5,
+    legend=5,
 )
 xlabels = dict(
    big='scale $\\alpha$',
@@ -273,7 +245,7 @@ leg_kwargs = dict(
    columnspacing=1,
 )
 diag_kwargs = dict(
-    c=(0.75, 0.75, 0.75),
+    c=(0.3,) * 3,
    lw=2,
    ls='--',
    zorder=1.9,
@@ -297,18 +269,9 @@ plateau_line_kwargs = dict(
 )
 plateau_dot_kwargs = dict(
    marker='o',
-    markersize=10,
-    markeredgecolor='k',
+    markersize=8,
    markeredgewidth=1,
-    # alpha=1,
-    zorder=6,
    clip_on=False,
-    # base=0,
-    # cap=0.15,
-    # add_pdf=True,
-)
-kde_kwargs = dict(
-    sigma=0.1,
 )

 # PREPARATION:
@@ -317,18 +280,12 @@ if compute_ratios:

 species_measures = {}
 thresh_inds = np.zeros((len(target_species),), dtype=int)
-thresh_scales = np.zeros((len(target_species),), dtype=float)
 for i, species in enumerate(target_species):
-    path = search_files(species, incl='noise', dir='../data/inv/log_hp/')[0]
-    species_data = load_data(path, **load_kwargs)[0]
-    measure = species_data['measure_inv']
-    scales = species_data['scales']
-    if compute_ratios:
-        measure /= ref_measures['inv']
+    spec_path = search_files(species, dir='../data/inv/log_hp/condensed/')[0]
+    spec_data = dict(np.load(spec_path))
+    measure = spec_data['mean'].mean(axis=1)
    species_measures[species] = measure
    thresh_inds[i] = get_saturation(measure, **plateau_settings)[1]
-    thresh_scales[i] = scales[thresh_inds[i]]
-thresh_pdf, pdf_axis = get_kde(thresh_scales, axis=scales, **kde_kwargs)

 # EXECUTION:
 for data_path in data_paths:
@@ -460,24 +417,23 @@ for data_path in data_paths:
        big_axes[1].axvspan(noise_scales[low_ind], noise_scales[high_ind],
                            fc=noise_colors[1], **plateau_rect_kwargs)

-    # Plot species-specific noise-song measures:
+    # Plot species-specific noise-song invariance curves:
    for i, (species, measure) in enumerate(species_measures.items()):
+        # Plot invariance curve:
        color = species_colors[species]
-        ind, scale = thresh_inds[i], thresh_scales[i]
        big_axes[2].plot(noise_scales, measure, label=shorten_species(species),
                         c=color, lw=lw['spec'])
+        # Indicate saturation:
+        ind = thresh_inds[i]
+        scale = noise_scales[ind]
        big_axes[2].plot(scale, 0, c='w', alpha=1, zorder=5.5, **plateau_dot_kwargs,
                         transform=big_axes[2].get_xaxis_transform()) 
-        handle = big_axes[2].plot(scale, 0, c=color, alpha=0.5, **plateau_dot_kwargs,
-                                  transform=big_axes[2].get_xaxis_transform())
+        big_axes[2].plot(scale, 0, mfc=color, mec='k', alpha=0.75, zorder=6, **plateau_dot_kwargs,
+                         transform=big_axes[2].get_xaxis_transform())
        big_axes[2].vlines(scale, big_axes[2].get_ylim()[0], measure[ind],
                           color=color, **plateau_line_kwargs)
-    big_axes[2].legend(**leg_kwargs)
-
-    # handles = plot_dist_shifted(big_axes[2], species_threshs, axis=pdf_axis,
-    #                             pdf=thresh_pdf, **plateau_dot_kwargs)[0]
-    # [h.set_color(species_colors[s]) for h, s in zip(handles, target_species)]
-                      
+    legend = big_axes[2].legend(**leg_kwargs)
+    [h.set_lw(lw['legend']) for h in legend.legend_handles]

    if save_path is not None:
        fig.savefig(save_path, bbox_inches='tight')
--- a/python/fig_invariance_log-hp_species.py
+++ b/python/fig_invariance_log-hp_species.py
@@ -0,0 +1,108 @@
+import plotstyle_plt
+import numpy as np
+import matplotlib.pyplot as plt
+from thunderhopper.filetools import search_files
+from plot_functions import ylabel, super_xlabel, letter_subplots, title_subplot
+from color_functions import load_colors
+from misc_functions import shorten_species
+
+# GENERAL SETTINGS (species to plot and file locations):
+target_species = [  # one subplot column per entry, in this order
+    'Chorthippus_biguttulus',
+    'Chorthippus_mollis',
+    'Chrysochraon_dispar',
+    'Euchorthippus_declivus',
+    'Gomphocerippus_rufus',
+    'Omocestus_rufipes',
+    'Pseudochorthippus_parallelus',
+]
+data_path = '../data/inv/log_hp/condensed/'  # searched for one archive per species
+save_path = '../figures/fig_invariance_log-hp_species.pdf'  # target of fig.savefig()
+
+# GRAPH SETTINGS (all passed to plt.subplots()):
+fig_kwargs = dict(
+    figsize=(32/2.54, 16/2.54),  # 32 x 16 divided by 2.54, i.e. centimeters converted to inches
+    nrows=1,
+    ncols=len(target_species),
+    sharex=True,
+    sharey=True,
+    gridspec_kw=dict(
+        wspace=0.4,
+        hspace=0,
+        left=0.07,
+        right=0.98,
+        bottom=0.1,
+        top=0.95,
+    )
+)
+
+# PLOT SETTINGS:
+colors = load_colors('../data/species_colors.npz')  # indexed by species name further below
+line_kwargs = dict(  # forwarded to ax.plot() for the mean traces
+    lw=2,
+)
+fill_kwargs = dict(  # forwarded to ax.fill_between() for the mean +/- sd bands
+    alpha=0.3,
+    zorder=1,
+)
+xlab = 'scale $\\alpha$'
+ylab = '$\\sigma_{\\alpha}\\,/\\,\\sigma_{\\eta}$'
+xlab_kwargs = dict(  # forwarded to super_xlabel()
+    y=0,
+    fontsize=16,
+    ha='center',
+    va='bottom',
+)
+ylab_kwargs = dict(  # forwarded to ylabel() together with transform=fig.transFigure
+    x=0,
+    fontsize=20,
+    ha='center',
+    va='top',
+)
+title_kwargs = dict(  # forwarded to title_subplot() together with ref=fig
+    x=0.5,
+    yref=0.99,
+    ha='center',
+    va='top',
+    fontsize=16,
+    fontstyle='italic',
+)
+letter_kwargs = dict(  # currently unused: the letter_subplots() call is commented out
+    x=0.005,
+    y=0.99,
+    fontsize=22,
+    ha='left',
+    va='top',
+)
+
+# Prepare graph (axes are created with sharex/sharey, so only the first panel is configured):
+fig, axes = plt.subplots(**fig_kwargs)
+axes[0].set_ylim(0.9, 20)
+axes[0].set_xscale('log')
+axes[0].set_yscale('log')
+super_xlabel(xlab, fig, axes[0], axes[-1], **xlab_kwargs)
+ylabel(axes[0], ylab, **ylab_kwargs, transform=fig.transFigure)
+# letter_subplots(axes, **letter_kwargs)
+
+# Run through species (one panel each):
+for species, ax in zip(target_species, axes):
+    title_subplot(ax, shorten_species(species), ref=fig, **title_kwargs)
+    color = colors[species]
+    
+    # Load species data (first search hit; presumably the archive written by condense_inv_data_log-hp.py - verify):
+    path = search_files(species, dir=data_path)[0]
+    data = dict(np.load(path))  # read all stored arrays into a plain dict
+    scales = data['scales']
+    means = data['mean']
+    sds = data['sd']
+
+    # Plot recording-specific traces (transposing iterates over columns; NOTE(review): mean - sd may be <= 0 on the log y-axis - confirm acceptable):
+    for mean, sd in zip(means.T, sds.T):
+        ax.plot(scales, mean, c=color, **line_kwargs)
+        ax.fill_between(scales, mean - sd, mean + sd, color=color, **fill_kwargs)
+
+# Save graph:
+fig.savefig(save_path)
+plt.show()  # display the figure after it has been written to disk
+
+
--- a/python/plot_functions.py
+++ b/python/plot_functions.py
@@ -2,6 +2,7 @@ import string
 import numpy as np
 import matplotlib.pyplot as plt
 from matplotlib.transforms import Bbox, BboxTransformTo, TransformedBbox
+from misc_functions import get_kde

 def hide_ticks(ax, side='bottom', ticks=True):
    axis = 'x' if side in ['top', 'bottom'] else 'y'
@@ -298,3 +299,33 @@ def zoom_inset(ax, inset, handle, x0=None, x1=None, y0=None, y1=None, ref='x',
 def set_clip_box(artist, ax, bounds=[[0, -0.05], [1, 1.05]]):
    artist.set_clip_box(TransformedBbox(Bbox(bounds), ax.transAxes))
    return None
+
+def plot_dist_shifted(ax, data, axis, pdf=None, sigma=0.1, which='x',  # draws one ax.plot() call per value in data, optionally with the pdf curve
+                      base=None, cap=None, add_pdf=False, shifted=False, **kwargs):  # **kwargs are forwarded to ax.plot() for the per-value calls
+    if pdf is None:  # no precomputed pdf given: obtain pdf and axis from get_kde()
+        pdf, axis = get_kde(data, sigma, axis)
+    if base is None:  # lower bound of the rescaled pdf defaults to its minimum
+        base = pdf.min()
+    if cap is None:  # upper bound of the rescaled pdf defaults to its maximum
+        cap = pdf.max()
+    pdf = (pdf - pdf.min()) / (pdf.max() - pdf.min()) * (cap - base) + base  # linear rescaling to [base, cap]; divides by zero for a constant pdf
+
+    if which == 'x':  # values along x, offsets along y
+        transform = ax.get_xaxis_transform()
+    elif which == 'y':  # values along y, offsets along x
+        transform = ax.get_yaxis_transform()
+    else:  # any other value: plain data coordinates
+        transform = ax.transData
+
+    rng = np.random.default_rng()  # unseeded, so shifted offsets differ between runs
+    handles = []
+    for value in data:
+        ind = np.nonzero(axis == value)[0][0]  # exact match required: IndexError if value is not an element of axis
+        offset = base if not shifted else rng.uniform(base, pdf[ind])  # fixed at base, or random between base and the pdf at this value
+        variables = (offset, value) if which=='y' else (value, offset)  # swap coordinate order for the y variant
+        handles.extend(ax.plot(*variables, transform=transform, **kwargs))
+    if add_pdf:  # also draw the rescaled pdf as a thin black line
+        variables = (pdf, axis) if which=='y' else (axis, pdf)
+        pdf_handle = ax.plot(*variables, transform=transform, c='k', lw=1)
+        return handles, pdf_handle  # tuple (handles, list returned by ax.plot) in this branch only
+    return handles  # list of the handles collected from the per-value ax.plot() calls
--- a/python/save_inv_data_log-hp.py
+++ b/python/save_inv_data_log-hp.py
@@ -5,16 +5,17 @@ from thunderhopper.filters import decibel, sosfilter
 from IPython import embed

 # GENERAL SETTINGS:
-target = ['Omocestus_rufipes', '*'][0]
-data_paths = search_files(target, excl='noise', dir='../data/processed/')
+example_file = 'Omocestus_rufipes_DJN_32-40s724ms-48s779ms'
+search_target = ['*', example_file][1]
+data_paths = search_files(search_target, excl='noise', dir='../data/processed/')
 noise_path = '../data/processed/white_noise_sd-1.npz'
 save_path = '../data/inv/log_hp/'

 # ANALYSIS SETTINGS:
-add_noise = target == '*' or False
-save_snippets = target == 'Omocestus_rufipes'
+add_noise = search_target == '*' or False
+save_detailed = search_target == example_file
 example_scales = np.array([0.1, 1, 10, 30, 100, 300])
-scales = np.geomspace(0.1, 10000, 500)
+scales = np.geomspace(0.01, 10000, 1000)
 scales = np.unique(np.concatenate((scales, example_scales)))

 # PREPARATION:
@@ -35,47 +36,67 @@ for data_path, name in zip(data_paths, crop_paths(data_paths)):

    # Normalize song component:
    song /= song[segment].std()
-
-    # Rescale song component:
-    mix = song[:, None] * scales[None, :]
-
    if add_noise:
-        # Add normalized noise component:
+        # Get normalized noise component:
        noise = pure_noise[:song.shape[0]]
        noise /= noise[segment].std()
-        mix += noise[:, None]

-    # Process mixture:
-    mix = sosfilter(np.abs(mix), rate, config['env_fcut'], 'lp',
-                    padtype='even', padlen=config['padlen'])
-    mix_log = decibel(mix, ref=1)
-    mix_inv = sosfilter(mix_log, rate, config['inv_fcut'], 'hp',
-                        padtype='constant', padlen=config['padlen'])
+    # Prepare storage:
+    measure_inv = np.zeros_like(scales)
+    if save_detailed:
+        # Prepare optional storage:
+        measure_env = np.zeros_like(scales)
+        measure_log = np.zeros_like(scales)
+        snip_env = np.zeros((song.shape[0], example_scales.size))
+        snip_log = np.zeros((song.shape[0], example_scales.size))
+        snip_inv = np.zeros((song.shape[0], example_scales.size))

-    # Get intensity measure per stage:
-    measure_env = mix[segment, :].std(axis=0)
-    measure_log = mix_log[segment, :].std(axis=0)
-    measure_inv = mix_inv[segment, :].std(axis=0)
+    # Execute piecewise:
+    for i, scale in enumerate(scales):
+
+        # Get scaled mixture:
+        mix = song * scale
+        if add_noise:
+            mix += noise
+
+        # Process mixture:
+        mix = sosfilter(np.abs(mix), rate, config['env_fcut'], 'lp',
+                        padtype='even', padlen=config['padlen'])
+        mix_log = decibel(mix, ref=1)
+        mix_inv = sosfilter(mix_log, rate, config['inv_fcut'], 'hp',
+                            padtype='constant', padlen=config['padlen'])
+
+        # Log intensity measures:
+        measure_inv[i] = mix_inv[segment].std()
+        if save_detailed:
+            measure_env[i] = mix[segment].std()
+            measure_log[i] = mix_log[segment].std()
+            if scale in example_scales:
+                # Log snippet data:
+                save_ind = np.nonzero(example_scales == scale)[0][0]
+                snip_env[:, save_ind] = mix
+                snip_log[:, save_ind] = mix_log
+                snip_inv[:, save_ind] = mix_inv

    # Save analysis results:
-    save_inds = np.nonzero(np.isin(scales, example_scales))[0]
    if save_path is not None:
-        data = dict(
+        archive = dict(
            scales=scales,
            example_scales=example_scales,
-            measure_env=measure_env,
-            measure_log=measure_log,
            measure_inv=measure_inv,
-            )
-        if save_snippets:
-            data.update(
-                snip_env=mix[:, save_inds],
-                snip_log=mix_log[:, save_inds],
-                snip_inv=mix_inv[:, save_inds],
+        )
+        if save_detailed:
+            archive.update(
+                measure_env=measure_env,
+                measure_log=measure_log,
+                snip_env=snip_env,
+                snip_log=snip_log,
+                snip_inv=snip_inv,
            )
        file_name = save_path + name
        if add_noise:
            file_name += '_noise'
-        save_data(file_name, data, config, overwrite=True)
+        save_data(file_name, archive, config, overwrite=True)
+
 print('Done.')
 embed()
--- a/python/save_snippet_data.py
+++ b/python/save_snippet_data.py
@@ -7,9 +7,10 @@ from IPython import embed
 ## SETTINGS:

 # General:
+search_target = '*'
 input_folder = '../data/raw/'
 output_folder = '../data/processed/'
-stages = ['raw', 'filt', 'env', 'log', 'inv', 'conv', 'bi', 'feat', 'norm']
+stages = ['raw', 'filt', 'env', 'log', 'inv', 'conv', 'bi', 'feat']
 if False:
    # Overwrites edited:
    stages.append('songs')
@@ -30,7 +31,7 @@ config.update({
    'rate_ratio': None,
    'env_fcut': 250,
    'db_ref': 1,
-    'inv_fcut': 5,
+    'inv_fcut': 10,
    'feat_thresh': np.load('../data/kernel_thresholds.npy') * 0.2,
    'feat_fcut': 0.5,
    'label_channels': 0,
@@ -40,7 +41,7 @@ config.update({
 ## PREPARATION:

 # Fetch WAV recording files:
-input_paths = search_files(ext='wav', dir=input_folder)
+input_paths = search_files(search_target, ext='wav', dir=input_folder)
 path_names = crop_paths(input_paths)

 # PROCESSING:
--- a/python/save_species_colors.py
+++ b/python/save_species_colors.py
@@ -6,11 +6,12 @@ from IPython import embed

 # Settings:
 species = [
-    'Omocestus_rufipes',
    'Chorthippus_biguttulus',
    'Chorthippus_mollis',
    'Chrysochraon_dispar',
+    'Euchorthippus_declivus',
    'Gomphocerippus_rufus',
+    'Omocestus_rufipes',
    'Pseudochorthippus_parallelus',
 ]
 file_name = '../data/species_colors.npz'