Fetched a bunch of species-specific song snippets.

Worked those into the LogHP analysis. Worked the results into fig_invariance_log-hp.pdf. Put the details into the new fig_invariance_log-hp_species.pdf (appendix).
2026-04-14 17:30:58 +02:00
parent 0b9264b1e1
commit 36ac504efa
17 changed files with 490 additions and 205 deletions
--- a/python/condense_inv_data_log-hp.py
+++ b/python/condense_inv_data_log-hp.py
@@ -0,0 +1,136 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from thunderhopper.filetools import search_files, crop_paths
+from thunderhopper.modeltools import load_data, save_data
+from misc_functions import shorten_species
+from IPython import embed
+
+# GENERAL SETTINGS (script condenses per-song-file 'measure_inv' curves into per-recording mean/SD for each species):
+target_species = [  # One overview column and one saved archive per entry.
+    'Chorthippus_biguttulus',
+    'Chorthippus_mollis',
+    'Chrysochraon_dispar',
+    'Euchorthippus_declivus',
+    'Gomphocerippus_rufus',
+    'Omocestus_rufipes',
+    'Pseudochorthippus_parallelus',
+]
+sources = [  # Tags matched as substrings of the file paths.
+    'BM04',
+    'BM93',
+    'DJN',
+    'GBC',
+    'FTN'
+]
+search_path = '../data/inv/log_hp/'  # Handed to search_files() as dir.
+ref_path = '../data/inv/log_hp/ref_measures.npz'  # Its 'inv' entry is the reference measure.
+save_path = '../data/inv/log_hp/condensed/'  # Prefix of the per-species output archives.
+
+# ANALYSIS SETTINGS:
+compute_ratios = True  # Divide every loaded measure by the reference measure.
+plot_overview = True  # Fill the 3-row overview figure during processing.
+
+# PREPARATION:
+if compute_ratios:
+    ref_measure = np.load(ref_path)['inv']  # Only defined when compute_ratios is set.
+if plot_overview:  # Rows: single song files / per-recording statistics / per-species summary.
+    fig, axes = plt.subplots(3, len(target_species), figsize=(16, 9),
+                             sharex=True, sharey=True, layout='constrained')
+    axes[0, 0].set_ylabel('songs')
+    axes[1, 0].set_ylabel('recordings\n(mean ± SD)')
+    axes[2, 0].set_ylabel('total\n(mean ± SEM)')  # NOTE(review): the row-2 band is rec_mean.std(), an SD rather than an SEM - confirm which is intended.
+
+# EXECUTION:
+for i, species in enumerate(target_species):
+    print(f'Processing {species}')
+    if plot_overview:
+        axes[0, i].set_title(shorten_species(species))
+
+    # Fetch all species-specific song files (the result is iterated as path strings below):
+    all_paths = search_files(species, incl='noise', ext='npz', dir=search_path)
+
+    # Separate by source (dict: source tag -> list of recordings -> list of paths):
+    sorted_paths = {}
+    for source in sources:
+
+        # Check for any source-specific song files (substring match on the full path):
+        source_paths = [path for path in all_paths if source in path]
+        if not source_paths:
+            continue
+
+        # Separate by recording (slot 0 always exists and also collects files without sub-ID):
+        sorted_paths[source] = [[]]
+        for path, name in zip(source_paths, crop_paths(source_paths)):
+
+            # Find start of numerical ID behind source tag (skips one separator character):
+            id_ind = name.find(source) + len(source) + 1
+            # Check if ID is followed by sub-ID (second '-'-separated field after the tag):
+            sub_id = name[id_ind:].split('-')[1]  # NOTE(review): raises IndexError if no '-' follows the tag.
+            if 's' in sub_id:
+                # Single (time stamp in next spot), stored in slot 0:
+                sorted_paths[source][0].append(path)
+                continue
+            sub_id = int(sub_id)
+            # Multiple (sub-ID in next spot, used as 1-based slot number):
+            if sub_id > len(sorted_paths[source]):
+                # Open new recording-specific slot (NOTE(review): only one slot is added, so sub-IDs must arrive ascending without gaps or the indexing below raises IndexError):
+                sorted_paths[source].append([])
+            sorted_paths[source][sub_id - 1].append(path)
+    
+    # Re-sort song files only by recording (discarding source separation; dict becomes a flat list of per-recording path lists):
+    sorted_paths = [path for paths in sorted_paths.values() for path in paths]
+    
+    # Condense across song files per recording:
+    for j, rec_paths in enumerate(sorted_paths):
+        for k, path in enumerate(rec_paths):
+
+            # Load invariance data (only the 'scales' and 'measure_inv' entries are requested):
+            data, _ = load_data(path, ['scales', 'measure_inv'])
+            scales, measure = data['scales'], data['measure_inv']
+
+            # Relate to noise (in-place division by the reference measure):
+            if compute_ratios:
+                measure /= ref_measure
+
+            if k == 0:
+                # Prepare song file-specific storage (scales x files of this recording):
+                file_data = np.zeros((scales.size, len(rec_paths)), dtype=float)
+                if j == 0:  # NOTE(review): never reached if the first recording slot is empty, leaving rec_mean/rec_sd unallocated for this species.
+                    # Prepare recording-specific storage (scales x recordings, allocated once per species):
+                    rec_mean = np.zeros((scales.size, len(sorted_paths)), dtype=float)
+                    rec_sd = np.zeros((scales.size, len(sorted_paths)), dtype=float)
+
+            # Log song file data (one column per song file):
+            file_data[:, k] = measure
+
+            if plot_overview:
+                axes[0, i].plot(scales, measure, c='k', alpha=0.5)
+
+        # Get recording statistics (mean and SD across the recording's song files; np.std default ddof=0):
+        rec_mean[:, j] = file_data.mean(axis=1)
+        rec_sd[:, j] = file_data.std(axis=1)
+
+        if plot_overview:
+            axes[1, i].plot(scales, rec_mean[:, j], c='k')
+            axes[1, i].fill_between(scales, rec_mean[:, j] - rec_sd[:, j],
+                                    rec_mean[:, j] + rec_sd[:, j], color='k', alpha=0.2)
+
+    # Save condensed recording data for current species (NOTE(review): if no files matched, scales/rec_mean/rec_sd still hold the previous species' values, or are undefined for the first species):
+    np.savez(save_path + species, scales=scales, mean=rec_mean, sd=rec_sd)
+
+    if plot_overview:
+        spec_mean = rec_mean.mean(axis=1)  # Mean across recordings.
+        spec_sd = rec_mean.std(axis=1)  # SD across the recording means (not divided by sqrt(n)).
+        axes[2, i].plot(scales, spec_mean, c='k')
+        axes[2, i].fill_between(scales, spec_mean - spec_sd, spec_mean + spec_sd,
+                                color='k', alpha=0.2)
+
+print('Done.')
+
+if plot_overview:  # Axes were created with sharex/sharey, so limits and scales are set on one axis only.
+    axes[0, 0].set_xlim(scales[0], scales[-1])  # Uses the scales of the last file loaded.
+    axes[0, 0].set_xscale('log')
+    axes[0, 0].set_yscale('log')
+plt.show()  # Called regardless of plot_overview.
+
+