Added multi-thresh simulation to "full" and "short" (currently running).

Added complete "rect-lp" analysis except for the figure. Added multiple appendix figures. Overhauled normalization options across all condense scripts.

Co-authored-by: Copilot <copilot@github.com>
2026-04-24 16:50:14 +02:00
parent 1a586848e8
commit 5411a309f7
48 changed files with 1549 additions and 300 deletions
--- a/python/condense_inv_data_rect-lp.py
+++ b/python/condense_inv_data_rect-lp.py
@@ -0,0 +1,109 @@
+import numpy as np
+from thunderhopper.filetools import search_files
+from thunderhopper.modeltools import load_data, save_data
+from misc_functions import sort_files_by_rec
+from IPython import embed
+
+# GENERAL SETTINGS:
+# Species to process; one condensed archive is written per entry.
+target_species = [
+    'Chorthippus_biguttulus', 'Chorthippus_mollis',
+    'Chrysochraon_dispar', 'Euchorthippus_declivus',
+    'Gomphocerippus_rufus', 'Omocestus_rufipes',
+    'Pseudochorthippus_parallelus',
+]
+
+# Source tags passed to sort_files_by_rec() for grouping by recording.
+sources = ['BM04', 'BM93', 'DJN', 'GBC', 'FTN']
+
+# Stage names; each file is read via its 'measure_<stage>' entries.
+stages = ['filt', 'env']
+
+# Where the per-file archives are searched:
+search_path = '../data/inv/rect_lp/'
+# Where the condensed per-species archives are saved:
+save_path = '../data/inv/rect_lp/condensed/'
+
+# ANALYSIS SETTINGS:
+# Active condition: filters the file search (incl=mode) and is part of
+# the output file name. Change the index to switch.
+mode_options = ('pure', 'noise')
+mode = mode_options[1]
+
+# File-name suffix per normalization option, in selection order.
+suffix_by_norm = {
+    'none': '_unnormed',
+    'min': '_norm-min',
+    'max': '_norm-max',
+    'base': '_norm-base',
+    'range': '_norm-range',
+}
+# Active normalization; change the index to switch (3 selects 'base').
+normalization = list(suffix_by_norm)[3]
+suffix = suffix_by_norm[normalization]
+
+# EXECUTION:
+# Condense file-wise measures into recording-wise mean/SD, saved per species.
+for species in target_species:
+    print(f'Processing {species}')
+
+    # Fetch all species-specific song files:
+    all_paths = search_files(species, incl=mode, ext='npz', dir=search_path)
+
+    # Sort song files by recording (one or more per source). Empty groups
+    # are dropped, since they would reuse the previous recording's data:
+    sorted_paths = [p for p in sort_files_by_rec(all_paths, sources) if len(p)]
+    if not sorted_paths:
+        # Without this guard, the previous species' results would be saved
+        # under this name (or a NameError raised for the first species):
+        print(f'No "{mode}" files found for {species}, skipping.')
+        continue
+
+    # Condense across song files per recording:
+    rec_mean, rec_sd = {}, {}
+    for j, rec_paths in enumerate(sorted_paths):
+        file_data = {}
+        for k, path in enumerate(rec_paths):
+            # Load invariance data:
+            data, config = load_data(path, 'scales', 'measure')
+            for stage in stages:
+                # Normalize out-of-place so the loaded array stays untouched:
+                measure = data[f'measure_{stage}']
+                if k == 0:
+                    # Prepare song file-specific storage (files on last axis):
+                    shape = measure.shape + (len(rec_paths),)
+                    file_data[stage] = np.zeros(shape, dtype=float)
+                if j == 0 and k == 0:
+                    # Prepare recording-specific storage (recordings last):
+                    shape = measure.shape + (len(sorted_paths),)
+                    rec_mean[f'mean_{stage}'] = np.zeros(shape, dtype=float)
+                    rec_sd[f'sd_{stage}'] = np.zeros(shape, dtype=float)
+                if normalization == 'min':
+                    # Minimum normalization:
+                    measure = measure / measure.min(axis=0, keepdims=True)
+                elif normalization == 'max':
+                    # Maximum normalization:
+                    measure = measure / measure.max(axis=0, keepdims=True)
+                elif normalization == 'base':
+                    # Noise baseline normalization (divide by first entry):
+                    measure = measure / measure[0]
+                elif normalization == 'range':
+                    # Min-max normalization:
+                    low = measure.min(axis=0, keepdims=True)
+                    high = measure.max(axis=0, keepdims=True)
+                    measure = (measure - low) / (high - low)
+                file_data[stage][..., k] = measure
+
+        # Get recording statistics:
+        for stage in stages:
+            rec_mean[f'mean_{stage}'][..., j] = np.nanmean(file_data[stage], axis=-1)
+            rec_sd[f'sd_{stage}'][..., j] = np.nanstd(file_data[stage], axis=-1)
+
+    # Save condensed recording data:
+    archive = dict(scales=data['scales'])
+    archive.update(rec_mean)
+    archive.update(rec_sd)
+    save_name = save_path + species + '_' + mode + suffix
+    save_data(save_name, archive, config, overwrite=True)
+
+print('Done.')