Seriously, no idea. Wild amount of changes. Good luck.

This commit is contained in:
j-hartling
2026-04-17 17:19:30 +02:00
parent 36ac504efa
commit 3b4b7f2161
40 changed files with 2067 additions and 672 deletions

View File

@@ -0,0 +1,87 @@
import numpy as np
from thunderhopper.filetools import search_files
from thunderhopper.modeltools import load_data, save_data
from misc_functions import sort_files_by_rec
from IPython import embed
# GENERAL SETTINGS:
# Species whose song recordings are condensed in this run.
target_species = [
    'Chorthippus_biguttulus', 'Chorthippus_mollis', 'Chrysochraon_dispar',
    'Euchorthippus_declivus', 'Gomphocerippus_rufus', 'Omocestus_rufipes',
    'Pseudochorthippus_parallelus',
]
# Source identifiers used to group song files into recordings.
sources = ['BM04', 'BM93', 'DJN', 'GBC', 'FTN']
# Processing stages for which invariance measures are stored per file.
stages = ['filt', 'env', 'log', 'inv', 'conv', 'feat']
# Input directory with per-file invariance data and output directory
# for the condensed per-species archives.
search_path = '../data/inv/full/'
save_path = '../data/inv/full/condensed/'
# ANALYSIS SETTINGS:
# If True, divide each measure by its first entry before averaging
# (saved archives are suffixed '_normed' instead of '_raw').
compute_ratios = False
# EXECUTION:
# For each species: gather its per-file invariance data, group the files by
# recording, average measures within each recording, and save per-species
# archives of recording-level means and standard deviations.
for i, species in enumerate(target_species):
    print(f'Processing {species}')
    # Fetch all species-specific song files:
    all_paths = search_files(species, ext='npz', dir=search_path)
    if not all_paths:
        continue
    # Sort song files by recording (one or more per source):
    sorted_paths = sort_files_by_rec(all_paths, sources)
    # Recording-level accumulators, created lazily from the first file's
    # measure shapes (None until the first file has been loaded). This also
    # fixes a bug where the accumulators were re-zeroed for every file of
    # the first recording.
    rec_mean, rec_sd = None, None
    # Condense across song files per recording:
    for j, rec_paths in enumerate(sorted_paths):
        # Skip empty recording groups: otherwise stale file_data from the
        # previous recording would be averaged (or a NameError raised at j=0).
        if not rec_paths:
            continue
        for k, path in enumerate(rec_paths):
            # Load invariance data:
            data, config = load_data(path, 'scales', 'measure')
            if k == 0:
                # Prepare song file-specific storage (last axis = file index):
                file_data = {}
                for stage in stages:
                    shape = data[f'measure_{stage}'].shape + (len(rec_paths),)
                    file_data[stage] = np.zeros(shape, dtype=float)
            if rec_mean is None:
                # Prepare recording-specific storage once per species
                # (last axis = recording index):
                rec_mean, rec_sd = {}, {}
                for stage in stages:
                    shape = data[f'measure_{stage}'].shape + (len(sorted_paths),)
                    rec_mean[f'mean_{stage}'] = np.zeros(shape, dtype=float)
                    rec_sd[f'sd_{stage}'] = np.zeros(shape, dtype=float)
            # Log song file data:
            for stage in stages:
                mkey = f'measure_{stage}'
                if compute_ratios:
                    # Normalize by the first entry along the leading axis.
                    data[mkey] /= data[mkey][0]
                file_data[stage][..., k] = data[mkey]
        # Get recording statistics (NaN-aware across the file axis):
        for stage in stages:
            rec_mean[f'mean_{stage}'][..., j] = np.nanmean(file_data[stage], axis=-1)
            rec_sd[f'sd_{stage}'][..., j] = np.nanstd(file_data[stage], axis=-1)
    if rec_mean is None:
        # No usable recordings for this species — nothing to save.
        continue
    # Save condensed recording data:
    save_name = save_path + species
    save_name += '_normed' if compute_ratios else '_raw'
    # NOTE: 'scales' and config are taken from the last file loaded; assumes
    # they are identical across all files of a species — TODO confirm.
    archive = dict(scales=data['scales'])
    archive.update(rec_mean)
    archive.update(rec_sd)
    save_data(save_name, archive, config, overwrite=True)
print('Done.')