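Large, unsorted batch of changes across the project; no complete summary is available. Among them (python/condense_inv_data_log-hp.py): recordings are now grouped via sort_files_by_rec, ratios are taken relative to each file's own first measure entry instead of ref_measures.npz, and condensed results are written with save_data.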

2026-04-17 17:19:30 +02:00
parent 36ac504efa
commit 3b4b7f2161
40 changed files with 2067 additions and 672 deletions
--- a/python/condense_inv_data_log-hp.py
+++ b/python/condense_inv_data_log-hp.py
@@ -1,8 +1,8 @@
 import numpy as np
 import matplotlib.pyplot as plt
-from thunderhopper.filetools import search_files, crop_paths
+from thunderhopper.filetools import search_files
 from thunderhopper.modeltools import load_data, save_data
-from misc_functions import shorten_species
+from misc_functions import shorten_species, sort_files_by_rec
 from IPython import embed

 # GENERAL SETTINGS:
@@ -23,7 +23,6 @@ sources = [
    'FTN'
 ]
 search_path = '../data/inv/log_hp/'
-ref_path = '../data/inv/log_hp/ref_measures.npz'
 save_path = '../data/inv/log_hp/condensed/'

 # ANALYSIS SETTINGS:
@@ -31,14 +30,12 @@ compute_ratios = True
 plot_overview = True

 # PREPARATION:
-if compute_ratios:
-    ref_measure = np.load(ref_path)['inv']
 if plot_overview:
    fig, axes = plt.subplots(3, len(target_species), figsize=(16, 9),
                             sharex=True, sharey=True, layout='constrained')
    axes[0, 0].set_ylabel('songs')
    axes[1, 0].set_ylabel('recordings\n(mean ± SD)')
-    axes[2, 0].set_ylabel('total\n(mean ± SEM)')
+    axes[2, 0].set_ylabel('total\n(mean ± SD)')

 # EXECUTION:
 for i, species in enumerate(target_species):
@@ -48,49 +45,21 @@ for i, species in enumerate(target_species):

    # Fetch all species-specific song files:
    all_paths = search_files(species, incl='noise', ext='npz', dir=search_path)
-
-    # Separate by source:
-    sorted_paths = {}
-    for source in sources:
-
-        # Check for any source-specific song files:
-        source_paths = [path for path in all_paths if source in path]
-        if not source_paths:
-            continue
-
-        # Separate by recording:
-        sorted_paths[source] = [[]]
-        for path, name in zip(source_paths, crop_paths(source_paths)):
-
-            # Find numerical ID behind source tag:
-            id_ind = name.find(source) + len(source) + 1
-            # Check if ID is followed by sub-ID:
-            sub_id = name[id_ind:].split('-')[1]
-            if 's' in sub_id:
-                # Single (time stamp in next spot):
-                sorted_paths[source][0].append(path)
-                continue
-            sub_id = int(sub_id)
-            # Multiple (sub-ID in next spot):
-            if sub_id > len(sorted_paths[source]):
-                # Open new recording-specific slot:
-                sorted_paths[source].append([])
-            sorted_paths[source][sub_id - 1].append(path)
    
-    # Re-sort song files only by recording (discarding source separation):
-    sorted_paths = [path for paths in sorted_paths.values() for path in paths]
+    # Sort song files by recording (one or more per source):
+    sorted_paths = sort_files_by_rec(all_paths, sources)
    
    # Condense across song files per recording:
    for j, rec_paths in enumerate(sorted_paths):
        for k, path in enumerate(rec_paths):

            # Load invariance data:
-            data, _ = load_data(path, ['scales', 'measure_inv'])
+            data, config = load_data(path, ['scales', 'measure_inv'])
            scales, measure = data['scales'], data['measure_inv']

            # Relate to noise:
            if compute_ratios:
-                measure /= ref_measure
+                measure /= measure[0]

            if k == 0:
                # Prepare song file-specific storage:
@@ -116,7 +85,8 @@ for i, species in enumerate(target_species):
                                    rec_mean[:, j] + rec_sd[:, j], color='k', alpha=0.2)

    # Save condensed recording data for current species:
-    np.savez(save_path + species, scales=scales, mean=rec_mean, sd=rec_sd)
+    archive = dict(scales=scales, mean_inv=rec_mean, sd_inv=rec_sd)
+    save_data(save_path + species, archive, config, overwrite=True)

    if plot_overview:
        spec_mean = rec_mean.mean(axis=1)
@@ -128,9 +98,7 @@ for i, species in enumerate(target_species):
 print('Done.')

 if plot_overview:
-    axes[0, 0].set_xlim(scales[0], scales[-1])
    axes[0, 0].set_xscale('log')
    axes[0, 0].set_yscale('log')
-plt.show()
-
-            
+    axes[0, 0].set_xlim(scales[1], scales[-1])
+plt.show()