Seriously, no idea. Wild amount of changes. Good luck.

This commit is contained in:
j-hartling
2026-04-17 17:19:30 +02:00
parent 36ac504efa
commit 3b4b7f2161
40 changed files with 2067 additions and 672 deletions

View File

@@ -0,0 +1,87 @@
import numpy as np
from thunderhopper.filetools import search_files
from thunderhopper.modeltools import load_data, save_data
from misc_functions import sort_files_by_rec
from IPython import embed
# GENERAL SETTINGS:
# Species whose song recordings are condensed in this run.
target_species = [
    'Chorthippus_biguttulus', 'Chorthippus_mollis', 'Chrysochraon_dispar',
    'Euchorthippus_declivus', 'Gomphocerippus_rufus', 'Omocestus_rufipes',
    'Pseudochorthippus_parallelus',
]
# Source identifiers used to group song files into recordings.
sources = ['BM04', 'BM93', 'DJN', 'GBC', 'FTN']
# Processing stages for which invariance measures are stored per file.
stages = ['filt', 'env', 'log', 'inv', 'conv', 'feat']
# Input directory with per-file invariance data and output directory
# for the condensed per-species archives.
search_path = '../data/inv/full/'
save_path = '../data/inv/full/condensed/'
# ANALYSIS SETTINGS:
# If True, divide each measure by its first entry before averaging
# (saved archives are suffixed '_normed' instead of '_raw').
compute_ratios = False
# EXECUTION:
# For each species: gather its per-file invariance data, group the files by
# recording, average measures within each recording, and save per-species
# archives of recording-level means and standard deviations.
for i, species in enumerate(target_species):
    print(f'Processing {species}')
    # Fetch all species-specific song files:
    all_paths = search_files(species, ext='npz', dir=search_path)
    if not all_paths:
        continue
    # Sort song files by recording (one or more per source):
    sorted_paths = sort_files_by_rec(all_paths, sources)
    # Recording-level accumulators, created lazily from the first file's
    # measure shapes (None until the first file has been loaded). This also
    # fixes a bug where the accumulators were re-zeroed for every file of
    # the first recording.
    rec_mean, rec_sd = None, None
    # Condense across song files per recording:
    for j, rec_paths in enumerate(sorted_paths):
        # Skip empty recording groups: otherwise stale file_data from the
        # previous recording would be averaged (or a NameError raised at j=0).
        if not rec_paths:
            continue
        for k, path in enumerate(rec_paths):
            # Load invariance data:
            data, config = load_data(path, 'scales', 'measure')
            if k == 0:
                # Prepare song file-specific storage (last axis = file index):
                file_data = {}
                for stage in stages:
                    shape = data[f'measure_{stage}'].shape + (len(rec_paths),)
                    file_data[stage] = np.zeros(shape, dtype=float)
            if rec_mean is None:
                # Prepare recording-specific storage once per species
                # (last axis = recording index):
                rec_mean, rec_sd = {}, {}
                for stage in stages:
                    shape = data[f'measure_{stage}'].shape + (len(sorted_paths),)
                    rec_mean[f'mean_{stage}'] = np.zeros(shape, dtype=float)
                    rec_sd[f'sd_{stage}'] = np.zeros(shape, dtype=float)
            # Log song file data:
            for stage in stages:
                mkey = f'measure_{stage}'
                if compute_ratios:
                    # Normalize by the first entry along the leading axis.
                    data[mkey] /= data[mkey][0]
                file_data[stage][..., k] = data[mkey]
        # Get recording statistics (NaN-aware across the file axis):
        for stage in stages:
            rec_mean[f'mean_{stage}'][..., j] = np.nanmean(file_data[stage], axis=-1)
            rec_sd[f'sd_{stage}'][..., j] = np.nanstd(file_data[stage], axis=-1)
    if rec_mean is None:
        # No usable recordings for this species — nothing to save.
        continue
    # Save condensed recording data:
    save_name = save_path + species
    save_name += '_normed' if compute_ratios else '_raw'
    # NOTE: 'scales' and config are taken from the last file loaded; assumes
    # they are identical across all files of a species — TODO confirm.
    archive = dict(scales=data['scales'])
    archive.update(rec_mean)
    archive.update(rec_sd)
    save_data(save_name, archive, config, overwrite=True)
print('Done.')