"""Locate upper saturation points of invariance curves for each species.

For every target species, the script loads the collected and the condensed
invariance data, queries `get_saturation` for the critical scale indices,
and stores the results under `save_path`. With `compute_hist` enabled,
saving is deferred until all species have been processed, so that a
histogram of the critical scales can be computed on bin edges that are
shared across species.
"""
import numpy as np
from thunderhopper.filetools import search_files
from thunderhopper.modeltools import load_data, save_data
from misc_functions import get_saturation
from IPython import embed

# GENERAL SETTINGS:
target_species = [
    'Chorthippus_biguttulus',
    'Chorthippus_mollis',
    'Chrysochraon_dispar',
    'Euchorthippus_declivus',
    'Gomphocerippus_rufus',
    'Omocestus_rufipes',
    'Pseudochorthippus_parallelus',
]
collect_path = '../data/inv/log_hp/collected/'
condense_path = '../data/inv/log_hp/condensed/'
save_path = '../data/inv/log_hp/saturation/'

# ANALYSIS SETTINGS:
# Keyword arguments forwarded unchanged to get_saturation().
plateau_settings = dict(
    low=0.05,
    high=0.95,
    first=True,
    last=True,
    condense=None,
)
compute_hist = True
# Number of histogram bins (bins + 1 shared edges are generated).
bins = 50
# Padding of the histogram range, as a fraction of the observed scale range.
pad = 0.05

# PREPARATION:
if compute_hist:
    # Running extrema of the per-file critical scales over all species:
    min_scale, max_scale = np.inf, -np.inf
    # One archive and one config per species, kept until the shared bin
    # edges are known. Storing the config per species matters: reusing the
    # loop variable after the loop would attach the LAST species' config to
    # every saved file.
    archives = [{} for _ in target_species]
    configs = [None for _ in target_species]

# EXECUTION:
for i, species in enumerate(target_species):
    print(f'Processing {species}')

    # Load accumulated invariance data:
    path = search_files(species, dir=collect_path)[0]
    data, config = load_data(path, ['scales', 'measure_inv'])

    # Find upper saturation point per song file
    # (second return value of get_saturation):
    crit_inds = np.array(get_saturation(data['measure_inv'], **plateau_settings)[1])
    crit_scales = data['scales'][crit_inds]

    # Load condensed invariance data (rebinds `data`; the config of the
    # collected file loaded above is the one that gets saved):
    path = search_files(species, incl=['noise', 'norm-base'], dir=condense_path)[0]
    data, _ = load_data(path, ['scales', 'mean_inv'])

    # Find single upper saturation point of condensed curve:
    crit_ind = get_saturation(data['mean_inv'].mean(axis=-1), **plateau_settings)[1]
    crit_scale = data['scales'][crit_ind]

    # Results for this species. Note that 'scales' is taken from the
    # condensed file, as `data` has been rebound by the second load.
    archive = dict(
        scales=data['scales'],
        crit_inds=crit_inds,
        crit_scales=crit_scales,
        crit_ind=crit_ind,
        crit_scale=crit_scale,
    )

    # Output options:
    if not compute_hist:
        # Save species data immediately:
        save_data(save_path + species, archive, config, overwrite=True)
        continue

    # Log but don't save data yet:
    min_scale = min(crit_scales.min(), min_scale)
    max_scale = max(crit_scales.max(), max_scale)
    archives[i].update(archive)
    configs[i] = config

# Optional histogram:
if compute_hist:
    # Generate shared bin edges. The relative `pad` setting is converted
    # into a local absolute value instead of being overwritten in place.
    abs_pad = pad * (max_scale - min_scale)
    # The lower edge is clipped at zero.
    edges = np.linspace(max(0, min_scale - abs_pad), max_scale + abs_pad, bins + 1)
    centers = edges[:-1] + np.diff(edges) / 2

    # Compute histogram and save species data, each with its own config:
    for species, archive, species_config in zip(target_species, archives, configs):
        hist = np.histogram(archive['crit_scales'], bins=edges, density=True)[0]
        archive['hist'] = hist
        # Stored under 'bins' are the bin centers, not the edges.
        archive['bins'] = centers
        save_data(save_path + species, archive, species_config, overwrite=True)

print('Done.')