import numpy as np
from thunderhopper.filetools import search_files
from thunderhopper.modeltools import load_data, save_data
from misc_functions import sort_files_by_rec
from IPython import embed  # NOTE(review): unused — kept in case it serves interactive debugging.

# Condense invariance measures per species: for each recording, stack the
# (optionally normalized) measures of its song files, take NaN-aware mean/SD
# across files, and save one archive per species to disk.

# GENERAL SETTINGS:
target_species = [
    'Chorthippus_biguttulus',
    'Chorthippus_mollis',
    'Chrysochraon_dispar',
    'Euchorthippus_declivus',
    'Gomphocerippus_rufus',
    'Omocestus_rufipes',
    'Pseudochorthippus_parallelus',
]
sources = [
    'BM04',
    'BM93',
    'DJN',
    'GBC',
    'FTN',
]
stages = ['filt', 'env']
search_path = '../data/inv/rect_lp/'
save_path = '../data/inv/rect_lp/condensed/'

# ANALYSIS SETTINGS:
mode = ['pure', 'noise'][1]
normalization = [
    'none',
    'min',
    'max',
    'base',
    'range',
][0]
# File-name suffix encoding the chosen normalization:
suffix = dict(
    none='_unnormed',
    min='_norm-min',
    max='_norm-max',
    base='_norm-base',
    range='_norm-range',
)[normalization]

# EXECUTION:
for species in target_species:
    print(f'Processing {species}')
    # Fetch all species-specific song files:
    all_paths = search_files(species, incl=mode, ext='npz', dir=search_path)
    # Sort song files by recording (one or more per source):
    sorted_paths = sort_files_by_rec(all_paths, sources)
    # Condense across song files per recording:
    for j, rec_paths in enumerate(sorted_paths):
        for k, path in enumerate(rec_paths):
            # Load invariance data:
            data, config = load_data(path, 'scales', 'measure')
            if k == 0:
                # Prepare song file-specific storage (one slot per file).
                # Assumes all files of a species share one measure shape —
                # TODO confirm against load_data/search output.
                file_data = {}
                for stage in stages:
                    shape = data[f'measure_{stage}'].shape + (len(rec_paths),)
                    file_data[stage] = np.zeros(shape, dtype=float)
                if j == 0:
                    # Prepare recording-specific storage once per species.
                    # FIX: previously this ran for every file of the first
                    # recording, needlessly re-allocating and re-zeroing the
                    # accumulators; now it runs only on the very first file.
                    # Final results are unchanged (stats are written only
                    # after the file loop completes).
                    rec_mean, rec_sd = {}, {}
                    for stage in stages:
                        shape = data[f'measure_{stage}'].shape + (len(sorted_paths),)
                        rec_mean[f'mean_{stage}'] = np.zeros(shape, dtype=float)
                        rec_sd[f'sd_{stage}'] = np.zeros(shape, dtype=float)
            # Normalize (in place) and log song file data:
            for stage in stages:
                mkey = f'measure_{stage}'
                if normalization == 'min':
                    # Minimum normalization:
                    data[mkey] /= data[mkey].min(axis=0, keepdims=True)
                elif normalization == 'max':
                    # Maximum normalization:
                    data[mkey] /= data[mkey].max(axis=0, keepdims=True)
                elif normalization == 'base':
                    # Noise baseline normalization (first entry along axis 0):
                    data[mkey] /= data[mkey][0]
                elif normalization == 'range':
                    # Min-max normalization:
                    min_measure = data[mkey].min(axis=0, keepdims=True)
                    max_measure = data[mkey].max(axis=0, keepdims=True)
                    data[mkey] = (data[mkey] - min_measure) / (max_measure - min_measure)
                file_data[stage][..., k] = data[mkey]
        # Get recording statistics (NaN-aware across the file axis):
        for stage in stages:
            rec_mean[f'mean_{stage}'][..., j] = np.nanmean(file_data[stage], axis=-1)
            rec_sd[f'sd_{stage}'][..., j] = np.nanstd(file_data[stage], axis=-1)
    # Save condensed recording data ('scales' taken from the last loaded file;
    # presumably identical across files — verify against the data pipeline):
    archive = dict(scales=data['scales'])
    archive.update(rec_mean)
    archive.update(rec_sd)
    save_name = save_path + species + '_' + mode + suffix
    save_data(save_name, archive, config, overwrite=True)
print('Done.')