"""Condense per-song invariance measures per recording for each species.

For every species in ``target_species`` the script collects the matching
``.npz`` files returned by ``search_files``, groups them by recording,
optionally divides each ``measure_inv`` curve by a reference measure, and
stores mean and SD across the song files of every recording in one ``.npz``
archive per species under ``save_path``. Optionally, an overview figure with
one column per species is drawn (rows: single songs, recordings, species).
"""
import numpy as np
import matplotlib.pyplot as plt
from thunderhopper.filetools import search_files, crop_paths
from thunderhopper.modeltools import load_data, save_data
from misc_functions import shorten_species
from IPython import embed

# GENERAL SETTINGS:
target_species = [
    'Chorthippus_biguttulus',
    'Chorthippus_mollis',
    'Chrysochraon_dispar',
    'Euchorthippus_declivus',
    'Gomphocerippus_rufus',
    'Omocestus_rufipes',
    'Pseudochorthippus_parallelus',
]
sources = [
    'BM04',
    'BM93',
    'DJN',
    'GBC',
    'FTN',
]
search_path = '../data/inv/log_hp/'
ref_path = '../data/inv/log_hp/ref_measures.npz'
save_path = '../data/inv/log_hp/condensed/'

# ANALYSIS SETTINGS:
compute_ratios = True
plot_overview = True

# PREPARATION:
if compute_ratios:
    # Reference measure that every song measure is divided by:
    ref_measure = np.load(ref_path)['inv']
if plot_overview:
    fig, axes = plt.subplots(3, len(target_species), figsize=(16, 9),
                             sharex=True, sharey=True, layout='constrained')
    axes[0, 0].set_ylabel('songs')
    axes[1, 0].set_ylabel('recordings\n(mean ± SD)')
    # The band drawn in the bottom row is the standard deviation across
    # recording means, so it is labelled as SD.
    # NOTE(review): if SEM was intended, divide spec_sd by
    # np.sqrt(rec_mean.shape[1]) below and restore the 'SEM' label.
    axes[2, 0].set_ylabel('total\n(mean ± SD)')

# EXECUTION:
# Scale axis of the most recently loaded file (None until a file was loaded):
scales = None
for i, species in enumerate(target_species):
    print(f'Processing {species}')
    if plot_overview:
        axes[0, i].set_title(shorten_species(species))

    # Fetch all species-specific song files:
    all_paths = search_files(species, incl='noise', ext='npz', dir=search_path)

    # Separate by source:
    paths_by_source = {}
    for source in sources:
        # Check for any source-specific song files:
        source_paths = [path for path in all_paths if source in path]
        if not source_paths:
            continue

        # Separate by recording (slot 0 also collects files without sub-ID):
        recordings = [[]]
        for path, name in zip(source_paths, crop_paths(source_paths)):
            # Find numerical ID behind source tag
            # (skips one separator character after the tag):
            id_ind = name.find(source) + len(source) + 1

            # Check if ID is followed by sub-ID:
            sub_id = name[id_ind:].split('-')[1]
            if 's' in sub_id:
                # Single (time stamp in next spot):
                recordings[0].append(path)
                continue

            # Multiple (sub-ID in next spot):
            sub_id = int(sub_id)
            # Open as many recording-specific slots as needed, so that
            # sub-IDs that skip a number or arrive out of order cannot index
            # past the end of the list:
            while sub_id > len(recordings):
                recordings.append([])
            recordings[sub_id - 1].append(path)
        paths_by_source[source] = recordings

    # Re-sort song files only by recording (discarding source separation).
    # Empty slots are dropped because they hold no data to average:
    sorted_paths = [rec_paths for recordings in paths_by_source.values()
                    for rec_paths in recordings if rec_paths]
    if not sorted_paths:
        # Nothing to condense. Skipping avoids saving arrays that still
        # belong to the previously processed species:
        print(f'No song files found for {species}, skipping.')
        continue

    # Condense across song files per recording:
    rec_mean = rec_sd = None
    for j, rec_paths in enumerate(sorted_paths):
        file_data = None
        for k, path in enumerate(rec_paths):
            # Load invariance data:
            data, _ = load_data(path, ['scales', 'measure_inv'])
            scales, measure = data['scales'], data['measure_inv']

            # Relate to noise:
            if compute_ratios:
                measure /= ref_measure

            if file_data is None:
                # Prepare song file-specific storage (scales x files):
                file_data = np.zeros((scales.size, len(rec_paths)),
                                     dtype=float)
            if rec_mean is None:
                # Prepare recording-specific storage (scales x recordings):
                rec_mean = np.zeros((scales.size, len(sorted_paths)),
                                    dtype=float)
                rec_sd = np.zeros((scales.size, len(sorted_paths)),
                                  dtype=float)

            # Log song file data:
            file_data[:, k] = measure
            if plot_overview:
                axes[0, i].plot(scales, measure, c='k', alpha=0.5)

        # Get recording statistics:
        rec_mean[:, j] = file_data.mean(axis=1)
        rec_sd[:, j] = file_data.std(axis=1)
        if plot_overview:
            axes[1, i].plot(scales, rec_mean[:, j], c='k')
            axes[1, i].fill_between(scales,
                                    rec_mean[:, j] - rec_sd[:, j],
                                    rec_mean[:, j] + rec_sd[:, j],
                                    color='k', alpha=0.2)

    # Save condensed recording data for current species:
    np.savez(save_path + species, scales=scales, mean=rec_mean, sd=rec_sd)
    if plot_overview:
        # Species-level statistics across recording means:
        spec_mean = rec_mean.mean(axis=1)
        spec_sd = rec_mean.std(axis=1)
        axes[2, i].plot(scales, spec_mean, c='k')
        axes[2, i].fill_between(scales, spec_mean - spec_sd,
                                spec_mean + spec_sd, color='k', alpha=0.2)
print('Done.')

if plot_overview:
    if scales is not None:
        # Limits are taken from the most recently loaded scale axis:
        axes[0, 0].set_xlim(scales[0], scales[-1])
    axes[0, 0].set_xscale('log')
    axes[0, 0].set_yscale('log')
    plt.show()