import numpy as np
from thunderhopper.filetools import search_files
from thunderhopper.modeltools import load_data, save_data
from IPython import embed

# Collects the per-file invariance measures of each target species into a
# single archive per species: for every stage, the 'measure_<stage>' arrays of
# all files are stacked along a new trailing axis (one slot per file).

# GENERAL SETTINGS:
# Species names passed to search_files() to find the species' files:
target_species = [
    'Chorthippus_biguttulus',
    'Chorthippus_mollis',
    'Chrysochraon_dispar',
    'Euchorthippus_declivus',
    'Gomphocerippus_rufus',
    'Omocestus_rufipes',
    'Pseudochorthippus_parallelus',
]
# Stages for which each loaded file provides a 'measure_<stage>' array:
stages = ['filt', 'env', 'conv', 'feat']
# Keys of the per-stage measure arrays (built once, reused for every file):
measure_keys = [f'measure_{stage}' for stage in stages]
search_path = '../data/inv/short/'
save_path = '../data/inv/short/collected/'

# EXECUTION:
for species in target_species:
    print(f'Processing {species}')

    # Fetch all species-specific song files:
    all_paths = search_files(species, ext='npz', dir=search_path)
    if not all_paths:
        # Nothing to collect, so no output file is written for this species:
        print(f'No files found for {species}, skipping.')
        continue
    n_files = len(all_paths)

    # Run through files:
    for j, path in enumerate(all_paths):

        # Load invariance data:
        # NOTE(review): 'scales' and 'measure' presumably select which stored
        # entries are loaded (incl. all 'measure_<stage>' keys) - confirm
        # against load_data().
        data, config = load_data(path, 'scales', 'measure')

        if j == 0:
            # Prepare species-specific storage, sized from the first file.
            # Scales are taken from the first file only.
            # NOTE(review): assumes all files of a species share the same
            # scales and measure shapes - confirm.
            species_data = dict(scales=data['scales'])
            for mkey in measure_keys:
                # Original measure shape plus one trailing axis over files:
                shape = data[mkey].shape + (n_files,)
                species_data[mkey] = np.zeros(shape, dtype=float)

        # Log species data (file j fills slot j of the trailing axis):
        for mkey in measure_keys:
            species_data[mkey][..., j] = data[mkey]

    # Save collected file data. The config that is stored is the one returned
    # for the last loaded file, since it is rebound on every iteration:
    save_name = save_path + species
    save_data(save_name, species_data, config, overwrite=True)

print('Done.')