# paper_2025/python/save_snippet_data.py

import os

import numpy as np
from IPython import embed
from thunderhopper.filetools import search_files, crop_paths
from thunderhopper.model import configuration, process_signal
from thunderhopper.modeltools import load_data

## SETTINGS:

# General:
# Folders are written with a trailing slash; file names get appended to them.
input_folder = '../data/raw/'
output_folder = '../data/processed/'
# Forwarded to process_signal() as its overwrite argument.
overwrite = True

# Representations requested from the processing pipeline, in listed order.
stages = ['raw', 'filt', 'env', 'log', 'inv', 'conv', 'bi', 'feat', 'norm']
# Switch for additionally requesting the 'songs' stage.
# NOTE(review): the original remark here read "Overwrites edited" --
# presumably requesting 'songs' replaces manually edited labels; confirm.
include_songs = True
if include_songs:
    stages.append('songs')

# Interactivity:
# reload_saved: load each saved archive again and open an IPython shell.
# gui: forwarded to process_signal() as its label_edit argument.
reload_saved = False
gui = False

# Processing:
# Both rates are passed positionally to configuration().
env_rate = 44100.0
feat_rate = 44100.0
sigmas = [0.001, 0.002, 0.004, 0.008, 0.016, 0.032]
# Type identifiers 1..10, each immediately followed by its negative:
# [1, -1, 2, -2, ..., 10, -10].
types = [sign * number for number in range(1, 11) for sign in (1, -1)]

# Start from the package configuration, then override selected entries.
config = configuration(env_rate, feat_rate, types=types, sigmas=sigmas)

# Thresholds stored on disk, scaled down to one tenth of their saved value.
scaled_thresholds = np.load('../data/kernel_thresholds.npy') * 0.1
overrides = {
    'channel': 0,
    'rate_ratio': None,
    'env_fcut': 250,
    'inv_fcut': 5,
    'feat_thresh': scaled_thresholds,
    'feat_fcut': 0.5,
    'label_channels': 0,
    'label_thresh': 0.5,
}
config.update(overrides)

## PREPARATION:

# Fetch WAV recording files:
input_paths = search_files(ext='wav', dir=input_folder)
# One name per input path, used below as the stem of the output archive.
# NOTE(review): presumably crop_paths() strips directory and extension --
# confirm against thunderhopper.filetools.
path_names = crop_paths(input_paths)

# Reloading reads the archive written by process_signal(), so it is only
# possible when results are actually saved (output_folder is not None).
can_reload = reload_saved and output_folder is not None
if reload_saved and not can_reload:
    print('Warning: reload_saved is ignored because output_folder is None.')

## PROCESSING:

# Run processing pipeline:
for path, name in zip(input_paths, path_names):
    print('Processing:', name)

    # Fetch and store representations:
    # os.path.join() yields a valid path whether or not output_folder ends
    # with a separator (plain concatenation relied on the trailing slash).
    if output_folder is None:
        save = None
    else:
        save = os.path.join(output_folder, f'{name}.npz')
    process_signal(config, stages, path, save=save,
                   label_edit=gui, overwrite=overwrite)

    # Cross-control:
    # Load the archive that was just written and drop into an IPython shell
    # where `data` and `params` can be inspected interactively.
    if can_reload:
        data, params = load_data(save, stages, ['songs', 'noise'])
        embed()
print('Done.')