paper_2025/python/save_snippet_data.py
2026-02-17 16:46:02 +01:00

62 lines
1.6 KiB
Python

import os

import numpy as np
from IPython import embed

from thunderhopper.filetools import search_files, crop_paths
from thunderhopper.model import configuration, process_signal
from thunderhopper.modeltools import load_data
## SETTINGS:

# General:
overwrite = True
input_folder = '../data/raw/'
output_folder = '../data/processed/'

# Stage names handed to process_signal() (and to load_data() on reload).
# NOTE(review): the original comment on the trailing 'songs' entry reads
# "Overwrites edited" - presumably including it replaces previously edited
# song labels; confirm against process_signal().
stages = [
    'raw', 'filt', 'env', 'log', 'inv', 'conv', 'bi', 'feat', 'norm',
    'songs',
]

# Interactivity:
reload_saved = False    # Reload each saved archive and open an IPython shell.
gui = False             # Forwarded to process_signal() as label_edit.

# Processing:
env_rate = 44100.0
feat_rate = 44100.0
sigmas = [0.001, 0.002, 0.004, 0.008, 0.016, 0.032]
# Kernel types 1..10, each immediately followed by its negated counterpart:
# [1, -1, 2, -2, ..., 10, -10].
types = [sign * index for index in range(1, 11) for sign in (1, -1)]

config = configuration(env_rate, feat_rate, types=types, sigmas=sigmas)

# Feature thresholds are the stored kernel thresholds scaled down to 10 %.
kernel_thresholds = np.load('../data/kernel_thresholds.npy')

# Entries set on top of whatever configuration() returned:
config.update({
    'channel': 0,
    'rate_ratio': None,
    'env_fcut': 250,
    'inv_fcut': 5,
    'feat_thresh': kernel_thresholds * 0.1,
    'feat_fcut': 0.5,
    'label_channels': 0,
    'label_thresh': 0.5,
})
## PREPARATION:

# Fetch WAV recording files and the per-file names used for the archives:
input_paths = search_files(ext='wav', dir=input_folder)
path_names = crop_paths(input_paths)

## PROCESSING:

# Run processing pipeline once per recording:
for path, name in zip(input_paths, path_names):
    print('Processing:', name)

    # Target archive for this recording (None disables saving). The path is
    # joined rather than concatenated so that output_folder works with or
    # without a trailing separator.
    if output_folder is None:
        save = None
    else:
        save = os.path.join(output_folder, f'{name}.npz')

    # Fetch and store representations:
    process_signal(config, stages, path, save=save,
                   label_edit=gui, overwrite=overwrite)

    # Cross-control: reload what was just written and inspect it interactively.
    if reload_saved:
        if save is None:
            # Nothing was written for this recording, so there is no archive
            # to load back.
            print('Skipping reload, no output file for:', name)
        else:
            data, params = load_data(save, stages, ['songs', 'noise'])
            embed()
print('Done.')