Wrote results for pipeline_full, pipeline_short, and feat_cross_species.

2026-05-07 18:15:00 +02:00
parent a48457d967
commit 4b4a04ab2a
14 changed files with 548 additions and 296 deletions
--- a/python/save_inv_data_full.py
+++ b/python/save_inv_data_full.py
@@ -13,12 +13,12 @@ from IPython import embed
 target_species = [
    'Chorthippus_biguttulus',
    'Chorthippus_mollis',
-    # 'Chrysochraon_dispar',
-    # 'Euchorthippus_declivus',
-    # 'Gomphocerippus_rufus',
-    # 'Omocestus_rufipes',
-    # 'Pseudochorthippus_parallelus',
-][1]
+    'Chrysochraon_dispar',
+    'Euchorthippus_declivus',
+    'Gomphocerippus_rufus',
+    'Omocestus_rufipes',
+    'Pseudochorthippus_parallelus',
+][0]
 example_file = {
    'Chorthippus_biguttulus': 'Chorthippus_biguttulus_GBC_94-17s73.1ms-19s977ms',
    'Chorthippus_mollis': 'Chorthippus_mollis_DJN_41_T28C-46s4.58ms-1m15s697ms',
@@ -28,7 +28,7 @@ example_file = {
    'Omocestus_rufipes': 'Omocestus_rufipes_DJN_32-40s724ms-48s779ms',
    'Pseudochorthippus_parallelus': 'Pseudochorthippus_parallelus_GBC_88-6s678ms-9s32.3ms'
 }[target_species]
-data_paths = search_files(target_species, incl='GBC', dir='../data/processed/')
+data_paths = search_files(target_species, incl='DJN', dir='../data/processed/')
 noise_path = '../data/processed/white_noise_sd-1.npz'
 thresh_path = '../data/inv/full/thresholds.npz'
 stages = ['filt', 'env', 'log', 'inv', 'conv', 'feat']
@@ -43,25 +43,22 @@ thresh_rel = np.array([0, 0.5, 1, 1.5, 2, 2.5, 3])

 # SUBSET SETTINGS:
 kernels = None
-types = None
-sigmas = None
+types = None  # e.g. np.array([1, -1, 2, -2, 3, -3, 4, -4])
+sigmas = None  # e.g. np.array([0.001, 0.002, 0.004, 0.008, 0.016])

 # PREPARATION:
 pure_noise = np.load(noise_path)['raw']
-thresh_data = dict(np.load(thresh_path))
-thresh_abs = thresh_rel[:, None] * thresh_data['sds'][None, :]
+thresh_data = np.load(thresh_path)['sds']
+thresh_abs = thresh_rel[:, None] * thresh_data[None, :]

 # EXECUTION:
 for data_path, name in zip(data_paths, crop_paths(data_paths)):
    save_detailed = example_file in name
    print(f'Processing {name}')
-    if 'BM04' in name:
-        continue

    # Get song recording (prior to anything):
    data, config = load_data(data_path, files='raw')
-    song, rate = data['raw'], config['rate']
-    print(song.shape, song.size)
+    song, rate = copy.deepcopy(data['raw']), config['rate']

    # Reduce to kernel subset:
    if any(var is not None for var in [kernels, types, sigmas]):
@@ -73,15 +70,19 @@ for data_path, name in zip(data_paths, crop_paths(data_paths)):

    # Get song segment to be analyzed:
    time = np.arange(song.shape[0]) / rate
-    start, end = data['songs_0'].ravel()
+    start, end = copy.deepcopy(data['songs_0'].ravel())
    segment = (time >= start) & (time <= end)
+    del data, time
+    gc.collect()

    # Normalize song component:
    song /= song[segment].std(axis=0)

    # Get normalized noise component:
-    noise = draw_noise_segment(pure_noise, song.shape[0])
+    noise = copy.deepcopy(draw_noise_segment(pure_noise, song.shape[0]))
    noise /= noise[segment].std()
+    # Keep pure_noise: it is loaded once before the loop and read again for every file.
+    gc.collect()

    # Prepare storage:
    shape_low = (scales.size,)
@@ -128,6 +129,8 @@ for data_path, name in zip(data_paths, crop_paths(data_paths)):
                snippets[f'snip_{stage}'][:, ..., scale_ind] = copy.deepcopy(signals[stage])
        
        conv = copy.deepcopy(signals['conv'])
+        for stage in pre_stages:
+            del signals[stage]
        del scaled, signals
        gc.collect()

@@ -161,7 +164,7 @@ for data_path, name in zip(data_paths, crop_paths(data_paths)):
            archive.update(snippets)
        save_data(save_path + name, archive, config, overwrite=True)
        del archive
-    del measures, data, config, conv
+    del measures, config, conv
    if save_detailed:
        del snippets
    gc.collect()