Files
paper_2025/python/misc_functions.py

83 lines
3.0 KiB
Python

import numpy as np
from scipy.stats import gaussian_kde
from thunderhopper.filetools import crop_paths
def shorten_species(name):
    """Abbreviate a 'Genus_species' name to the form 'G. species'.

    Parameters
    ----------
    name : str
        Species name consisting of exactly two parts joined by a single
        underscore.

    Returns
    -------
    str
        First character of the genus, followed by '. ' and the species part.

    Raises
    ------
    ValueError
        If name does not split into exactly two parts at underscores.
    """
    parts = name.split('_')
    # Strict two-way unpacking rejects names with zero or several underscores:
    genus, species = parts
    return f'{genus[0]}. {species}'
def unsort_unique(array):
    """Return the unique values of array in order of first appearance.

    np.unique() returns its values sorted; this undoes the sorting by
    re-ordering the unique values by the index of their first occurrence.

    Parameters
    ----------
    array : array_like
        Input values (treated as flattened by np.unique()).

    Returns
    -------
    np.ndarray
        1D array of unique values, ordered by first occurrence in array.
    """
    uniques, first_seen = np.unique(array, return_index=True)
    # Ranking the first-occurrence indices restores the original order:
    appearance_order = first_seen.argsort()
    return uniques[appearance_order]
def draw_noise_segment(noise, n, rng=None):
    """Draw a random contiguous segment of n entries along the first axis.

    Parameters
    ----------
    noise : np.ndarray
        Array to draw from. The segment is taken along axis 0.
    n : int
        Number of consecutive entries along axis 0 to return.
    rng : None, int, or np.random.Generator, optional
        Passed to np.random.default_rng(). None (default) uses fresh,
        unpredictable entropy on each call. Pass a seed or an existing
        Generator to make the drawn segment reproducible.

    Returns
    -------
    np.ndarray
        Copy of noise[start:start + n] for a uniformly drawn start index.

    Raises
    ------
    ValueError
        If n exceeds the length of noise along its first axis.
    """
    n_available = noise.shape[0]
    if n > n_available:
        msg = f'Cannot draw {n} entries from noise of length {n_available}.'
        raise ValueError(msg)

    # default_rng() passes an existing Generator through unaltered:
    rng = np.random.default_rng(rng)
    # Inclusive upper bound, so that the segment may end on the last entry:
    start = rng.integers(0, n_available - n, endpoint=True)
    return np.take(noise, np.arange(start, start + n), axis=0)
def sort_files_by_rec(paths, sources=('BM04', 'BM93', 'DJN', 'GBC', 'FTN')):
    """Group song file paths by the recording they belong to.

    Paths are first separated by source tag and then, within each source,
    by recording. The text behind the source tag (skipping one separator
    character) is split at '-'. If the second field contains an 's', it is
    treated as a time stamp and the file is assigned to the first recording
    of that source. Otherwise, it is read as an integer sub-ID (counted from
    1) that selects the recording.

    Parameters
    ----------
    paths : iterable of str
        Paths to the song files. A path belongs to a source if it contains
        the source tag anywhere.
    sources : iterable of str, optional
        Source tags to look for, in the order in which their recordings
        appear in the output.

    Returns
    -------
    list of list of str
        One list of paths per recording, in order of sources and then of
        sub-IDs. The source separation itself is discarded. A recording slot
        stays empty if no file carries its sub-ID.

    Notes
    -----
    Relies on crop_paths() to reduce each path to a file name.
    NOTE(review): presumably this strips directory and extension - confirm
    against thunderhopper.filetools.
    """
    # Separate by source:
    sorted_paths = {}
    for source in sources:
        # Check for any source-specific song files:
        source_paths = [path for path in paths if source in path]
        if not source_paths:
            continue

        # Separate by recording:
        recordings = [[]]
        for path, name in zip(source_paths, crop_paths(source_paths)):
            # Find numerical ID behind source tag:
            id_ind = name.find(source) + len(source) + 1
            # Get segment where sub-ID would be:
            sub_id = name[id_ind:].split('-')[1]
            if 's' in sub_id:
                # Found time stamp (single recording):
                recordings[0].append(path)
                continue

            # Found sub-ID (multiple recordings):
            sub_id = int(sub_id)
            # Open recording-specific slots up to this sub-ID. A single new
            # slot is not enough if sub-IDs have gaps or arrive out of order:
            while len(recordings) < sub_id:
                recordings.append([])
            recordings[sub_id - 1].append(path)
        sorted_paths[source] = recordings

    # Re-sort song files by recording only (discarding source separation):
    return [recording for recordings in sorted_paths.values()
            for recording in recordings]
def get_kde(data, sigma, axis=None, n=1000, pad=10):
    """Evaluate a Gaussian kernel density estimate of data on an axis.

    Parameters
    ----------
    data : np.ndarray
        Samples handed to scipy.stats.gaussian_kde().
    sigma : float
        Handed to gaussian_kde() as its bw_method argument. Also sets the
        padding of the automatically generated axis.
        NOTE(review): per the scipy documentation, a scalar bw_method is
        used as a factor on the data's standard deviation, not as a kernel
        width in data units - confirm that this is the intended meaning.
    axis : np.ndarray, optional
        Points at which to evaluate the density. If None (default), n evenly
        spaced points from data.min() - pad * sigma to
        data.max() + pad * sigma are used.
    n : int, optional
        Number of points of the automatically generated axis.
    pad : float, optional
        Extension of the automatically generated axis beyond the data range
        on either side, in multiples of sigma.

    Returns
    -------
    pdf : np.ndarray
        Estimated density at each point of axis.
    axis : np.ndarray
        The evaluation points (the given or the generated axis).
    """
    if axis is None:
        # Cover the data range plus a symmetric margin:
        margin = pad * sigma
        axis = np.linspace(data.min() - margin, data.max() + margin, n)
    estimator = gaussian_kde(data, bw_method=sigma)
    return estimator(axis), axis
def get_saturation(sigmoid, low=0.05, high=0.95, first=True, last=True,
                   condense=None):
    """Find the last indices where a curve is at or below two span fractions.

    The span of the curve runs from a lower to an upper reference value.
    Two thresholds are placed at the fractions low and high of this span.
    For each threshold, the index of the last entry along the first axis
    that does not exceed it is returned.

    Parameters
    ----------
    sigmoid : np.ndarray
        1D curve, or 2D array holding one curve per column.
    low : float, optional
        Fraction of the span that defines the lower threshold.
    high : float, optional
        Fraction of the span that defines the upper threshold.
    first : bool, optional
        If True, the lower reference is the first entry of the curve.
        If False, it is the minimum along the first axis.
    last : bool, optional
        If True, the upper reference is the last entry of the curve.
        If False, it is the maximum along the first axis.
    condense : str, optional
        Treatment of 2D input. With 'norm', the columns are replaced by the
        norm of each row before any further step. With 'all', a single index
        pair is returned for the last row in which every column is at or
        below its threshold. Otherwise, one index per column is returned.

    Returns
    -------
    low_ind, high_ind : int or list of int
        Indices for the lower and upper threshold. Single indices for 1D
        curves and for condense='all', else lists with one index per column.

    Raises
    ------
    IndexError
        If no entry satisfies one of the threshold conditions.
    """
    def last_true(mask):
        # Index of the final True entry of a 1D boolean mask.
        return np.nonzero(mask)[0][-1]

    # Optionally collapse all columns into a single curve of row norms:
    if condense == 'norm' and sigmoid.ndim == 2:
        sigmoid = np.linalg.norm(sigmoid, axis=1)

    # Reference values that bound the span of each curve:
    if first:
        floor = sigmoid[0]
    else:
        floor = sigmoid.min(axis=0)
    if last:
        ceiling = sigmoid[-1]
    else:
        ceiling = sigmoid.max(axis=0)
    extent = ceiling - floor

    # Mark entries that do not exceed the respective threshold:
    below_low = sigmoid <= floor + low * extent
    below_high = sigmoid <= floor + high * extent

    # Single curve:
    if sigmoid.ndim == 1:
        return last_true(below_low), last_true(below_high)

    # Several curves, joint criterion across all columns:
    if condense == 'all':
        return (last_true(below_low.all(axis=1)),
                last_true(below_high.all(axis=1)))

    # Several curves, one index per column:
    columns = range(sigmoid.shape[1])
    low_ind = [last_true(below_low[:, i]) for i in columns]
    high_ind = [last_true(below_high[:, i]) for i in columns]
    return low_ind, high_ind