import numpy as np
from scipy.stats import gaussian_kde
from thunderhopper.filetools import crop_paths


def shorten_species(name):
    """Abbreviate a 'Genus_species' name to 'G. species'.

    Parameters
    ----------
    name : str
        Name consisting of exactly two parts joined by one underscore.

    Returns
    -------
    str
        First character of the first part, followed by '. ' and the
        second part.

    Raises
    ------
    ValueError
        If name does not contain exactly one underscore.
    """
    genus, species = name.split('_')
    return genus[0] + '. ' + species


def unsort_unique(array):
    """Return the unique values of array in order of first occurrence.

    Parameters
    ----------
    array : array_like
        Input values (flattened by np.unique).

    Returns
    -------
    np.ndarray
        Unique values, ordered by the index of their first appearance
        instead of by value.
    """
    values, inds = np.unique(array, return_index=True)
    # np.unique sorts by value; re-order by index of first appearance:
    return values[np.argsort(inds)]


def draw_noise_segment(noise, n):
    """Draw a random run of n consecutive entries along the first axis.

    Parameters
    ----------
    noise : np.ndarray
        Data to draw from. Must provide at least n entries along axis 0.
    n : int
        Number of consecutive entries to draw.

    Returns
    -------
    np.ndarray
        The n entries of noise along axis 0 that follow a random start.

    Notes
    -----
    A fresh, unseeded generator is created on every call, so results are
    not reproducible between calls.
    """
    rng = np.random.default_rng()
    # Inclusive upper bound, so that the very last segment can be drawn:
    start = rng.integers(0, noise.shape[0] - n, endpoint=True)
    return np.take(noise, np.arange(start, start + n), axis=0)


def sort_files_by_rec(paths, sources=('BM04', 'BM93', 'DJN', 'GBC', 'FTN')):
    """Group song file paths by the recording they belong to.

    Paths are first separated by source tag, then by recording within
    each source. The field that identifies the recording is the second
    dash-separated segment of the cropped file name, counted from one
    character behind the source tag. If this field contains an 's', it is
    treated as a time stamp and the file is assigned to the first
    recording of its source. Otherwise, it is read as a 1-based integer
    sub-ID that selects the recording.
    NOTE(review): crop_paths() presumably reduces each path to its bare
    file name - confirm against thunderhopper.filetools.

    Parameters
    ----------
    paths : list of str
        Paths to song files. A path is assigned to every source whose tag
        occurs anywhere in the path string.
    sources : sequence of str, optional
        Source tags to look for, in output order.

    Returns
    -------
    list of list of str
        One list of paths per recording, ordered by source and then by
        sub-ID. Source separation is discarded. Slots of sub-IDs without
        any file remain in the output as empty lists.

    Raises
    ------
    IndexError
        If a cropped name has no dash behind the source tag.
    ValueError
        If the sub-ID field is neither a time stamp nor an integer.
    """
    # Separate by source:
    sorted_paths = {}
    for source in sources:
        # Check for any source-specific song files:
        source_paths = [path for path in paths if source in path]
        if not source_paths:
            continue

        # Separate by recording:
        slots = [[]]
        sorted_paths[source] = slots
        for path, name in zip(source_paths, crop_paths(source_paths)):
            # Find numerical ID behind source tag:
            id_ind = name.find(source) + len(source) + 1
            # Get segment where sub-ID would be:
            sub_id = name[id_ind:].split('-')[1]
            if 's' in sub_id:
                # Found time stamp (single recording):
                slots[0].append(path)
                continue

            # Found sub-ID (multiple recordings):
            sub_id = int(sub_id)
            # Open as many recording-specific slots as needed, so that
            # unordered or skipped sub-IDs do not index out of range:
            while sub_id > len(slots):
                slots.append([])
            slots[sub_id - 1].append(path)

    # Re-sort song files by recording only (discarding source separation):
    return [rec for recs in sorted_paths.values() for rec in recs]


def get_kde(data, sigma, axis=None, n=1000, pad=10):
    """Evaluate a Gaussian kernel density estimate of data.

    Parameters
    ----------
    data : np.ndarray
        Samples to estimate the density from.
    sigma : float
        Handed to scipy's gaussian_kde as bw_method and also used as the
        unit of padding when axis is generated.
        NOTE(review): scipy treats a scalar bw_method as a factor on the
        standard deviation of data, not as an absolute kernel width,
        whereas the padding uses sigma as an absolute width - confirm
        that this is intended.
    axis : np.ndarray, optional
        Points at which to evaluate the density. If None, n evenly spaced
        points from data.min() - pad * sigma to data.max() + pad * sigma
        are used.
    n : int, optional
        Number of points of the generated axis. Ignored if axis is given.
    pad : float, optional
        Extension of the generated axis beyond the data range on either
        side, in multiples of sigma. Ignored if axis is given.

    Returns
    -------
    pdf : np.ndarray
        Estimated density at each point of axis.
    axis : np.ndarray
        Points of evaluation (given or generated).
    """
    if axis is None:
        lower = data.min() - pad * sigma
        upper = data.max() + pad * sigma
        axis = np.linspace(lower, upper, n)
    pdf = gaussian_kde(data, sigma)(axis)
    return pdf, axis


def get_saturation(sigmoid, low=0.05, high=0.95, first=True, last=True,
                   condense=None):
    """Find the last indices where a curve is below fractions of its span.

    The span runs from a minimum to a maximum reference value. Thresholds
    are placed at the fractions low and high of this span above the
    minimum. For each threshold, the index of the last entry along the
    first axis that is less than or equal to the threshold is returned.

    Parameters
    ----------
    sigmoid : np.ndarray
        Curve data, 1D or 2D with one curve per column.
    low : float, optional
        Fraction of the span that defines the lower threshold.
    high : float, optional
        Fraction of the span that defines the upper threshold.
    first : bool, optional
        If True, take the first entry as minimum reference value, else
        the minimum over the first axis.
    last : bool, optional
        If True, take the last entry as maximum reference value, else
        the maximum over the first axis.
    condense : str, optional
        Treatment of 2D input. If 'norm', columns are merged into a
        single curve by the Euclidean norm over the second axis before
        any other step. If 'all', a single index is returned per
        threshold, being the last row where every column is at or below
        its own threshold. Otherwise, indices are found per column.

    Returns
    -------
    low_ind : int or list of int
        Last index at or below the lower threshold. A list with one index
        per column if 2D input is not condensed.
    high_ind : int or list of int
        Last index at or below the upper threshold, in the same format.

    Raises
    ------
    IndexError
        If no entry is at or below one of the thresholds.
    """
    if condense == 'norm' and sigmoid.ndim == 2:
        sigmoid = np.linalg.norm(sigmoid, axis=1)

    # Reference values and thresholds (per column for 2D input):
    min_value = sigmoid[0] if first else sigmoid.min(axis=0)
    max_value = sigmoid[-1] if last else sigmoid.max(axis=0)
    span = max_value - min_value
    low_mask = sigmoid <= min_value + low * span
    high_mask = sigmoid <= min_value + high * span

    if sigmoid.ndim == 1:
        # Single curve:
        low_ind = np.nonzero(low_mask)[0][-1]
        high_ind = np.nonzero(high_mask)[0][-1]
    elif condense == 'all':
        # Single index where all columns agree:
        low_ind = np.nonzero(low_mask.all(axis=1))[0][-1]
        high_ind = np.nonzero(high_mask.all(axis=1))[0][-1]
    else:
        # One index per column:
        columns = range(sigmoid.shape[1])
        low_ind = [np.nonzero(low_mask[:, i])[0][-1] for i in columns]
        high_ind = [np.nonzero(high_mask[:, i])[0][-1] for i in columns]
    return low_ind, high_ind