Fetched a bunch of species-specific song snippets.

Worked those into LogHP analysis.
Worked results into fig_invariance_log-hp.pdf.
Put details into new fig_invariance_log-hp_species.pdf (appendix).
This commit is contained in:
j-hartling
2026-04-14 17:30:58 +02:00
parent 0b9264b1e1
commit 36ac504efa
17 changed files with 490 additions and 205 deletions

View File

@@ -0,0 +1,136 @@
import numpy as np
import matplotlib.pyplot as plt
from thunderhopper.filetools import search_files, crop_paths
from thunderhopper.modeltools import load_data, save_data
from misc_functions import shorten_species
from IPython import embed
# Condenses per-song invariance measures (log-HP analysis) into per-recording
# statistics (mean and SD across song files) for each target species, saves
# one NPZ archive per species, and optionally draws a diagnostic overview.

# GENERAL SETTINGS:
target_species = [
    'Chorthippus_biguttulus',
    'Chorthippus_mollis',
    'Chrysochraon_dispar',
    'Euchorthippus_declivus',
    'Gomphocerippus_rufus',
    'Omocestus_rufipes',
    'Pseudochorthippus_parallelus',
]
sources = [
    'BM04',
    'BM93',
    'DJN',
    'GBC',
    'FTN'
]
search_path = '../data/inv/log_hp/'
ref_path = '../data/inv/log_hp/ref_measures.npz'
save_path = '../data/inv/log_hp/condensed/'

# ANALYSIS SETTINGS:
compute_ratios = True
plot_overview = True

# PREPARATION:
if compute_ratios:
    # Context manager closes the NPZ archive once the array has been read:
    with np.load(ref_path) as ref_archive:
        ref_measure = ref_archive['inv']
if plot_overview:
    fig, axes = plt.subplots(3, len(target_species), figsize=(16, 9),
                             sharex=True, sharey=True, layout='constrained')
    axes[0, 0].set_ylabel('songs')
    axes[1, 0].set_ylabel('recordings\n(mean ± SD)')
    axes[2, 0].set_ylabel('total\n(mean ± SEM)')

# EXECUTION:
scales = None
for i, species in enumerate(target_species):
    print(f'Processing {species}')
    if plot_overview:
        axes[0, i].set_title(shorten_species(species))

    # Fetch all species-specific song files:
    all_paths = search_files(species, incl='noise', ext='npz', dir=search_path)

    # Group song files by recording, one source after the other:
    recordings = []
    for source in sources:
        # Check for any source-specific song files:
        source_paths = [path for path in all_paths if source in path]
        if not source_paths:
            continue

        # First slot takes all files without sub-ID (and those with sub-ID 1):
        slots = [[]]
        for path, name in zip(source_paths, crop_paths(source_paths)):
            # Find numerical ID behind source tag:
            id_ind = name.find(source) + len(source) + 1
            # Check if ID is followed by sub-ID:
            sub_id = name[id_ind:].split('-')[1]
            if 's' in sub_id:
                # Single (time stamp in next spot):
                slots[0].append(path)
                continue
            # Multiple (sub-ID in next spot):
            sub_id = int(sub_id)
            # Open as many slots as needed, so that gaps in the sub-ID
            # sequence cannot cause an out-of-range index:
            while sub_id > len(slots):
                slots.append([])
            slots[sub_id - 1].append(path)

        # Drop slots that never received a file (would otherwise be
        # condensed from stale data of the previous recording):
        recordings.extend(slot for slot in slots if slot)

    if not recordings:
        # Nothing to condense. Never save data left over from another species:
        print(f'WARNING: No song files found for {species}. Skipping.')
        continue

    # Condense across song files per recording:
    rec_mean, rec_sd = None, None
    for j, rec_paths in enumerate(recordings):
        file_data = None
        for k, path in enumerate(rec_paths):
            # Load invariance data:
            data, _ = load_data(path, ['scales', 'measure_inv'])
            scales, measure = data['scales'], data['measure_inv']
            # Relate to noise:
            if compute_ratios:
                measure = measure / ref_measure
            if file_data is None:
                # Prepare song file-specific storage:
                file_data = np.zeros((scales.size, len(rec_paths)), dtype=float)
            if rec_mean is None:
                # Prepare recording-specific storage:
                rec_mean = np.zeros((scales.size, len(recordings)), dtype=float)
                rec_sd = np.zeros((scales.size, len(recordings)), dtype=float)
            # Log song file data:
            file_data[:, k] = measure
            if plot_overview:
                axes[0, i].plot(scales, measure, c='k', alpha=0.5)

        # Get recording statistics:
        rec_mean[:, j] = file_data.mean(axis=1)
        rec_sd[:, j] = file_data.std(axis=1)
        if plot_overview:
            axes[1, i].plot(scales, rec_mean[:, j], c='k')
            axes[1, i].fill_between(scales, rec_mean[:, j] - rec_sd[:, j],
                                    rec_mean[:, j] + rec_sd[:, j],
                                    color='k', alpha=0.2)

    # Save condensed recording data for current species:
    np.savez(save_path + species, scales=scales, mean=rec_mean, sd=rec_sd)
    if plot_overview:
        # Standard error of the mean across recordings (as labelled on axis):
        spec_mean = rec_mean.mean(axis=1)
        spec_sem = rec_mean.std(axis=1) / np.sqrt(rec_mean.shape[1])
        axes[2, i].plot(scales, spec_mean, c='k')
        axes[2, i].fill_between(scales, spec_mean - spec_sem,
                                spec_mean + spec_sem, color='k', alpha=0.2)
print('Done.')

if plot_overview:
    if scales is not None:
        # Limits can only be set if at least one song file was loaded:
        axes[0, 0].set_xlim(scales[0], scales[-1])
    axes[0, 0].set_xscale('log')
    axes[0, 0].set_yscale('log')
    plt.show()

View File

@@ -27,58 +27,29 @@ def plot_snippets(axes, time, snippets, ymin=None, ymax=None, **kwargs):
handles.extend(plot_line(ax, time, snippet, ymin=ymin, ymax=ymax, **kwargs))
return handles
def plot_dist_shifted(ax, data, axis, pdf=None, sigma=0.1, which='x',
                      base=None, cap=None, add_pdf=False, shifted=False, **kwargs):
    """Plot each data value as a separate marker along one axis.

    Every value in data is drawn with its own ax.plot() call. The position
    along the other dimension is either fixed at base or, if shifted is True,
    drawn at random between base and the rescaled density at that value.

    Parameters
    ----------
    ax : matplotlib axes
        Target axes.
    data : iterable of scalars
        Values to plot. Each value is mapped to its nearest point on axis.
    axis : array-like
        Support of the density. Passed on to get_kde() if pdf is None.
    pdf : array-like, optional
        Density over axis. If None, it is obtained from
        get_kde(data, sigma, axis), which also returns the axis to use.
    sigma : float, optional
        Passed on to get_kde() (presumably the kernel width; confirm in
        misc_functions). Only used if pdf is None.
    which : str, optional
        'x' places values along the x-axis using ax.get_xaxis_transform(),
        'y' places values along the y-axis using ax.get_yaxis_transform().
        Anything else uses ax.transData with values along the x-axis.
    base, cap : float, optional
        Lower and upper bound that the density is rescaled to. Default to
        the minimum and maximum of the density, respectively.
    add_pdf : bool, optional
        If True, also plot the rescaled density as thin black line.
    shifted : bool, optional
        If True, scatter markers randomly below the density curve.
    **kwargs
        Passed on to each marker's ax.plot() call.

    Returns
    -------
    handles : list
        Marker handles in order of data.
    pdf_handle : list
        Handles of the density line. Only returned if add_pdf is True.
    """
    if pdf is None:
        pdf, axis = get_kde(data, sigma, axis)
    pdf = np.asarray(pdf, dtype=float)
    axis = np.asarray(axis)

    low, high = pdf.min(), pdf.max()
    if base is None:
        base = low
    if cap is None:
        cap = high

    # Rescale density to [base, cap]. A flat density has no range to stretch
    # and is pinned to base instead of dividing by zero:
    span = high - low
    if span > 0:
        pdf = (pdf - low) / span * (cap - base) + base
    else:
        pdf = np.full(pdf.shape, base, dtype=float)

    if which == 'x':
        transform = ax.get_xaxis_transform()
    elif which == 'y':
        transform = ax.get_yaxis_transform()
    else:
        transform = ax.transData

    rng = np.random.default_rng()
    handles = []
    for value in data:
        # Nearest axis point (identical to exact match for on-grid values,
        # but does not fail on floating-point mismatches):
        ind = int(np.argmin(np.abs(axis - value)))
        offset = rng.uniform(base, pdf[ind]) if shifted else base
        variables = (offset, value) if which == 'y' else (value, offset)
        handles.extend(ax.plot(*variables, transform=transform, **kwargs))

    if add_pdf:
        variables = (pdf, axis) if which == 'y' else (axis, pdf)
        pdf_handle = ax.plot(*variables, transform=transform, c='k', lw=1)
        return handles, pdf_handle
    return handles
def zalpha(handles, background='w', down=1):
    """Create opaque background-colored copies of the given plot handles.

    Each copy is obtained from the handle's copy() method, set to the
    background color at full opacity, and moved below its original in
    z-order by the given amount.

    Parameters
    ----------
    handles : iterable
        Handles providing copy(), set(), set_zorder(), and get_zorder().
    background : color, optional
        Color assigned to each copy.
    down : float, optional
        Z-order distance between original and copy.

    Returns
    -------
    list
        One copy per handle, in input order.
    """
    def _underlay(original):
        # Opaque duplicate sitting just below the original:
        duplicate = original.copy()
        duplicate.set(color=background, alpha=1)
        duplicate.set_zorder(original.get_zorder() - down)
        return duplicate

    return [_underlay(handle) for handle in handles]
# def zalpha(handles, background='w', down=1):
# twins = []
# for handle in handles:
# twin = handle.copy()
# twin.set(color=background, alpha=1)
# twin.set_zorder(handle.get_zorder() - down)
# twins.append(twin)
# return twins
# GENERAL SETTINGS:
target = 'Omocestus_rufipes'
target = 'Omocestus_rufipes_DJN_32-40s724ms-48s779ms'
data_paths = search_files(target, excl='noise', dir='../data/inv/log_hp/')
ref_path = '../data/inv/log_hp/ref_measures.npz'
save_path = '../figures/fig_invariance_log_hp.pdf'
target_species = [
'Omocestus_rufipes',
'Chorthippus_biguttulus',
'Chorthippus_mollis',
'Chrysochraon_dispar',
'Euchorthippus_declivus',
'Gomphocerippus_rufus',
'Omocestus_rufipes',
'Pseudochorthippus_parallelus',
]
]
stages = ['env', 'log', 'inv']
load_kwargs = dict(
files=stages,
@@ -159,12 +130,13 @@ fs = dict(
)
colors = load_colors('../data/stage_colors.npz')
species_colors = load_colors('../data/species_colors.npz')
noise_colors = [(0.5, 0.5, 0.5), (0.7, 0.7, 0.7)]
noise_colors = [(0.6,) * 3, (0.8,) * 3]
lw = dict(
snip=1,
big=4,
spec=2,
plateau=1,
plateau=1.5,
legend=5,
)
xlabels = dict(
big='scale $\\alpha$',
@@ -273,7 +245,7 @@ leg_kwargs = dict(
columnspacing=1,
)
diag_kwargs = dict(
c=(0.75, 0.75, 0.75),
c=(0.3,) * 3,
lw=2,
ls='--',
zorder=1.9,
@@ -297,18 +269,9 @@ plateau_line_kwargs = dict(
)
plateau_dot_kwargs = dict(
marker='o',
markersize=10,
markeredgecolor='k',
markersize=8,
markeredgewidth=1,
# alpha=1,
zorder=6,
clip_on=False,
# base=0,
# cap=0.15,
# add_pdf=True,
)
kde_kwargs = dict(
sigma=0.1,
)
# PREPARATION:
@@ -317,18 +280,12 @@ if compute_ratios:
species_measures = {}
thresh_inds = np.zeros((len(target_species),), dtype=int)
thresh_scales = np.zeros((len(target_species),), dtype=float)
for i, species in enumerate(target_species):
path = search_files(species, incl='noise', dir='../data/inv/log_hp/')[0]
species_data = load_data(path, **load_kwargs)[0]
measure = species_data['measure_inv']
scales = species_data['scales']
if compute_ratios:
measure /= ref_measures['inv']
spec_path = search_files(species, dir='../data/inv/log_hp/condensed/')[0]
spec_data = dict(np.load(spec_path))
measure = spec_data['mean'].mean(axis=1)
species_measures[species] = measure
thresh_inds[i] = get_saturation(measure, **plateau_settings)[1]
thresh_scales[i] = scales[thresh_inds[i]]
thresh_pdf, pdf_axis = get_kde(thresh_scales, axis=scales, **kde_kwargs)
# EXECUTION:
for data_path in data_paths:
@@ -460,24 +417,23 @@ for data_path in data_paths:
big_axes[1].axvspan(noise_scales[low_ind], noise_scales[high_ind],
fc=noise_colors[1], **plateau_rect_kwargs)
# Plot species-specific noise-song measures:
# Plot species-specific noise-song invariance curves:
for i, (species, measure) in enumerate(species_measures.items()):
# Plot invariance curve:
color = species_colors[species]
ind, scale = thresh_inds[i], thresh_scales[i]
big_axes[2].plot(noise_scales, measure, label=shorten_species(species),
c=color, lw=lw['spec'])
# Indicate saturation:
ind = thresh_inds[i]
scale = noise_scales[ind]
big_axes[2].plot(scale, 0, c='w', alpha=1, zorder=5.5, **plateau_dot_kwargs,
transform=big_axes[2].get_xaxis_transform())
handle = big_axes[2].plot(scale, 0, c=color, alpha=0.5, **plateau_dot_kwargs,
transform=big_axes[2].get_xaxis_transform())
big_axes[2].plot(scale, 0, mfc=color, mec='k', alpha=0.75, zorder=6, **plateau_dot_kwargs,
transform=big_axes[2].get_xaxis_transform())
big_axes[2].vlines(scale, big_axes[2].get_ylim()[0], measure[ind],
color=color, **plateau_line_kwargs)
big_axes[2].legend(**leg_kwargs)
# handles = plot_dist_shifted(big_axes[2], species_threshs, axis=pdf_axis,
# pdf=thresh_pdf, **plateau_dot_kwargs)[0]
# [h.set_color(species_colors[s]) for h, s in zip(handles, target_species)]
legend = big_axes[2].legend(**leg_kwargs)
[h.set_lw(lw['legend']) for h in legend.legend_handles]
if save_path is not None:
fig.savefig(save_path, bbox_inches='tight')

View File

@@ -0,0 +1,108 @@
import plotstyle_plt
import numpy as np
import matplotlib.pyplot as plt
from thunderhopper.filetools import search_files
from plot_functions import ylabel, super_xlabel, letter_subplots, title_subplot
from color_functions import load_colors
from misc_functions import shorten_species
# Appendix figure: one panel per species, showing the condensed
# recording-specific invariance curves (mean ± SD across song files).

# GENERAL SETTINGS:
target_species = [
    'Chorthippus_biguttulus',
    'Chorthippus_mollis',
    'Chrysochraon_dispar',
    'Euchorthippus_declivus',
    'Gomphocerippus_rufus',
    'Omocestus_rufipes',
    'Pseudochorthippus_parallelus',
]
data_path = '../data/inv/log_hp/condensed/'
save_path = '../figures/fig_invariance_log-hp_species.pdf'

# GRAPH SETTINGS:
fig_kwargs = dict(
    figsize=(32/2.54, 16/2.54),
    nrows=1,
    ncols=len(target_species),
    sharex=True,
    sharey=True,
    gridspec_kw=dict(
        wspace=0.4,
        hspace=0,
        left=0.07,
        right=0.98,
        bottom=0.1,
        top=0.95,
    )
)

# PLOT SETTINGS:
colors = load_colors('../data/species_colors.npz')
line_kwargs = dict(
    lw=2,
)
fill_kwargs = dict(
    alpha=0.3,
    zorder=1,
)
xlab = 'scale $\\alpha$'
ylab = '$\\sigma_{\\alpha}\\,/\\,\\sigma_{\\eta}$'
xlab_kwargs = dict(
    y=0,
    fontsize=16,
    ha='center',
    va='bottom',
)
ylab_kwargs = dict(
    x=0,
    fontsize=20,
    ha='center',
    va='top',
)
title_kwargs = dict(
    x=0.5,
    yref=0.99,
    ha='center',
    va='top',
    fontsize=16,
    fontstyle='italic',
)
letter_kwargs = dict(
    x=0.005,
    y=0.99,
    fontsize=22,
    ha='left',
    va='top',
)

# Prepare graph (axes are shared, so settings on the first apply to all):
fig, axes = plt.subplots(**fig_kwargs)
axes[0].set_ylim(0.9, 20)
axes[0].set_xscale('log')
axes[0].set_yscale('log')
super_xlabel(xlab, fig, axes[0], axes[-1], **xlab_kwargs)
ylabel(axes[0], ylab, **ylab_kwargs, transform=fig.transFigure)
# letter_subplots(axes, **letter_kwargs)

# Run through species:
for species, ax in zip(target_species, axes):
    title_subplot(ax, shorten_species(species), ref=fig, **title_kwargs)
    color = colors[species]

    # Load species data (context manager closes the archive after reading):
    path = search_files(species, dir=data_path)[0]
    with np.load(path) as archive:
        scales = archive['scales']
        means = archive['mean']
        sds = archive['sd']

    # Plot recording-specific traces (one column per recording):
    for mean, sd in zip(means.T, sds.T):
        ax.plot(scales, mean, c=color, **line_kwargs)
        ax.fill_between(scales, mean - sd, mean + sd, color=color, **fill_kwargs)

# Save graph:
fig.savefig(save_path)
plt.show()

View File

@@ -2,6 +2,7 @@ import string
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.transforms import Bbox, BboxTransformTo, TransformedBbox
from misc_functions import get_kde
def hide_ticks(ax, side='bottom', ticks=True):
axis = 'x' if side in ['top', 'bottom'] else 'y'
@@ -298,3 +299,33 @@ def zoom_inset(ax, inset, handle, x0=None, x1=None, y0=None, y1=None, ref='x',
def set_clip_box(artist, ax, bounds=[[0, -0.05], [1, 1.05]]):
artist.set_clip_box(TransformedBbox(Bbox(bounds), ax.transAxes))
return None
def plot_dist_shifted(ax, data, axis, pdf=None, sigma=0.1, which='x',
                      base=None, cap=None, add_pdf=False, shifted=False, **kwargs):
    """Plot each data value as a separate marker along one axis.

    Every value in data is drawn with its own ax.plot() call. The position
    along the other dimension is either fixed at base or, if shifted is True,
    drawn at random between base and the rescaled density at that value.

    Parameters
    ----------
    ax : matplotlib axes
        Target axes.
    data : iterable of scalars
        Values to plot. Each value is mapped to its nearest point on axis.
    axis : array-like
        Support of the density. Passed on to get_kde() if pdf is None.
    pdf : array-like, optional
        Density over axis. If None, it is obtained from
        get_kde(data, sigma, axis), which also returns the axis to use.
    sigma : float, optional
        Passed on to get_kde() (presumably the kernel width; confirm in
        misc_functions). Only used if pdf is None.
    which : str, optional
        'x' places values along the x-axis using ax.get_xaxis_transform(),
        'y' places values along the y-axis using ax.get_yaxis_transform().
        Anything else uses ax.transData with values along the x-axis.
    base, cap : float, optional
        Lower and upper bound that the density is rescaled to. Default to
        the minimum and maximum of the density, respectively.
    add_pdf : bool, optional
        If True, also plot the rescaled density as thin black line.
    shifted : bool, optional
        If True, scatter markers randomly below the density curve.
    **kwargs
        Passed on to each marker's ax.plot() call.

    Returns
    -------
    handles : list
        Marker handles in order of data.
    pdf_handle : list
        Handles of the density line. Only returned if add_pdf is True.
    """
    if pdf is None:
        pdf, axis = get_kde(data, sigma, axis)
    pdf = np.asarray(pdf, dtype=float)
    axis = np.asarray(axis)

    low, high = pdf.min(), pdf.max()
    if base is None:
        base = low
    if cap is None:
        cap = high

    # Rescale density to [base, cap]. A flat density has no range to stretch
    # and is pinned to base instead of dividing by zero:
    span = high - low
    if span > 0:
        pdf = (pdf - low) / span * (cap - base) + base
    else:
        pdf = np.full(pdf.shape, base, dtype=float)

    if which == 'x':
        transform = ax.get_xaxis_transform()
    elif which == 'y':
        transform = ax.get_yaxis_transform()
    else:
        transform = ax.transData

    rng = np.random.default_rng()
    handles = []
    for value in data:
        # Nearest axis point (identical to exact match for on-grid values,
        # but does not fail on floating-point mismatches):
        ind = int(np.argmin(np.abs(axis - value)))
        offset = rng.uniform(base, pdf[ind]) if shifted else base
        variables = (offset, value) if which == 'y' else (value, offset)
        handles.extend(ax.plot(*variables, transform=transform, **kwargs))

    if add_pdf:
        variables = (pdf, axis) if which == 'y' else (axis, pdf)
        pdf_handle = ax.plot(*variables, transform=transform, c='k', lw=1)
        return handles, pdf_handle
    return handles

View File

@@ -5,16 +5,17 @@ from thunderhopper.filters import decibel, sosfilter
from IPython import embed
# GENERAL SETTINGS:
target = ['Omocestus_rufipes', '*'][0]
data_paths = search_files(target, excl='noise', dir='../data/processed/')
example_file = 'Omocestus_rufipes_DJN_32-40s724ms-48s779ms'
search_target = ['*', example_file][1]
data_paths = search_files(search_target, excl='noise', dir='../data/processed/')
noise_path = '../data/processed/white_noise_sd-1.npz'
save_path = '../data/inv/log_hp/'
# ANALYSIS SETTINGS:
add_noise = target == '*' or False
save_snippets = target == 'Omocestus_rufipes'
add_noise = search_target == '*' or False
save_detailed = search_target == example_file
example_scales = np.array([0.1, 1, 10, 30, 100, 300])
scales = np.geomspace(0.1, 10000, 500)
scales = np.geomspace(0.01, 10000, 1000)
scales = np.unique(np.concatenate((scales, example_scales)))
# PREPARATION:
@@ -35,47 +36,67 @@ for data_path, name in zip(data_paths, crop_paths(data_paths)):
# Normalize song component:
song /= song[segment].std()
# Rescale song component:
mix = song[:, None] * scales[None, :]
if add_noise:
# Add normalized noise component:
# Get normalized noise component:
noise = pure_noise[:song.shape[0]]
noise /= noise[segment].std()
mix += noise[:, None]
# Process mixture:
mix = sosfilter(np.abs(mix), rate, config['env_fcut'], 'lp',
padtype='even', padlen=config['padlen'])
mix_log = decibel(mix, ref=1)
mix_inv = sosfilter(mix_log, rate, config['inv_fcut'], 'hp',
padtype='constant', padlen=config['padlen'])
# Prepare storage:
measure_inv = np.zeros_like(scales)
if save_detailed:
# Prepare optional storage:
measure_env = np.zeros_like(scales)
measure_log = np.zeros_like(scales)
snip_env = np.zeros((song.shape[0], example_scales.size))
snip_log = np.zeros((song.shape[0], example_scales.size))
snip_inv = np.zeros((song.shape[0], example_scales.size))
# Get intensity measure per stage:
measure_env = mix[segment, :].std(axis=0)
measure_log = mix_log[segment, :].std(axis=0)
measure_inv = mix_inv[segment, :].std(axis=0)
# Execute piecewise:
for i, scale in enumerate(scales):
# Get scaled mixture:
mix = song * scale
if add_noise:
mix += noise
# Process mixture:
mix = sosfilter(np.abs(mix), rate, config['env_fcut'], 'lp',
padtype='even', padlen=config['padlen'])
mix_log = decibel(mix, ref=1)
mix_inv = sosfilter(mix_log, rate, config['inv_fcut'], 'hp',
padtype='constant', padlen=config['padlen'])
# Log intensity measures:
measure_inv[i] = mix_inv[segment].std()
if save_detailed:
measure_env[i] = mix[segment].std()
measure_log[i] = mix_log[segment].std()
if scale in example_scales:
# Log snippet data:
save_ind = np.nonzero(example_scales == scale)[0][0]
snip_env[:, save_ind] = mix
snip_log[:, save_ind] = mix_log
snip_inv[:, save_ind] = mix_inv
# Save analysis results:
save_inds = np.nonzero(np.isin(scales, example_scales))[0]
if save_path is not None:
data = dict(
archive = dict(
scales=scales,
example_scales=example_scales,
measure_env=measure_env,
measure_log=measure_log,
measure_inv=measure_inv,
)
if save_snippets:
data.update(
snip_env=mix[:, save_inds],
snip_log=mix_log[:, save_inds],
snip_inv=mix_inv[:, save_inds],
)
if save_detailed:
archive.update(
measure_env=measure_env,
measure_log=measure_log,
snip_env=snip_env,
snip_log=snip_log,
snip_inv=snip_inv,
)
file_name = save_path + name
if add_noise:
file_name += '_noise'
save_data(file_name, data, config, overwrite=True)
save_data(file_name, archive, config, overwrite=True)
print('Done.')
embed()

View File

@@ -7,9 +7,10 @@ from IPython import embed
## SETTINGS:
# General:
search_target = '*'
input_folder = '../data/raw/'
output_folder = '../data/processed/'
stages = ['raw', 'filt', 'env', 'log', 'inv', 'conv', 'bi', 'feat', 'norm']
stages = ['raw', 'filt', 'env', 'log', 'inv', 'conv', 'bi', 'feat']
if False:
# Overwrites edited:
stages.append('songs')
@@ -30,7 +31,7 @@ config.update({
'rate_ratio': None,
'env_fcut': 250,
'db_ref': 1,
'inv_fcut': 5,
'inv_fcut': 10,
'feat_thresh': np.load('../data/kernel_thresholds.npy') * 0.2,
'feat_fcut': 0.5,
'label_channels': 0,
@@ -40,7 +41,7 @@ config.update({
## PREPARATION:
# Fetch WAV recording files:
input_paths = search_files(ext='wav', dir=input_folder)
input_paths = search_files(search_target, ext='wav', dir=input_folder)
path_names = crop_paths(input_paths)
# PROCESSING:

View File

@@ -6,11 +6,12 @@ from IPython import embed
# Settings:
species = [
'Omocestus_rufipes',
'Chorthippus_biguttulus',
'Chorthippus_mollis',
'Chrysochraon_dispar',
'Euchorthippus_declivus',
'Gomphocerippus_rufus',
'Omocestus_rufipes',
'Pseudochorthippus_parallelus',
]
file_name = '../data/species_colors.npz'