# File: paper_2025/python/fig_invariance_thresh-lp_species.py
# 2026-03-20 16:45:54 +01:00
# 403 lines, 13 KiB, Python

import plotstyle_plt
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from itertools import product
from thunderhopper.filetools import search_files
from thunderhopper.modeltools import load_data
from thunderhopper.filtertools import find_kern_specs
from color_functions import load_colors, shade_colors
from plot_functions import hide_axis, ylimits, xlabel, ylabel, super_ylabel,\
plot_line, plot_barcode, strip_zeros, time_bar,\
letter_subplot, letter_subplots, hide_ticks,\
super_xlabel, super_ylabel, assign_colors
from IPython import embed
def force_sequence(*vars, skip_None=False, equal_size=False):
    """ Ensures single-loop compatibility of one or several input variables.

    Uses np.ndim() to separate sequence-likes (tuples, lists, >=1D arrays)
    from scalar inputs (int, float, bool, 0D arrays, strings, dicts, None).
    Scalar variables are promoted to 1D sequences by either tuple wrapping
    or expanding by one array dimension (only 0D arrays). All single-entry
    sequences can be repeated to match the length of the longest sequence.
    Input variables that are None can be excluded from these treatments.

    Parameters
    ----------
    *vars : tuple (m,) of inputs (any type)
        Input variables to be checked, promoted, and equalized as required.
    skip_None : bool, optional
        If True, None inputs fall through unmodified. The default is False.
    equal_size : bool, optional
        If True, counts the number of elements in each passed or promoted
        sequence (using len(), meaning that elements are defined as entries
        along the first sequence axis) and repeats single-element sequences to
        match the maximum count. Arrays with shape[0] == 1 are not tiled:
        their single entry along the first axis is repeated inside a tuple,
        which avoids deep copies and yields the same elements as iterating
        over the array itself. The default is False.

    Returns
    -------
    vars : array-like or None or list (m,) of array-likes or Nones
        Treated output variables, each either a >=1D sequence-like or None.
        Single variables are returned without list wrapper. Calling without
        any input returns an empty list.

    Raises
    ------
    ValueError
        Breaks if equal_size is True and a sequence has incompatible length,
        i.e. any number of elements other than 1, 0 (Nones) or the maximum.
    """
    # Enforce input iterability (work on a mutable copy of the inputs):
    seqs, sizes = list(vars), []
    for i, var in enumerate(seqs):
        if skip_None and var is None:
            # Maintain None, counted as zero elements:
            sizes.append(0)
            continue
        if np.ndim(var) == 0:
            # Make each input variable at least 1D sequence-like:
            seqs[i] = var[None] if isinstance(var, np.ndarray) else (var,)
        # Count sequence elements:
        sizes.append(len(seqs[i]))

    # Check early exits:
    if len(seqs) == 1:
        return seqs[0]
    # The default covers calls without any input (max() of empty fails):
    target = max(sizes, default=0)
    if not equal_size or target <= 1 or all(n == target for n in sizes):
        return seqs

    # Validate compatibility of element counts:
    if not all(n in (0, 1, target) for n in sizes):
        msg = f'Given a maximum sequence length of {target}, all variables '\
              f'must either have 1 or {target} elements or be None: {sizes}'
        raise ValueError(msg)

    # Equalize sequence length across input variables:
    for i, (var, size) in enumerate(zip(seqs, sizes)):
        if size != 1:
            continue
        if isinstance(var, np.ndarray):
            # Repeat the single entry, not the whole array, so that looping
            # over the output does not gain a spurious leading axis:
            seqs[i] = (var[0],) * target
        else:
            seqs[i] = var * target
    return seqs
def split_subplot(ax, side='right', size=10, pad=10):
    """ Splits one or several child subplots off the given parent subplot.

    A new axes divider is opened on the area of the parent axes, and child
    axes of given size and padding are appended on the specified sides,
    shrinking the parent in the process. Both size and pad are percentages
    of the width (side 'left' or 'right') or height (side 'top' or 'bottom')
    of what remains of the parent: size=100 makes child and parent equally
    large after the split (regardless of padding), and pad=100 leaves a gap
    as wide or high as the parent's new extent. Passing 1D sequence-likes of
    equal length for any of side, size, or pad performs several splits with
    the same divider. Repeated calls on the same parent are possible, but
    each call opens a new and updated divider, so that size and pad values
    are not consistent between calls.

    Parameters
    ----------
    ax : matplotlib axes
        Parent subplot to be divided.
    side : str or 1D array-like of str (m,)
        Sides of the parent subplot where new subplots are to be appended.
        Options are 'bottom', 'left', 'top', 'right'. The default is 'right'.
    size : int or float or 1D array-like of ints or floats (m,), optional
        Horizontal or vertical extent of each child axes as percentage of width
        or height of the parent axes after splitting. Multiple splits from the
        same side are possible and performed in given order, with the earliest
        child axes being positioned closest to the parent. The default is 10.
    pad : int or float or 1D array-like of ints or floats (m,), optional
        Padding between each child axes and the parent as percentage of width
        or height of the parent axes after splitting. The default is 10.

    Returns
    -------
    matplotlib axes or list of matplotlib axes (m,)
        One or multiple newly appended child subplots.
    """
    divider = make_axes_locatable(ax)

    # All-scalar input means a single split that returns a bare axes:
    if all(np.ndim(arg) == 0 for arg in (side, size, pad)):
        return divider.append_axes(side, size=f'{size}%', pad=f'{pad}%')

    # Otherwise broadcast the inputs and split once per entry, in order:
    sides, sizes, pads = force_sequence(side, size, pad, equal_size=True)
    children = []
    for s, n, p in zip(sides, sizes, pads):
        children.append(divider.append_axes(s, f'{n}%', f'{p}%'))
    return children
# GENERAL SETTINGS:
# Species shown in the figure (commented-out entries are deselected):
targets = [
    'Omocestus_rufipes',
    'Chorthippus_biguttulus',
    # 'Chorthippus_mollis',
    # 'Chrysochraon_dispar',
    'Gomphocerippus_rufus',
    # 'Pseudochorthippus_parallelus',
]
# Data files per target species, as selected by search_files():
pure_paths = search_files(
    targets,
    incl='subset',
    excl='noise',
    dir='../data/inv/thresh_lp/',
)
# Keyword arguments passed on to every load_data() call:
load_kwargs = {'keywords': ['scales', 'measure', 'thresh']}
# Output file of the finished figure (None skips saving):
save_path = '../figures/fig_invariance_thresh_lp_species.pdf'
# SUBSET SETTINGS:
# Available threshold values and the single one used for this figure:
thresh_options = np.array([0.6, 0.75, 0.999])
thresh_percent = thresh_options[0]
# Available kernel specifications (one per row) and the rows used here:
kernel_options = np.array([
    [1, 0.008],
    [2, 0.004],
    [3, 0.002],
])
kernels = kernel_options[np.array([0, 1])]
# GRAPH SETTINGS:
# Figure size (centimeters divided by 2.54):
fig_kwargs = {'figsize': (32 / 2.54, 16 / 2.54)}
n_species = len(targets)
# Outer grid that is divided between the two subfigures below:
super_grid_kwargs = {
    'nrows': 2,
    'ncols': n_species + 2,
    'wspace': 0,
    'hspace': 0,
    'left': 0,
    'right': 1,
    'bottom': 0,
    'top': 1,
}
# Outer grid cells per subfigure (species columns first, rest for 'big'):
subfig_specs = {
    'spec': (slice(None), slice(0, n_species)),
    'big': (slice(None), slice(n_species, None)),
}
# Grid of the species subfigure (one column per species):
spec_grid_kwargs = {
    'nrows': 2,
    'ncols': n_species,
    'wspace': 0.25,
    'hspace': 0.1,
    'left': 0.1,
    'right': 0.97,
    'bottom': 0.1,
    'top': 0.94,
}
# Grid of the 'big' subfigure, vertically matched to the species grid:
big_grid_kwargs = {
    'nrows': 2,
    'ncols': 1,
    'wspace': 0,
    'hspace': 0.2,
    'left': 0,
    'right': 1,
    'bottom': spec_grid_kwargs['bottom'],
    'top': spec_grid_kwargs['top'],
}
# Keyword arguments for set_aspect() of the 'big' axes:
anchor_kwargs = {
    'aspect': 'equal',
    'adjustable': 'box',
    'anchor': (0.3, 0.5),
}
# Vertical position, width, and height of each kernel inset:
inset_kwargs = {
    'y0': 0.7,
    'w': 0.3,
    'h': 0.2,
}
# PLOT SETTINGS:
# Line color of the invariance curves:
base_color = load_colors('../data/stage_colors.npz')['feat']
# Colormap of the scatter plots and colorbar, one per species:
spec_cmaps = ['Reds', 'Greens', 'Blues']
# Line widths of invariance curves ('spec') and kernel waveforms ('kern'):
lw = {'spec': 2, 'kern': 3}
# Keyword arguments of the scatter plots:
space_kwargs = {'s': 30}

# Axis label texts:
xlabels = {
    'spec': 'scale $\\alpha$',
    'big': '$\\mu_{f_1}$',
}
ylabels = {
    'spec': '$\\mu_f$',
    'big': '$\\mu_{f_2}$',
    'bar': 'scale $\\alpha$',
}

# Axis label formatting:
xlab_spec_kwargs = {
    'y': 0,
    'fontsize': 16,
    'ha': 'center',
    'va': 'bottom',
}
xlab_big_kwargs = {
    'y': 0,
    'fontsize': 20,
    'ha': 'center',
    'va': 'bottom',
}
ylab_spec_kwargs = {
    'x': 0,
    'fontsize': 20,
    'ha': 'left',
    'va': 'center',
}
ylab_big_kwargs = {
    'x': 0.03,
    'fontsize': 20,
    'ha': 'center',
    'va': 'center',
}
ylab_cbar_kwargs = {
    'x': 1,
    'fontsize': 16,
    'ha': 'center',
    'va': 'bottom',
}

# Base values of the MultipleLocator tick locators:
xloc = {'big': 0.5}
yloc = {'spec': 0.5, 'big': 0.5}

# Subplot letter formatting:
letter_spec_kwargs = {
    'x': 0,
    'yref': 1,
    'ha': 'center',
    'va': 'top',
    'fontsize': 22,
}
letter_big_kwargs = {
    'x': 0,
    'yref': 1,
    'ha': 'center',
    'va': 'top',
    'fontsize': 22,
}

# Time bar settings, vertically tied to the kernel insets:
time_bar_kwargs = {
    'dur': 0.05,
    'y0': inset_kwargs['y0'],
    'y1': inset_kwargs['y0'] + 0.03,
    'color': 'k',
    'lw': 0,
}

# Bounds passed to add_axes() for the colorbar area, spanning the vertical
# extent of the 'big' grid (the first entry is offset again before use):
cbar_bounds = [
    0.05,
    big_grid_kwargs['bottom'],
    0.15,
    big_grid_kwargs['top'] - big_grid_kwargs['bottom'],
]
shade_factors = [0.9, -0.9]
# EXECUTION:
# Prepare overall graph:
fig = plt.figure(**fig_kwargs)
super_grid = fig.add_gridspec(**super_grid_kwargs)

# Prepare species-specific axes (one column per species):
spec_subfig = fig.add_subfigure(super_grid[subfig_specs['spec']])
spec_grid = spec_subfig.add_gridspec(**spec_grid_kwargs)
n_spec_rows = spec_grid_kwargs['nrows']
spec_axes = np.zeros((n_spec_rows, n_species), dtype=object)
for i, j in product(range(n_spec_rows), range(n_species)):
    ax = spec_subfig.add_subplot(spec_grid[i, j])
    ax.set_xscale('symlog', linthresh=0.1, linscale=0.5)
    ax.yaxis.set_major_locator(plt.MultipleLocator(yloc['spec']))
    ax.set_ylim(0, 1)
    spec_axes[i, j] = ax

# Shared axis labels across the species axes:
super_xlabel(xlabels['spec'], spec_subfig, spec_axes[-1, 0],
             spec_axes[-1, -1], **xlab_spec_kwargs)
super_ylabel(ylabels['spec'], spec_subfig, spec_axes[-1, 0],
             spec_axes[0, 0], **ylab_spec_kwargs)

# Hide bottom ticks in the upper row and left ticks right of first column:
for ax in spec_axes[0, :]:
    hide_ticks(ax, side='bottom')
for ax in spec_axes[:, 1:].ravel():
    hide_ticks(ax, side='left')
letter_subplots(spec_axes[0, :], labels='abc', ref=spec_subfig,
                **letter_spec_kwargs)
# Prepare kernel insets:
# Left edges of equally wide insets, centered on equal shares of the x-axis:
n_kernels = kernels.shape[0]
x0 = np.linspace(0, 1, n_kernels + 1)[:-1] + 1 / n_kernels / 2
x0 -= inset_kwargs['w'] / 2
insets = []
for i, left in enumerate(x0):
    # One inset per kernel in the first species axes, titled by number:
    bounds = [left, inset_kwargs['y0'], inset_kwargs['w'], inset_kwargs['h']]
    inset = spec_axes[0, 0].inset_axes(bounds)
    inset.set_title(rf'$k_{{{i+1}}}$', fontsize=20)
    inset.axis('off')
    insets.append(inset)
# Prepare feature space axes:
big_subfig = fig.add_subfigure(super_grid[subfig_specs['big']])
big_grid = big_subfig.add_gridspec(**big_grid_kwargs)
# One axes per row of the feature space grid. The count is taken from
# big_grid_kwargs (not from the super grid), since big_grid is indexed below:
big_axes = np.zeros(big_grid_kwargs['nrows'], dtype=object)
for i in range(big_axes.size):
    ax = big_subfig.add_subplot(big_grid[i, 0])
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.xaxis.set_major_locator(plt.MultipleLocator(xloc['big']))
    ax.yaxis.set_major_locator(plt.MultipleLocator(yloc['big']))
    ax.set_aspect(**anchor_kwargs)
    # Label is positioned in subfigure coordinates:
    ylabel(ax, ylabels['big'], transform=big_subfig.transSubfigure,
           **ylab_big_kwargs)
    big_axes[i] = ax
# Single x-label below the last (bottom) axes:
super_xlabel(xlabels['big'], big_subfig, big_axes[-1], big_axes[-1],
             **xlab_big_kwargs)
hide_ticks(big_axes[0], side='bottom')
letter_subplot(big_axes[0], 'd', ref=big_subfig, **letter_big_kwargs)
# Prepare colorbars:
# Place the colorbar area relative to the right edge of the feature space
# axes (note: this modifies cbar_bounds in place):
cbar_bounds[0] += big_axes[-1].get_position().x1
bar_axes = [big_subfig.add_axes(cbar_bounds)]
# Split off one more equally sized colorbar axes per remaining species, so
# that the number of colorbars follows n_species instead of being fixed:
bar_axes.extend(split_subplot(bar_axes[0], side=['right'] * (n_species - 1),
                              size=100, pad=0))
# Plot results per species:
for i, pure_path in enumerate(pure_paths):
    print(f'Processing {pure_path}')
    noise_path = pure_path.replace('.npz', '_noise.npz')

    # Load invariance data:
    pure_data, config = load_data(pure_path, **load_kwargs)
    noise_data, _ = load_data(noise_path, **load_kwargs)
    scales = pure_data['scales']

    # Find the requested threshold. Stored floats are compared with a
    # tolerance, since exact equality breaks on rounding or dtype changes:
    thresh_match = np.isclose(pure_data['thresh_perc'], thresh_percent)
    thresh_inds = np.nonzero(thresh_match)[0]
    if thresh_inds.size == 0:
        msg = f'No threshold matching {thresh_percent} found in {pure_path}'
        raise ValueError(msg)
    thresh_ind = thresh_inds[0]

    # Reduce to kernel subset and single threshold (the index found in the
    # pure data is applied to the noise data as well):
    kern_inds = find_kern_specs(config['k_specs'], kerns=kernels)
    config['k_specs'] = config['k_specs'][kern_inds]
    config['kernels'] = config['kernels'][:, kern_inds]
    pure_measure = pure_data['measure_feat'][:, kern_inds, thresh_ind]
    noise_measure = noise_data['measure_feat'][:, kern_inds, thresh_ind]

    # Plot invariance curves (upper row: pure, lower row: noise):
    pure_ax, noise_ax = spec_axes[:, i]
    pure_ax.plot(scales, pure_measure, c=base_color, lw=lw['spec'])
    noise_ax.plot(scales, noise_measure, c=base_color, lw=lw['spec'])

    if i == 0:
        # Indicate kernel waveforms (once, using the first species' config):
        ylims = ylimits(config['kernels'], pad=0.05)
        xlims = (config['k_times'][0], config['k_times'][-1])
        for j, inset in enumerate(insets):
            inset.plot(config['k_times'], config['kernels'][:, j],
                       c='k', lw=lw['kern'])
            inset.set_xlim(xlims)
            inset.set_ylim(ylims)
        time_bar(insets[0], parent=spec_axes[0, 0], **time_bar_kwargs)

    # Plot pure feature space (first vs. second selected kernel):
    handle = big_axes[0].scatter(pure_measure[:, 0], pure_measure[:, 1],
                                 c=scales, cmap=spec_cmaps[i], **space_kwargs)

    # Plot noise feature space:
    big_axes[1].scatter(noise_measure[:, 0], noise_measure[:, 1],
                        c=scales, cmap=spec_cmaps[i], **space_kwargs)

    # Indicate scale color code:
    big_subfig.colorbar(handle, cax=bar_axes[i])
    bar_axes[i].set_yscale('symlog', linthresh=scales[1], linscale=0.2)
    if i < len(pure_paths) - 1:
        hide_ticks(bar_axes[i], 'right', ticks=False)
    else:
        # Only the last colorbar is labeled:
        ylabel(bar_axes[i], ylabels['bar'],
               transform=big_subfig.transSubfigure, **ylab_cbar_kwargs)
# Save the figure unless saving is disabled (save_path set to None):
if save_path is not None:
    fig.savefig(save_path)
# Display the figure:
plt.show()
print('Done.')
# NOTE(review): opens an interactive IPython session at the end of every run,
# which keeps the script from terminating on its own - confirm this is
# intended outside of debugging.
embed()