# paper_2025/python/fig_invariance_thresh-lp_species.py

import plotstyle_plt
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from itertools import product
from thunderhopper.filetools import search_files
from thunderhopper.modeltools import load_data
from thunderhopper.filtertools import find_kern_specs
from color_functions import load_colors, shade_colors
from plot_functions import hide_axis, ylimits, xlabel, ylabel, super_ylabel,\
                           plot_line, plot_barcode, strip_zeros, time_bar,\
                           letter_subplot, letter_subplots, hide_ticks,\
                           super_xlabel, super_ylabel, assign_colors
from IPython import embed

def force_sequence(*vars, skip_None=False, equal_size=False):
    """ Ensures single-loop compatibility of one or several input variables.
        Uses np.ndim() to separate sequence-likes (tuples, lists, >=1D arrays)
        and scalar inputs (int, float, bool, 0D arrays, strings, dicts, None).
        Scalar variables are promoted to 1D sequences by either tuple wrapping
        or expanding by one array dimension (only 0D arrays). All single-entry
        sequences can be repeated to match the length of the longest sequence.
        Input variables that are None can be excluded from these treatments.

    Parameters
    ----------
    *vars : tuple (m,) of inputs (any type)
        Input variables to be checked, promoted, and equalized as required.
    skip_None : bool, optional
        If True, None inputs fall through unmodified. The default is False.
    equal_size : bool, optional
        If True, counts the number of elements in each passed or promoted
        sequence (using len(), meaning that elements are defined as entries
        along the first sequence axis) and repeats single-element sequences to
        match the maximum count. Arrays with shape[0] == 1 are not tiled but
        tuple-wrapped and repeated to avoid deep copies. The default is False.

    Returns
    -------
    vars : array-like or None or list (m,) of array-likes or Nones
        Treated output variables, each either a >=1D sequence-like or None.
        Single variables are returned without list wrapper. Returns an empty
        list if no input variables are passed.

    Raises
    ------
    ValueError
        Breaks if equal_size is True and a sequence has incompatible length,
        i.e. any number of elements other than 1, 0 (Nones) or the maximum.
    """
    # Nothing to treat (max() below would fail on an empty list of sizes):
    if not vars:
        return []

    # Enforce input iterability:
    vars, sizes = list(vars), []
    for i, var in enumerate(vars):
        if skip_None and var is None:
            # Maintain None:
            sizes.append(0)
            continue
        if np.ndim(var) == 0:
            # Make each input variable at least 1D sequence-like:
            vars[i] = var[None] if isinstance(var, np.ndarray) else (var,)
        # Count sequence elements:
        sizes.append(len(vars[i]))

    # Check early exits:
    if len(vars) == 1:
        return vars[0]
    target = max(sizes)
    if not equal_size or target <= 1 or all(n == target for n in sizes):
        return vars

    # Validate compatibility of element counts:
    if not all(n in (0, 1, target) for n in sizes):
        msg = f'Given a maximum sequence length of {target}, all variables '\
              f'must either have 1 or {target} elements or be None: {sizes}'
        raise ValueError(msg)

    # Equalize sequence length across input variables:
    for i, (var, size) in enumerate(zip(vars, sizes)):
        if size == 1:
            # Arrays are wrapped as a whole, so that repetition only copies
            # references (tuples and lists are repeated directly):
            vars[i] = ((var,) if isinstance(var, np.ndarray) else var) * target
    return vars

def split_subplot(ax, side='right', size=10, pad=10):
    """ Splits one or several child subplots off the given parent subplot.
        A new axes divider is opened on the parent axes, and child axes are
        appended to the requested sides with the requested size and padding.
        The parent shrinks accordingly. Both size and pad are percentages of
        the width (side 'left' or 'right') or height (side 'top' or 'bottom')
        of what remains of the parent: size=100 yields a child as large as the
        parent after the split (regardless of padding), and pad=100 yields a
        gap as large as the parent's new width or height. Passing 1D
        sequence-likes of equal length for any of side, size, or pad performs
        several splits with the same divider. Repeated calls on the same parent
        are possible, but each call opens a new and updated divider, so size
        and pad values are not consistent between calls.

    Parameters
    ----------
    ax : matplotlib axes
        Parent subplot to split.
    side : str or 1D array-like of str (m,)
        Side of the parent where each child is appended. Options are 'bottom',
        'left', 'top', 'right'. The default is 'right'.
    size : int or float or 1D array-like of ints or floats (m,), optional
        Extent of each child axes as percentage of the parent's width or
        height after splitting. Several splits from the same side are done in
        the given order, the earliest child ending up closest to the parent.
        The default is 10.
    pad : int or float or 1D array-like of ints or floats (m,), optional
        Gap between each child axes and the parent as percentage of the
        parent's width or height after splitting. The default is 10.

    Returns
    -------
    matplotlib axes or list of matplotlib axes (m,)
        The newly appended child subplot(s). A single axes is returned only if
        side, size, and pad are all scalars, else a list.
    """
    # Each call works on its own divider of the parent axes:
    divider = make_axes_locatable(ax)

    # All-scalar inputs request exactly one child axes:
    if all(np.ndim(var) == 0 for var in (side, size, pad)):
        return divider.append_axes(side, size=f'{size}%', pad=f'{pad}%')

    # Else, bring inputs to a common length and split once per entry:
    sides, sizes, pads = force_sequence(side, size, pad, equal_size=True)
    children = []
    for s, n, p in zip(sides, sizes, pads):
        children.append(divider.append_axes(s, f'{n}%', f'{p}%'))
    return children


# GENERAL SETTINGS:

# Species shown in the figure (commented entries are currently excluded):
targets = [
    'Omocestus_rufipes',
    'Chorthippus_biguttulus',
    # 'Chorthippus_mollis',
    # 'Chrysochraon_dispar',
    'Gomphocerippus_rufus',
    # 'Pseudochorthippus_parallelus',
]

# Data files of the target species. The matching noise files are derived from
# these paths in the plotting loop.
# NOTE(review): incl/excl presumably filter file names -- confirm against
# search_files().
pure_paths = search_files(
    targets,
    incl='subset',
    excl='noise',
    dir='../data/inv/thresh_lp/',
)

# Keyword arguments forwarded to load_data() for every file:
load_kwargs = {'keywords': ['scales', 'measure', 'thresh']}

# Output file of the figure (None disables saving):
save_path = '../figures/fig_invariance_thresh_lp_species.pdf'

# SUBSET SETTINGS:

# Threshold to display, chosen by index from the candidate values. It is
# matched against the 'thresh_perc' entry of the loaded data:
thresh_choices = np.array([0.6, 0.75, 0.999])
thresh_percent = thresh_choices[0]

# Kernel specifiers to display, chosen by row index from the candidates and
# looked up with find_kern_specs().
# NOTE(review): columns presumably hold kernel type and width -- confirm.
kernel_choices = np.array([
    [1, 0.008],
    [2, 0.004],
    [3, 0.002],
])
kernels = kernel_choices[[0, 1]]

# GRAPH SETTINGS:

# Figure size (centimeters divided by 2.54, i.e. given in inches):
fig_kwargs = {'figsize': (32/2.54, 16/2.54)}

# One column of species axes per target:
n_species = len(targets)

# Outer grid spanning the whole figure (species columns plus two more):
super_grid_kwargs = {
    'nrows': 2,
    'ncols': n_species + 2,
    'wspace': 0,
    'hspace': 0,
    'left': 0,
    'right': 1,
    'bottom': 0,
    'top': 1,
}

# Cells of the outer grid occupied by each subfigure:
subfig_specs = {
    'spec': (slice(None), slice(0, n_species)),
    'big': (slice(None), slice(n_species, None)),
}

# Grid of the species subfigure:
spec_grid_kwargs = {
    'nrows': 2,
    'ncols': n_species,
    'wspace': 0.25,
    'hspace': 0.1,
    'left': 0.1,
    'right': 0.97,
    'bottom': 0.1,
    'top': 0.94,
}

# Grid of the feature space subfigure (vertically aligned with species grid):
big_grid_kwargs = {
    'nrows': 2,
    'ncols': 1,
    'wspace': 0,
    'hspace': 0.2,
    'left': 0,
    'right': 1,
    'bottom': spec_grid_kwargs['bottom'],
    'top': spec_grid_kwargs['top'],
}

# Passed to set_aspect() of the feature space axes:
anchor_kwargs = {
    'aspect': 'equal',
    'adjustable': 'box',
    'anchor': (0.3, 0.5),
}

# Lower edge, width, and height of the kernel insets:
inset_kwargs = {
    'y0': 0.7,
    'w': 0.3,
    'h': 0.2,
}

# PLOT SETTINGS:

# Line color of the invariance curves:
base_color = load_colors('../data/stage_colors.npz')['feat']

# Colormap of the feature space scatter per species (in plotting order):
spec_cmaps = ['Reds', 'Greens', 'Blues']

# Line widths of invariance curves and kernel waveforms:
lw = {
    'spec': 2,
    'kern': 3,
}

# Keyword arguments of the feature space scatter plots:
space_kwargs = {'s': 30}

# Axis label texts per subfigure:
xlabs = {
    'spec': 'scale $\\alpha$',
    'big': '$\\mu_{f_1}$',
}
ylabs = {
    'spec': '$\\mu_f$',
    'big': '$\\mu_{f_2}$',
}

# Axis label placement and formatting per subfigure:
xlab_spec_kwargs = {
    'y': 0.005,
    'fontsize': 16,
    'ha': 'center',
    'va': 'bottom',
}
ylab_spec_kwargs = {
    'x': 0,
    'fontsize': 20,
    'ha': 'left',
    'va': 'center',
}
xlab_big_kwargs = {
    'y': 0.005,
    'fontsize': 20,
    'ha': 'center',
    'va': 'bottom',
}
ylab_big_kwargs = {
    'x': 0.03,
    'fontsize': 20,
    'ha': 'center',
    'va': 'center',
}

# Major tick spacing per subfigure (used with MultipleLocator):
xloc = {'big': 0.5}
yloc = {
    'spec': 0.5,
    'big': 0.5,
}

# Placement and formatting of the panel letters:
spec_letter_kwargs = {
    'x': 0,
    'y': 1.03,
    'ha': 'center',
    'va': 'bottom',
    'fontsize': 22,
}
big_letter_kwargs = {
    'x': 0,
    'yref': spec_letter_kwargs['y'],
    'ha': 'center',
    'va': 'bottom',
    'fontsize': 22,
}

# Time bar below the kernel insets (starts at the lower inset edge):
time_bar_kwargs = {
    'dur': 0.05,
    'y0': inset_kwargs['y0'],
    'y1': inset_kwargs['y0'] + 0.03,
    'color': 'k',
    'lw': 0,
}

# Bounds of the first colorbar axes (left, bottom, width, height), spanning
# the vertical extent of the feature space grid:
cbar_bounds = [
    0.8,
    big_grid_kwargs['bottom'],
    0.15,
    big_grid_kwargs['top'] - big_grid_kwargs['bottom'],
]

# Shading range (currently not used by the active plotting code):
shade_factors = [0.9, -0.9]

# EXECUTION:

# Prepare overall graph:
fig = plt.figure(**fig_kwargs)
super_grid = fig.add_gridspec(**super_grid_kwargs)

# Prepare species-specific axes (one column per species, top row for pure and
# bottom row for noise data):
spec_subfig = fig.add_subfigure(super_grid[subfig_specs['spec']])
spec_grid = spec_subfig.add_gridspec(**spec_grid_kwargs)
spec_axes = np.zeros((spec_grid_kwargs['nrows'], n_species), dtype=object)
for i, j in product(range(spec_grid_kwargs['nrows']), range(n_species)):
    ax = spec_subfig.add_subplot(spec_grid[i, j])
    # Symmetric log scaling of the scale axis, fixed unit range for measures:
    ax.set_xscale('symlog', linthresh=0.1, linscale=0.5)
    ax.yaxis.set_major_locator(plt.MultipleLocator(yloc['spec']))
    ax.set_ylim(0, 1)
    spec_axes[i, j] = ax

# Shared axis labels, spanning the bottom row and the left column:
super_xlabel(xlabs['spec'], spec_subfig, spec_axes[-1, 0], spec_axes[-1, -1], **xlab_spec_kwargs)
super_ylabel(ylabs['spec'], spec_subfig, spec_axes[-1, 0], spec_axes[0, 0], **ylab_spec_kwargs)

# Keep ticks only along the bottom row and the left column (plain loops,
# since the calls are made for their side effects only):
for ax in spec_axes[0, :]:
    hide_ticks(ax, side='bottom')
for ax in spec_axes[:, 1:].ravel():
    hide_ticks(ax, side='left')

# NOTE(review): labels are hard-coded for three species columns -- adjust
# together with targets.
letter_subplots(spec_axes[0, :], labels='abc', **spec_letter_kwargs)

# Prepare kernel insets (evenly spread across the first species axes):
n_kernels = kernels.shape[0]
# Centers of equal-width bins along the unit interval, one per kernel:
x0 = np.linspace(0, 1, n_kernels + 1)[:-1] + 1 / n_kernels / 2
# Shift from inset center to left inset edge:
x0 -= inset_kwargs['w'] / 2
insets = []
for i, left in enumerate(x0):
    inset = spec_axes[0, 0].inset_axes(
        [left, inset_kwargs['y0'], inset_kwargs['w'], inset_kwargs['h']])
    # Number kernels starting at one:
    inset.set_title(rf'$k_{{{i+1}}}$', fontsize=20)
    inset.axis('off')
    insets.append(inset)

# Prepare feature space axes (one per row of the feature space grid):
big_subfig = fig.add_subfigure(super_grid[subfig_specs['big']])
big_grid = big_subfig.add_gridspec(**big_grid_kwargs)
# Size the axes array by the grid it is filled from (not the outer grid):
big_axes = np.zeros(big_grid_kwargs['nrows'], dtype=object)
for i in range(big_axes.size):
    ax = big_subfig.add_subplot(big_grid[i, 0])
    # Both features are displayed on the unit range with equal aspect:
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.xaxis.set_major_locator(plt.MultipleLocator(xloc['big']))
    ax.yaxis.set_major_locator(plt.MultipleLocator(yloc['big']))
    ax.set_aspect(**anchor_kwargs)
    # Position the label in subfigure coordinates:
    ylabel(ax, ylabs['big'], transform=big_subfig.transSubfigure, **ylab_big_kwargs)
    big_axes[i] = ax

# Label and tick only the bottom axes along the shared x-axis:
super_xlabel(xlabs['big'], big_subfig, big_axes[-1], big_axes[-1], **xlab_big_kwargs)
for ax in big_axes[:-1]:
    hide_ticks(ax, side='bottom')

# Align panel letter with the letters of the species axes:
letter_subplot(big_axes[0], 'd', ref=spec_axes[0, 0], **big_letter_kwargs)

# Prepare colorbars (one axes per species, side by side without gaps):
bar_ax = big_subfig.add_axes(cbar_bounds)
# Split off one equally sized child per additional species, so that the
# number of colorbar axes follows the number of species columns:
bar_axes = split_subplot(bar_ax, side=['right'] * (n_species - 1), size=100, pad=0)
bar_axes = [bar_ax] + bar_axes
for ax in bar_axes:
    # Close the frame around each colorbar and clear its ticks:
    ax.spines[['right', 'top']].set_visible(True)
    hide_ticks(ax, 'bottom', ticks=False)
    hide_ticks(ax, 'left', ticks=False)
# Only the outermost colorbar carries ticks and labels (on its right side):
bar_axes[-1].tick_params(axis='y', which='both', right=True, labelright=True)

# Plot results per species (species index selects axes column, colormap,
# and colorbar axes):
for i, pure_path in enumerate(pure_paths):
    print(f'Processing {pure_path}')
    # Noise counterpart shares the file name except for the '_noise' suffix:
    noise_path = pure_path.replace('.npz', '_noise.npz')

    # Load invariance data:
    pure_data, config = load_data(pure_path, **load_kwargs)
    noise_data, _ = load_data(noise_path, **load_kwargs)
    scales = pure_data['scales']

    # Find the requested threshold (tolerant comparison, since exact float
    # equality breaks on round-off or a different float precision on file):
    thresh_values = pure_data['thresh_perc']
    thresh_hits = np.nonzero(np.isclose(thresh_values, thresh_percent))[0]
    if thresh_hits.size == 0:
        msg = f'Threshold {thresh_percent} not available in {pure_path}: '\
              f'{thresh_values}'
        raise ValueError(msg)
    thresh_ind = thresh_hits[0]

    # Reduce to kernel subset and single threshold:
    # NOTE(review): the same kernel and threshold indices are applied to the
    # noise data -- assumes both files share the same ordering.
    kern_inds = find_kern_specs(config['k_specs'], kerns=kernels)
    config['k_specs'] = config['k_specs'][kern_inds]
    config['kernels'] = config['kernels'][:, kern_inds]
    pure_measure = pure_data['measure_feat'][:, kern_inds, thresh_ind]
    noise_measure = noise_data['measure_feat'][:, kern_inds, thresh_ind]

    # Plot invariance curves (top row: pure, bottom row: noise):
    pure_ax, noise_ax = spec_axes[:, i]
    pure_ax.plot(scales, pure_measure, c=base_color, lw=lw['spec'])
    noise_ax.plot(scales, noise_measure, c=base_color, lw=lw['spec'])

    if i == 0:
        # Indicate kernel waveforms (once, with shared limits across insets):
        ylims = ylimits(config['kernels'], pad=0.05)
        xlims = (config['k_times'][0], config['k_times'][-1])
        for j, inset in enumerate(insets):
            inset.plot(config['k_times'], config['kernels'][:, j],
                       c='k', lw=lw['kern'])
            inset.set_xlim(xlims)
            inset.set_ylim(ylims)
        time_bar(insets[0], parent=spec_axes[0, 0], **time_bar_kwargs)

    # Plot pure feature space (first against second kernel, colored by scale):
    handle = big_axes[0].scatter(pure_measure[:, 0], pure_measure[:, 1],
                                 c=scales, cmap=spec_cmaps[i], **space_kwargs)

    # Plot noise feature space:
    big_axes[1].scatter(noise_measure[:, 0], noise_measure[:, 1],
                        c=scales, cmap=spec_cmaps[i], **space_kwargs)

    # Indicate scale color code:
    big_subfig.colorbar(handle, cax=bar_axes[i])

# Write the figure to file first (skipped if save_path is None), then display:
if save_path is not None:
    fig.savefig(save_path)
plt.show()

print('Done.')
# Open an interactive IPython shell at the end of the script.
# NOTE(review): this keeps the process alive until the shell is exited --
# confirm that this is intended outside of debugging sessions.
embed()