441 lines
13 KiB
Python
441 lines
13 KiB
Python
import plotstyle_plt
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
from itertools import product
|
|
from scipy.stats import ttest_ind
|
|
from thunderhopper.modeltools import load_data
|
|
from thunderhopper.filetools import search_files
|
|
from thunderhopper.filtertools import find_kern_specs
|
|
from misc_functions import shorten_species
|
|
from color_functions import load_colors
|
|
from plot_functions import hide_ticks, xlabel, ylabel
|
|
from IPython import embed
|
|
|
|
|
|
# GENERAL SETTINGS:
|
|
cross_species = [
|
|
'Chorthippus_biguttulus',
|
|
# 'Chorthippus_mollis',
|
|
'Chrysochraon_dispar',
|
|
# 'Euchorthippus_declivus',
|
|
'Gomphocerippus_rufus',
|
|
'Omocestus_rufipes',
|
|
'Pseudochorthippus_parallelus',
|
|
]
|
|
n_spec = len(cross_species)
|
|
example_files = {
|
|
'Chorthippus_biguttulus': 'Chorthippus_biguttulus_GBC_94-17s73.1ms-19s977ms',
|
|
'Chorthippus_mollis': 'Chorthippus_mollis_DJN_41_T28C-46s4.58ms-1m15s697ms',
|
|
'Chrysochraon_dispar': 'Chrysochraon_dispar_DJN_26_T28C_DT-32s134ms-34s432ms',
|
|
'Euchorthippus_declivus': 'Euchorthippus_declivus_FTN_79-2s167ms-2s563ms',
|
|
'Gomphocerippus_rufus': 'Gomphocerippus_rufus_FTN_91-3-884ms-10s427ms',
|
|
'Omocestus_rufipes': 'Omocestus_rufipes_DJN_32-40s724ms-48s779ms',
|
|
'Pseudochorthippus_parallelus': 'Pseudochorthippus_parallelus_GBC_88-6s678ms-9s32.3ms'
|
|
}
|
|
in_species = [
|
|
'Chorthippus_biguttulus',
|
|
'Chorthippus_mollis',
|
|
'Chrysochraon_dispar',
|
|
'Euchorthippus_declivus',
|
|
'Gomphocerippus_rufus',
|
|
'Omocestus_rufipes',
|
|
'Pseudochorthippus_parallelus',
|
|
][5]
|
|
save_path = '../figures/fig_features_cross_species.pdf'
|
|
|
|
|
|
# ANALYSIS SETTINGS:
|
|
thresh_rel = np.array([0, 0.5, 1, 1.5, 2, 2.5, 3])[5]
|
|
single_spec_file = True # Only use example files for cross-species comparison
|
|
equalize_spec_files = False # Prune to minimum available across species
|
|
n_song = n_spec#None # Limit to n first songs of in-species dataset (None for all)
|
|
color_kern_types = True
|
|
calculate_regression = True
|
|
test_regression = True
|
|
|
|
# SUBSET SETTINGS:
|
|
types = np.array([1, -1, 2, -2, 3, -3, 4, -4])
|
|
# types = [1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 8, -8, 9, -9, 10, -10]
|
|
sigmas = np.array([0.001, 0.002, 0.004, 0.008, 0.016])
|
|
# sigmas = [0.001, 0.002, 0.004, 0.008, 0.016, 0.032]
|
|
kernels = None
|
|
reduce_kernels = any(var is not None for var in [kernels, types, sigmas])
|
|
|
|
|
|
# GRAPH SETTINGS:
|
|
fig_kwargs = dict(
|
|
figsize=(32/2.54, 32/2.54),
|
|
)
|
|
spec_grid_kwargs = dict(
|
|
wspace=0.1,
|
|
hspace=0.1,
|
|
left=0.07,
|
|
right=0.93,
|
|
bottom=0.07,
|
|
top=0.93,
|
|
)
|
|
song_grid_kwargs = dict(
|
|
wspace=0.1,
|
|
hspace=0.1,
|
|
left=spec_grid_kwargs['left'],
|
|
right=spec_grid_kwargs['right'],
|
|
bottom=spec_grid_kwargs['bottom'],
|
|
top=spec_grid_kwargs['top'],
|
|
)
|
|
|
|
# PLOT SETTINGS:
|
|
kern_colors = load_colors('../data/feat_colors_subset.npz')
|
|
fs = dict(
|
|
lab_norm=16,
|
|
lab_tex=20,
|
|
letter=22,
|
|
tit_norm=16,
|
|
tit_tex=20,
|
|
bar=16,
|
|
)
|
|
label_song_prefix = ['song ', ''][0]
|
|
xlab_low_kwargs = dict(
|
|
y=0.01,
|
|
fontsize=fs['lab_norm'],
|
|
ha='center',
|
|
va='bottom',
|
|
fontstyle='italic',
|
|
)
|
|
xlab_up_kwargs = dict(
|
|
y=0.99,
|
|
fontsize=fs['lab_norm'],
|
|
ha='center',
|
|
va='top',
|
|
)
|
|
ylab_low_kwargs = dict(
|
|
x=0.01,
|
|
fontsize=fs['lab_norm'],
|
|
ha='center',
|
|
va='top',
|
|
fontstyle='italic',
|
|
)
|
|
ylab_up_kwargs = dict(
|
|
x=0.99,
|
|
rotation=270,
|
|
fontsize=fs['lab_norm'],
|
|
ha='center',
|
|
va='top',
|
|
)
|
|
loc = 1
|
|
dot_spec_kwargs = dict(
|
|
ls='none',
|
|
marker='o',
|
|
mec='w',
|
|
ms=6,
|
|
mew=0.5,
|
|
alpha=1,
|
|
zorder=2,
|
|
)
|
|
dot_song_kwargs = dict(
|
|
ls='none',
|
|
marker='o',
|
|
mec='w',
|
|
ms=6,
|
|
mew=0.5,
|
|
alpha=1,
|
|
zorder=2,
|
|
)
|
|
if not color_kern_types:
|
|
dot_spec_kwargs['mfc'] = 'k'
|
|
dot_song_kwargs['mfc'] = 'k'
|
|
diag_fig_kwargs = dict(
|
|
c='k',
|
|
lw=1,
|
|
ls='-'
|
|
)
|
|
diag_ax_kwargs = dict(
|
|
c='k',
|
|
lw=1,
|
|
ls='-',
|
|
zorder=3,
|
|
)
|
|
text_spec_kwargs = dict(
|
|
x=0.05,
|
|
y=0.95,
|
|
ha='left',
|
|
va='top',
|
|
fontsize=16,
|
|
)
|
|
text_song_kwargs = dict(
|
|
x=0,
|
|
y=0.95,
|
|
ha='left',
|
|
va='top',
|
|
fontsize=16,
|
|
)
|
|
text_spec_prefix = '$\\rho\\,=\\,$'
|
|
text_song_prefix = ['$\\rho\\,=\\,$', ''][0]
|
|
if test_regression:
|
|
test_ax_side = 0.1
|
|
test_ax_bounds = [
|
|
song_grid_kwargs['right'] - test_ax_side,
|
|
spec_grid_kwargs['bottom'],
|
|
test_ax_side,
|
|
test_ax_side
|
|
]
|
|
ylab_test = '$\\rho$'
|
|
yloc_test = 1
|
|
ylab_test_kwargs = dict(
|
|
x=-0.3,
|
|
fontsize=fs['lab_norm'],
|
|
ha='center',
|
|
va='bottom',
|
|
)
|
|
boxplot_kwargs = dict(
|
|
positions=[0, 1],
|
|
widths=0.9,
|
|
tick_labels=['inter', 'intra'],
|
|
zorder=1,
|
|
medianprops=dict(
|
|
color='k',
|
|
lw=1,
|
|
),
|
|
boxprops=dict(
|
|
color='k',
|
|
lw=1,
|
|
),
|
|
)
|
|
boxplot_kwargs.update(
|
|
capprops=boxplot_kwargs['boxprops'],
|
|
whiskerprops=boxplot_kwargs['boxprops'],
|
|
)
|
|
boxplot_dot_kwargs = dict(
|
|
ls='none',
|
|
marker='o',
|
|
ms=4,
|
|
mec='k',
|
|
mfc='w',
|
|
mew=1.5,
|
|
alpha=0.5,
|
|
zorder=2,
|
|
)
|
|
|
|
|
|
nbins = 10
|
|
spec_color = 'darkorchid'
|
|
song_color = 'goldenrod'
|
|
bar_kwargs = dict(
|
|
width=0.95,
|
|
alpha=0.75,
|
|
)
|
|
mean_kwargs = dict(
|
|
ls='--',
|
|
lw=1,
|
|
)
|
|
leg_kwargs = dict(
|
|
ncols=1,
|
|
loc='lower center',
|
|
bbox_to_anchor=(0, 1.05, 1, 0.5),
|
|
frameon=False,
|
|
prop=dict(
|
|
size=14,
|
|
),
|
|
borderpad=0,
|
|
borderaxespad=0,
|
|
handlelength=1,
|
|
columnspacing=1,
|
|
handletextpad=0.5,
|
|
labelspacing=0.1
|
|
)
|
|
|
|
|
|
# EXECUTION:
|
|
|
|
# Gather cross-species data:
|
|
spec_data, min_files = {}, np.inf
|
|
for species in cross_species:
|
|
# Load species data:
|
|
if single_spec_file:
|
|
path = search_files(example_files[species], dir='../data/inv/full/')[0]
|
|
else:
|
|
path = search_files(species, dir='../data/inv/full/collected/')[0]
|
|
data, config = load_data(path, ['measure_feat', 'thresh_rel'])
|
|
# Reduce to single threshold, last scale, optional kernel subset:
|
|
thresh_ind = np.nonzero(data['thresh_rel'] == thresh_rel)[0][0]
|
|
kern_inds = slice(None)
|
|
if reduce_kernels:
|
|
kern_inds = find_kern_specs(config['k_specs'], kernels, types, sigmas)
|
|
config['kernels'] = config['kernels'][:, kern_inds]
|
|
config['k_specs'] = config['k_specs'][kern_inds, :]
|
|
spec_data[species] = data['measure_feat'][-1, kern_inds, thresh_ind, ...]
|
|
if single_spec_file:
|
|
# Ensure consistent shape (features, files=1):
|
|
spec_data[species] = spec_data[species][..., None]
|
|
# Update number of possible comparisons:
|
|
min_files = min(min_files, spec_data[species].shape[-1])
|
|
|
|
if equalize_spec_files and not single_spec_file:
|
|
# Equalize number of files across species:
|
|
spec_data = {spec: feat[:, :min_files] for spec, feat in spec_data.items()}
|
|
|
|
# Gather in-species data:
|
|
paths = search_files(in_species, dir='../data/inv/full/')
|
|
if n_song is not None:
|
|
paths = paths[:n_song]
|
|
else:
|
|
n_song = len(paths)
|
|
song_data = np.zeros((n_song, config['k_specs'].shape[0]))
|
|
for i, path in enumerate(paths):
|
|
# Load song-specific dataset:
|
|
data, config = load_data(path, ['measure_feat', 'thresh_rel'])
|
|
# Reduce to single threshold, last scale, optional kernel subset:
|
|
thresh_ind = np.nonzero(data['thresh_rel'] == thresh_rel)[0][0]
|
|
kern_inds = slice(None)
|
|
if reduce_kernels:
|
|
kern_inds = find_kern_specs(config['k_specs'], kernels, types, sigmas)
|
|
config['kernels'] = config['kernels'][:, kern_inds]
|
|
config['k_specs'] = config['k_specs'][kern_inds, :]
|
|
song_data[i] = data['measure_feat'][-1, kern_inds, thresh_ind]
|
|
|
|
# Prepare graph:
|
|
fig = plt.figure(**fig_kwargs)
|
|
spec_grid = fig.add_gridspec(nrows=n_spec, ncols=n_spec, **spec_grid_kwargs)
|
|
song_grid = fig.add_gridspec(nrows=n_song, ncols=n_song, **song_grid_kwargs)
|
|
# fig.add_artist(plt.Line2D([0, 1], [1, 0], **diag_fig_kwargs))
|
|
|
|
# Prepare cross-species axes:
|
|
spec_labels = [shorten_species(spec) for spec in cross_species]
|
|
spec_axes = np.zeros((n_spec, n_spec), dtype=object)
|
|
for i, j in product(range(n_spec), range(n_spec)):
|
|
if i <= j:
|
|
continue
|
|
# Lower trianglular:
|
|
ax = fig.add_subplot(spec_grid[i, j])
|
|
ax.set_xlim(0, 1)
|
|
ax.set_ylim(0, 1)
|
|
ax.yaxis.set_major_locator(plt.MultipleLocator(loc))
|
|
ax.xaxis.set_major_locator(plt.MultipleLocator(loc))
|
|
# Indicate subplot diagonal:
|
|
ax.plot([0, 1], [0, 1], **diag_ax_kwargs)
|
|
# Adjust:
|
|
if j > 0:
|
|
hide_ticks(ax, 'left')
|
|
if i < n_spec - 1:
|
|
hide_ticks(ax, 'bottom')
|
|
if j == 0:
|
|
ylabel(ax, spec_labels[i], transform=fig.transFigure, **ylab_low_kwargs)
|
|
if i == n_spec - 1:
|
|
xlabel(ax, spec_labels[j], transform=fig.transFigure, **xlab_low_kwargs)
|
|
spec_axes[i, j] = ax
|
|
|
|
# Prepare in-species axes:
|
|
song_labels = [label_song_prefix + f'{i}' for i in range(1, n_song + 1)]
|
|
song_axes = np.zeros((n_song, n_song), dtype=object)
|
|
for i, j in product(range(n_song), range(n_song)):
|
|
if i >= j:
|
|
continue
|
|
# Upper triangular:
|
|
ax = fig.add_subplot(song_grid[i, j])
|
|
ax.set_xlim(0, 1)
|
|
ax.set_ylim(0, 1)
|
|
ax.yaxis.set_major_locator(plt.MultipleLocator(loc))
|
|
ax.xaxis.set_major_locator(plt.MultipleLocator(loc))
|
|
ax.spines[['top', 'right']].set_visible(True)
|
|
ax.spines[['bottom', 'left']].set_visible(False)
|
|
ax.yaxis.set_label_position('right')
|
|
ax.yaxis.tick_right()
|
|
ax.xaxis.set_label_position('top')
|
|
ax.xaxis.tick_top()
|
|
# Indicate subplot diagonal:
|
|
ax.plot([0, 1], [0, 1], **diag_ax_kwargs)
|
|
# Adjust:
|
|
if i > 0:
|
|
hide_ticks(ax, 'top')
|
|
if j < n_song - 1:
|
|
hide_ticks(ax, 'right')
|
|
if i == 0:
|
|
xlabel(ax, song_labels[j], transform=fig.transFigure, **xlab_up_kwargs)
|
|
if j == n_song - 1:
|
|
ylabel(ax, song_labels[i], transform=fig.transFigure, **ylab_up_kwargs)
|
|
song_axes[i, j] = ax
|
|
|
|
# Remember coefficients:
|
|
if calculate_regression:
|
|
spec_regs, song_regs = [], []
|
|
|
|
# Plot cross-species comparisons:
|
|
for x, y in product(range(n_spec), range(n_spec)):
|
|
if x >= y:
|
|
continue
|
|
# Get axis and data:
|
|
ax = spec_axes[y, x]
|
|
x_spec = spec_data[cross_species[x]]
|
|
y_spec = spec_data[cross_species[y]]
|
|
x_reg, y_reg = [], []
|
|
# Plot features of species 1 against species 2 (all files cross-wise):
|
|
for i, j in product(range(x_spec.shape[-1]), range(y_spec.shape[-1])):
|
|
if color_kern_types:
|
|
for k, (f1, f2) in enumerate(zip(x_spec[:, i], y_spec[:, j])):
|
|
c = kern_colors[str(int(config['k_specs'][k, 0]))]
|
|
ax.plot(f1, f2, mfc=c, **dot_spec_kwargs)
|
|
else:
|
|
ax.plot(x_spec[:, i], y_spec[:, j], **dot_spec_kwargs)
|
|
# Accumulate value pairs:
|
|
if calculate_regression:
|
|
x_reg.append(x_spec[:, i])
|
|
y_reg.append(y_spec[:, j])
|
|
# Get regression coefficient:
|
|
if calculate_regression:
|
|
rho = np.corrcoef(np.concatenate(x_reg), np.concatenate(y_reg))[0, 1]
|
|
ax.text(s=text_spec_prefix + f'{rho:.2f}', transform=ax.transAxes, **text_spec_kwargs)
|
|
spec_regs.append(rho)
|
|
# print('\nAxis position: ', (y, x))
|
|
# print(cross_species[x], '(x) vs.', cross_species[y], '(y)')
|
|
|
|
# Plot in-species comparisons:
|
|
for x, y in product(range(n_song), range(n_song)):
|
|
if x <= y:
|
|
continue
|
|
# Get axis and data:
|
|
ax = song_axes[y, x]
|
|
x_song = song_data[x]
|
|
y_song = song_data[y]
|
|
# Plot features of song 1 against song 2:
|
|
if color_kern_types:
|
|
for i, (f1, f2) in enumerate(zip(x_song, y_song)):
|
|
c = kern_colors[str(int(config['k_specs'][i, 0]))]
|
|
ax.plot(f1, f2, mfc=c, **dot_song_kwargs)
|
|
else:
|
|
ax.plot(x_song, y_song, **dot_song_kwargs)
|
|
# Get regression coefficient:
|
|
if calculate_regression:
|
|
rho = np.corrcoef(x_song, y_song)[0, 1]
|
|
ax.text(s=text_song_prefix + f'{rho:.2f}', transform=ax.transAxes, **text_song_kwargs)
|
|
song_regs.append(rho)
|
|
# print('\nAxis position: ', (y, x))
|
|
# print(f'Song {song_labels[x]} (x) vs. Song {song_labels[y]} (y)')
|
|
|
|
if test_regression:
|
|
# Add test result subplot:
|
|
test_ax = fig.add_subplot(test_ax_bounds)
|
|
test_ax.set_xlim(-0.6, 1.6)
|
|
test_ax.set_ylim(0, 1)
|
|
test_ax.yaxis.set_major_locator(plt.MultipleLocator(yloc_test))
|
|
ylabel(test_ax, ylab_test, **ylab_test_kwargs)
|
|
|
|
# Show boxplots of correlation coefficients:
|
|
test_ax.boxplot([spec_regs, song_regs], **boxplot_kwargs)
|
|
|
|
# Show underlying datapoints:
|
|
test_ax.plot(np.zeros(len(spec_regs)), spec_regs, **boxplot_dot_kwargs)
|
|
test_ax.plot(np.ones(len(song_regs)), song_regs, **boxplot_dot_kwargs)
|
|
|
|
# Perform t-test:
|
|
test = ttest_ind(spec_regs, song_regs, equal_var=False)
|
|
t, p = test.pvalue, test.statistic
|
|
print(f'\nT-test result: t={t}, p={p}')
|
|
|
|
if save_path is not None:
|
|
fig.savefig(save_path)
|
|
plt.show()
|
|
|
|
|
|
|
|
|
|
|