import plotstyle_plt import numpy as np import matplotlib.pyplot as plt from itertools import product from scipy.stats import ttest_ind from thunderhopper.modeltools import load_data from thunderhopper.filetools import search_files from thunderhopper.filtertools import find_kern_specs from misc_functions import shorten_species from color_functions import load_colors from plot_functions import hide_ticks, xlabel, ylabel from IPython import embed # GENERAL SETTINGS: cross_species = [ # 'Chorthippus_biguttulus', # 'Chorthippus_mollis', # 'Chrysochraon_dispar', # 'Euchorthippus_declivus', 'Gomphocerippus_rufus', 'Omocestus_rufipes', 'Pseudochorthippus_parallelus', ] n_spec = len(cross_species) example_files = { 'Chorthippus_biguttulus': 'Chorthippus_biguttulus_GBC_94-17s73.1ms-19s977ms', 'Chorthippus_mollis': 'Chorthippus_mollis_DJN_41_T28C-46s4.58ms-1m15s697ms', 'Chrysochraon_dispar': 'Chrysochraon_dispar_DJN_26_T28C_DT-32s134ms-34s432ms', 'Euchorthippus_declivus': 'Euchorthippus_declivus_FTN_79-2s167ms-2s563ms', 'Gomphocerippus_rufus': 'Gomphocerippus_rufus_FTN_91-3-884ms-10s427ms', 'Omocestus_rufipes': 'Omocestus_rufipes_DJN_32-40s724ms-48s779ms', 'Pseudochorthippus_parallelus': 'Pseudochorthippus_parallelus_GBC_88-6s678ms-9s32.3ms' } in_species = [ 'Chorthippus_biguttulus', 'Chorthippus_mollis', 'Chrysochraon_dispar', 'Euchorthippus_declivus', 'Gomphocerippus_rufus', 'Omocestus_rufipes', 'Pseudochorthippus_parallelus', ][5] save_path = '../figures/fig_features_cross_species.pdf' # ANALYSIS SETTINGS: thresh_rel = np.array([0, 0.5, 1, 1.5, 2, 2.5, 3])[4] single_spec_file = True # Only use example files for cross-species comparison equalize_spec_files = False # Prune to minimum available across species n_song = n_spec#None # Limit to n first songs of in-species dataset (None for all) color_kern_types = True calculate_regression = True test_regression = True # SUBSET SETTINGS: types = np.array([1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 8, -8, 9, -9, 10, -10]) # types = [1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 8, -8, 9, -9, 10, -10] sigmas = np.array([0.001, 0.002, 0.004, 0.008, 0.016, 0.032]) # sigmas = [0.001, 0.002, 0.004, 0.008, 0.016, 0.032] kernels = None reduce_kernels = any(var is not None for var in [kernels, types, sigmas]) # GRAPH SETTINGS: fig_kwargs = dict( figsize=(32/2.54, 32/2.54), ) spec_grid_kwargs = dict( wspace=0.1, hspace=0.1, left=0.07, right=0.93, bottom=0.07, top=0.93, ) song_grid_kwargs = dict( wspace=0.1, hspace=0.1, left=spec_grid_kwargs['left'], right=spec_grid_kwargs['right'], bottom=spec_grid_kwargs['bottom'], top=spec_grid_kwargs['top'], ) # PLOT SETTINGS: kern_colors = load_colors('../data/feat_colors_all.npz') fs = dict( lab_norm=16, lab_tex=20, letter=22, tit_norm=16, tit_tex=20, bar=16, ) label_song_prefix = ['song ', ''][0] xlab_low_kwargs = dict( y=0.01, fontsize=fs['lab_norm'], ha='center', va='bottom', fontstyle='italic', ) xlab_up_kwargs = dict( y=0.99, fontsize=fs['lab_norm'], ha='center', va='top', ) ylab_low_kwargs = dict( x=0.01, fontsize=fs['lab_norm'], ha='center', va='top', fontstyle='italic', ) ylab_up_kwargs = dict( x=0.99, rotation=270, fontsize=fs['lab_norm'], ha='center', va='top', ) loc = 0.5 dot_spec_kwargs = dict( ls='none', marker='o', mec='w', ms=6, mew=0.5, alpha=1, zorder=2, ) dot_song_kwargs = dict( ls='none', marker='o', mec='w', ms=6, mew=0.5, alpha=1, zorder=2, ) if not color_kern_types: dot_spec_kwargs['mfc'] = 'k' dot_song_kwargs['mfc'] = 'k' diag_fig_kwargs = dict( c='k', lw=1, ls='-' ) diag_ax_kwargs = dict( c='k', lw=1, ls='-', zorder=3, ) text_spec_kwargs = dict( x=0.05, y=0.95, ha='left', va='top', fontsize=16, ) text_song_kwargs = dict( x=0, y=0.95, ha='left', va='top', fontsize=16, ) text_spec_prefix = '$\\rho\\,=\\,$' text_song_prefix = ['$\\rho\\,=\\,$', ''][0] if test_regression: test_ax_side = 0.15 test_ax_bounds = [ song_grid_kwargs['right'] - test_ax_side, spec_grid_kwargs['bottom'], test_ax_side, test_ax_side ] xlab_test = '$\\rho$' ylab_test = '$\\text{PDF}_{\\rho}$' xloc_test = 0.5 yloc_test = 10 ylab_test_kwargs = dict( x=-0.3, fontsize=fs['lab_norm'], ha='center', va='bottom', ) nbins = 10 spec_color = 'darkorchid' song_color = 'goldenrod' bar_kwargs = dict( width=0.95, alpha=0.75, ) mean_kwargs = dict( ls='--', lw=1, ) leg_kwargs = dict( ncols=1, loc='lower center', bbox_to_anchor=(0, 1.05, 1, 0.5), frameon=False, prop=dict( size=14, ), borderpad=0, borderaxespad=0, handlelength=1, columnspacing=1, handletextpad=0.5, labelspacing=0.1 ) # EXECUTION: # Gather cross-species data: spec_data, min_files = {}, np.inf for species in cross_species: # Load species data: if single_spec_file: path = search_files(example_files[species], dir='../data/inv/full/')[0] else: path = search_files(species, dir='../data/inv/full/collected/')[0] data, config = load_data(path, ['measure_feat', 'thresh_rel']) # Reduce to single threshold, last scale, optional kernel subset: thresh_ind = np.nonzero(data['thresh_rel'] == thresh_rel)[0][0] kern_inds = slice(None) if reduce_kernels: kern_inds = find_kern_specs(config['k_specs'], kernels, types, sigmas) config['kernels'] = config['kernels'][:, kern_inds] config['k_specs'] = config['k_specs'][kern_inds, :] spec_data[species] = data['measure_feat'][-1, kern_inds, thresh_ind, ...] if single_spec_file: # Ensure consistent shape (features, files=1): spec_data[species] = spec_data[species][..., None] # Update number of possible comparisons: min_files = min(min_files, spec_data[species].shape[-1]) if equalize_spec_files and not single_spec_file: # Equalize number of files across species: spec_data = {spec: feat[:, :min_files] for spec, feat in spec_data.items()} # Gather in-species data: paths = search_files(in_species, dir='../data/inv/full/') if n_song is not None: paths = paths[:n_song] else: n_song = len(paths) song_data = np.zeros((n_song, config['k_specs'].shape[0])) for i, path in enumerate(paths): # Load song-specific dataset: data, config = load_data(path, ['measure_feat', 'thresh_rel']) # Reduce to single threshold, last scale, optional kernel subset: thresh_ind = np.nonzero(data['thresh_rel'] == thresh_rel)[0][0] kern_inds = slice(None) if reduce_kernels: kern_inds = find_kern_specs(config['k_specs'], kernels, types, sigmas) config['kernels'] = config['kernels'][:, kern_inds] config['k_specs'] = config['k_specs'][kern_inds, :] song_data[i] = data['measure_feat'][-1, kern_inds, thresh_ind] # Prepare graph: fig = plt.figure(**fig_kwargs) spec_grid = fig.add_gridspec(nrows=n_spec, ncols=n_spec, **spec_grid_kwargs) song_grid = fig.add_gridspec(nrows=n_song, ncols=n_song, **song_grid_kwargs) # fig.add_artist(plt.Line2D([0, 1], [1, 0], **diag_fig_kwargs)) # Prepare cross-species axes: spec_labels = [shorten_species(spec) for spec in cross_species] spec_axes = np.zeros((n_spec, n_spec), dtype=object) for i, j in product(range(n_spec), range(n_spec)): if i <= j: continue # Lower trianglular: ax = fig.add_subplot(spec_grid[i, j]) ax.set_xlim(0, 1) ax.set_ylim(0, 1) ax.yaxis.set_major_locator(plt.MultipleLocator(loc)) ax.xaxis.set_major_locator(plt.MultipleLocator(loc)) # Indicate subplot diagonal: ax.plot([0, 1], [0, 1], **diag_ax_kwargs) # Adjust: if j > 0: hide_ticks(ax, 'left') if i < n_spec - 1: hide_ticks(ax, 'bottom') if j == 0: ylabel(ax, spec_labels[i], transform=fig.transFigure, **ylab_low_kwargs) if i == n_spec - 1: xlabel(ax, spec_labels[j], transform=fig.transFigure, **xlab_low_kwargs) spec_axes[i, j] = ax # Prepare in-species axes: song_labels = [label_song_prefix + f'{i}' for i in range(1, n_song + 1)] song_axes = np.zeros((n_song, n_song), dtype=object) for i, j in product(range(n_song), range(n_song)): if i >= j: continue # Upper triangular: ax = fig.add_subplot(song_grid[i, j]) ax.set_xlim(0, 1) ax.set_ylim(0, 1) ax.yaxis.set_major_locator(plt.MultipleLocator(loc)) ax.xaxis.set_major_locator(plt.MultipleLocator(loc)) ax.spines[['top', 'right']].set_visible(True) ax.spines[['bottom', 'left']].set_visible(False) ax.yaxis.set_label_position('right') ax.yaxis.tick_right() ax.xaxis.set_label_position('top') ax.xaxis.tick_top() # Indicate subplot diagonal: ax.plot([0, 1], [0, 1], **diag_ax_kwargs) # Adjust: if i > 0: hide_ticks(ax, 'top') if j < n_song - 1: hide_ticks(ax, 'right') if i == 0: xlabel(ax, song_labels[j], transform=fig.transFigure, **xlab_up_kwargs) if j == n_song - 1: ylabel(ax, song_labels[i], transform=fig.transFigure, **ylab_up_kwargs) song_axes[i, j] = ax # Remember coefficients: if calculate_regression: spec_regs, song_regs = [], [] # Plot cross-species comparisons: for x, y in product(range(n_spec), range(n_spec)): if x >= y: continue # Get axis and data: ax = spec_axes[y, x] x_spec = spec_data[cross_species[x]] y_spec = spec_data[cross_species[y]] x_reg, y_reg = [], [] # Plot features of species 1 against species 2 (all files cross-wise): for i, j in product(range(x_spec.shape[-1]), range(y_spec.shape[-1])): if color_kern_types: for k, (f1, f2) in enumerate(zip(x_spec[:, i], y_spec[:, j])): c = kern_colors[str(int(config['k_specs'][k, 0]))] ax.plot(f1, f2, mfc=c, **dot_spec_kwargs) else: ax.plot(x_spec[:, i], y_spec[:, j], **dot_spec_kwargs) # Accumulate value pairs: if calculate_regression: x_reg.append(x_spec[:, i]) y_reg.append(y_spec[:, j]) # Get regression coefficient: if calculate_regression: rho = np.corrcoef(np.concatenate(x_reg), np.concatenate(y_reg))[0, 1] ax.text(s=text_spec_prefix + f'{rho:.2f}', transform=ax.transAxes, **text_spec_kwargs) spec_regs.append(rho) # print('\nAxis position: ', (y, x)) # print(cross_species[x], '(x) vs.', cross_species[y], '(y)') # Plot in-species comparisons: for x, y in product(range(n_song), range(n_song)): if x <= y: continue # Get axis and data: ax = song_axes[y, x] x_song = song_data[x] y_song = song_data[y] # Plot features of song 1 against song 2: if color_kern_types: for i, (f1, f2) in enumerate(zip(x_song, y_song)): c = kern_colors[str(int(config['k_specs'][i, 0]))] ax.plot(f1, f2, mfc=c, **dot_song_kwargs) else: ax.plot(x_song, y_song, **dot_song_kwargs) # Get regression coefficient: if calculate_regression: rho = np.corrcoef(x_song, y_song)[0, 1] ax.text(s=text_song_prefix + f'{rho:.2f}', transform=ax.transAxes, **text_song_kwargs) song_regs.append(rho) # print('\nAxis position: ', (y, x)) # print(f'Song {song_labels[x]} (x) vs. Song {song_labels[y]} (y)') if test_regression: # Add test result subplot: test_ax = fig.add_subplot(test_ax_bounds) test_ax.xaxis.set_major_locator(plt.MultipleLocator(xloc_test)) test_ax.yaxis.set_major_locator(plt.MultipleLocator(yloc_test)) xlabel(test_ax, xlab_test, transform=fig.transFigure, **xlab_low_kwargs) ylabel(test_ax, ylab_test, **ylab_test_kwargs) # Perform t-test: test = ttest_ind(spec_regs, song_regs, equal_var=False) t, p = test.pvalue, test.statistic print(f'\nT-test result: t={t}, p={p}') # Calculate histograms: limits = np.array([min(spec_regs + song_regs), max(spec_regs + song_regs)]) limits += np.array([-1.1, 1.1]) * (limits[1] - limits[0]) edges = np.linspace(*limits, nbins + 1) centers = edges[:-1] + (edges[1] - edges[0]) / 2 spec_hist, _ = np.histogram(spec_regs, bins=edges, density=True) song_hist, _ = np.histogram(song_regs, bins=edges, density=True) # Plot histograms: bar_kwargs['width'] *= (centers[1] - centers[0]) test_ax.bar(centers, spec_hist, color=spec_color, label='inter-species', **bar_kwargs) test_ax.bar(centers, song_hist, color=song_color, label='intra-species', **bar_kwargs) # Indicate means: test_ax.axvline(np.mean(spec_regs), color=spec_color, **mean_kwargs) test_ax.axvline(np.mean(song_regs), color=song_color, **mean_kwargs) # Add legend: test_ax.legend(**leg_kwargs) # Posthocs: test_ax.set_ylim(0, max(spec_hist.max(), song_hist.max()) * 1.05) test_ax.set_xlim(min(0, max(-1, limits[0])), min(1, limits[1])) if save_path is not None: fig.savefig(save_path) plt.show()