# Tally, for each target species, how many song files, recordings and
# sources are available among the raw .wav data, and print a summary line.
import json
from thunderhopper.filetools import search_files
from misc_functions import sort_files_by_rec
from IPython import embed  # not referenced below; presumably kept for interactive debugging

# GENERAL SETTINGS:
# Species names handed to search_files() one at a time.
target_species = [
    'Chorthippus_biguttulus',
    'Chorthippus_mollis',
    'Chrysochraon_dispar',
    'Gomphocerippus_rufus',
    'Omocestus_rufipes',
    'Pseudochorthippus_parallelus',
]
# Keyword arguments forwarded unchanged to every search_files() call.
search_kwargs = {
    'ext': '.wav',
    'dir': '../data/raw/',
}
# Source identifiers passed to sort_files_by_rec().
sources = ['BM04', 'BM93', 'DJN', 'GBC', 'FTN']

# EXECUTION:
# Per-species result tables, keyed by species name.
n_songs = {}
n_recs = {}
n_sources = {}
for species in target_species:
    print(f'\nCounting {species} songs...')

    # Collect every song file that belongs to this species:
    paths = search_files(species, **search_kwargs)

    # Group the song files by recording.
    # NOTE(review): the counts below treat the result as a mapping of
    # source -> sequence of recordings -> sequence of song files; confirm
    # against sort_files_by_rec().
    sorted_paths = sort_files_by_rec(paths, sources, return_dict=True)
    rec_groups = list(sorted_paths.values())

    # One top-level entry per available source:
    n_sources[species] = len(sorted_paths)

    # Recordings are the items within each source entry:
    n_recs[species] = sum(map(len, rec_groups))

    # Songs are the items within each recording, summed over all sources:
    n_songs[species] = sum(
        len(song_files)
        for recordings in rec_groups
        for song_files in recordings
    )
    print(f'Found {n_songs[species]} songs from {n_recs[species]} recordings across {n_sources[species]} sources.')