Files
paper_2025/python/count_species_songs.py

41 lines
1.2 KiB
Python

import json
from thunderhopper.filetools import search_files
from misc_functions import sort_files_by_rec
from IPython import embed
# GENERAL SETTINGS:
# Species names to process; each one is handed to search_files() in turn.
target_species = [
    'Chorthippus_biguttulus', 'Chorthippus_mollis',
    'Chrysochraon_dispar', 'Gomphocerippus_rufus',
    'Omocestus_rufipes', 'Pseudochorthippus_parallelus',
]

# Keyword arguments forwarded unchanged to every search_files() call.
search_kwargs = {
    'ext': '.wav',
    'dir': '../data/raw/',
}

# Source identifiers passed on to sort_files_by_rec().
sources = [
    'BM04',
    'BM93',
    'DJN',
    'GBC',
    'FTN',
]
# EXECUTION:
# Per-species tallies, each dictionary keyed by species name.
n_songs, n_recs, n_sources = {}, {}, {}
for species in target_species:
    print(f'\nCounting {species} songs...')

    # Gather this species' song files, then group them with
    # sort_files_by_rec().
    # NOTE(review): the counts below presume the returned dict maps
    # source -> collection of recordings -> collection of song paths;
    # confirm against misc_functions.sort_files_by_rec.
    paths = search_files(species, **search_kwargs)
    sorted_paths = sort_files_by_rec(paths, sources, return_dict=True)

    # Sources: one per top-level entry of the dictionary.
    n_sources[species] = len(sorted_paths)

    # Recordings: summed length of every dictionary value.
    n_recs[species] = sum(map(len, sorted_paths.values()))

    # Songs: summed length of every item nested inside the values.
    n_songs[species] = sum(
        len(song_paths)
        for rec_paths in sorted_paths.values()
        for song_paths in rec_paths
    )

    print(f'Found {n_songs[species]} songs from {n_recs[species]} recordings across {n_sources[species]} sources.')