diff --git a/database.py b/database.py
index 9aa3c80..c9fefb1 100644
--- a/database.py
+++ b/database.py
@@ -3,8 +3,8 @@ import datajoint as dj
 import nixio as nix
 import os
 import glob
-from util import read_info_file, find_key_recursive, deep_get, read_dataset_info, \
-    nix_metadata_to_yaml
+import util as ut
+
 from IPython import embed
 
 schema = dj.schema("fish_book", locals())
@@ -56,6 +56,7 @@ class Dataset(dj.Manual):
                 sane = False
             f.close()
         except ():
+            f = None
             print("file: %s is NOT SANE!")
             sane = False
         return sane
@@ -176,7 +177,7 @@ class RePro(dj.Manual):
 
     @staticmethod
     def get_template_tuple(repro_id=None):
-        tup = dict(repro_id=None, dataset_id=None, run=0, repro_name="", settings="")
+        tup = dict(repro_id=None, dataset_id=None, run=0, repro_name="", settings=None)
         if repro_id is not None:
             d = dict((RePro() & {"repro_id": repro_id}).fetch1())
             return d
@@ -188,9 +189,9 @@ def populate_datasets(data_path):
     if not os.path.exists(data_path):
         return
     dset_name = os.path.split(data_path)[-1]
-    experimenter, rec_date, quality, comment, has_nix = read_dataset_info(os.path.join(data_path, 'info.dat'))
+    experimenter, rec_date, quality, comment, has_nix = ut.read_dataset_info(os.path.join(data_path, 'info.dat'))
     if not experimenter:
-        return
+        return False
 
     inserts = Dataset.get_template_tuple()
     inserts["dataset_id"] = dset_name
@@ -201,19 +202,20 @@ def populate_datasets(data_path):
     inserts["comment"] = comment
     inserts["has_nix"] = has_nix
     Dataset().insert1(inserts, skip_duplicates=True)
+    return True
 
 
 def populate_subjects(data_path):
-    print("Importing subject(s) of %s" % data_path)
+    print("\tImporting subject(s) of %s" % data_path)
     dset_name = os.path.split(data_path)[-1]
     info_file = os.path.join(data_path, 'info.dat')
     if not os.path.exists(info_file):
         return None, None, False
-    info = read_info_file(info_file)
+    info = ut.read_info_file(info_file)
     p = []
-    find_key_recursive(info, "Subject", p)
+    ut.find_key_recursive(info, "Subject", p)
     if len(p) > 0:
-        subj = deep_get(info, p)
+        subj = ut.deep_get(info, p)
         inserts = Subject.get_template_tuple()
         inserts["subject_id"] = subj["Identifier"]
         inserts["species"] = subj["Species"]
@@ -241,23 +243,23 @@ def populate_subjects(data_path):
 
 
 def populate_cells(data_path):
-    print("Importing cell(s) of %s" % data_path)
+    print("\tImporting cell(s) of %s" % data_path)
     dset_name = os.path.split(data_path)[-1]
     info_file = os.path.join(data_path, 'info.dat')
     if not os.path.exists(info_file):
         return None, None, False
-    info = read_info_file(info_file)
+    info = ut.read_info_file(info_file)
     p = []
-    find_key_recursive(info, "Subject", p)
-    subject_info = deep_get(info, p)
+    ut.find_key_recursive(info, "Subject", p)
+    subject_info = ut.deep_get(info, p)
 
     p = []
-    find_key_recursive(info, "Cell", p)
-    cell_info = deep_get(info, p)
+    ut.find_key_recursive(info, "Cell", p)
+    cell_info = ut.deep_get(info, p)
 
     p = []
-    find_key_recursive(info, "Firing Rate1", p)
-    firing_rate = deep_get(info, p, default=0.0)
+    ut.find_key_recursive(info, "Firing Rate1", p)
+    firing_rate = ut.deep_get(info, p, default=0.0)
     if isinstance(firing_rate, str):
         firing_rate = float(firing_rate[:-2])
 
@@ -292,29 +294,43 @@ def populate_cells(data_path):
 
 
 def populate_repros(data_path):
-    print("Importing RePro(s) of %s" % data_path)
+    print("\tImporting RePro(s) of %s" % data_path)
     dset_name = os.path.split(data_path)[-1]
+    if len(Dataset & {"dataset_id": dset_name}) != 1:
+        return False
     dataset = dict((Dataset & {"dataset_id": dset_name}).fetch1())
 
     if dataset["has_nix"]:
-        print("scanning nix file")
+        print("\t\tscanning nix file")
        nix_files = glob.glob(os.path.join(dataset["data_source"], "*.nix"))
         for nf in nix_files:
+            if not Dataset.check_file_integrity(nf):
+                print("file is not sane!!!")
+                continue
             f = nix.File.open(nf, nix.FileMode.ReadOnly)
             b = f.blocks[0]
             for t in b.tags:
-                if "relacs.repro_run" in t.type and "RePro-Info" in t.metadata.sections:
+                if "relacs.repro_run" in t.type:
+                    rs = t.metadata.find_sections(lambda x: "Run" in x.props)
+                    if len(rs) == 0:
+                        continue
+                    rs = rs[0]
                     rp = RePro.get_template_tuple()
-                    rp["run"] = t.metadata["RePro-Info"]["Run"]
+                    rp["run"] = rs["Run"]
+                    rp["repro_name"] = rs["RePro"]
                     rp["dataset_id"] = dataset["dataset_id"]
                     rp["repro_id"] = t.name
-                    rp["repro_name"] = t.metadata["RePro-Info"]["RePro"]
-                    rp["settings"] = nix_metadata_to_yaml(t.metadata.sections[0])
+                    settings = t.metadata.find_sections(lambda x: "settings" in x.type)
+                    if len(settings) > 0:
+                        rp["settings"] = ut.nix_metadata_to_yaml(settings[0])
+                    else:
+                        rp["settings"] = ut.nix_metadata_to_yaml(t.metadata)
                     RePro.insert1(rp, skip_duplicates=True)
             f.close()
             f = None
     else:
         pass
+    return True
 
 
 def drop_tables():
@@ -324,14 +340,19 @@ def drop_tables():
 
 def populate(datasets):
     for d in datasets:
-        #populate_datasets(d)
-        #populate_subjects(d)
-        #populate_cells(d)
-        populate_repros(d)
+        if not populate_datasets(d):
+            continue
+        populate_subjects(d)
+        populate_cells(d)
+        try:
+            populate_repros(d)
+        except ():
+            print("something went wrong! %s" % d)
 
 
 if __name__ == "__main__":
-    data_dir = "../../science/high_frequency_chirps/data"
+    # data_dir = "../../science/high_frequency_chirps/data"
+    data_dir = "../high_freq_chirps/data"
     datasets = glob.glob(os.path.join(data_dir, '2018*'))
     # drop_tables()
     populate(datasets)