diff --git a/database.py b/database.py
index 6edbe51..ebafd93 100644
--- a/database.py
+++ b/database.py
@@ -13,7 +13,7 @@ schema = dj.schema("fish_book", locals())
 
 @schema
 class Dataset(dj.Manual):
-    definition = """ # Datasets
+    definition = """ # Dataset
     dataset_id : varchar(256)
     ----
     data_source : varchar(512) # path to the dataset
@@ -59,6 +59,73 @@ class Dataset(dj.Manual):
         return sane
 
 
+
+@schema
+class Subject(dj.Manual):
+    definition = """
+    # Subject
+    subject_id : varchar(256)
+    ----
+    species : varchar(256)
+    """
+
+    @staticmethod
+    def get_template_tuple(subject_id=None):
+        """Return an empty insert template, or the stored row for subject_id."""
+        if subject_id is not None:
+            return dict((Subject() & {"subject_id": subject_id}).fetch1())
+        return dict(subject_id=None, species="")
+
+    def make(self, key):
+        """Insert the subject found in the metadata of the NIX file for key."""
+        file_path = Dataset.get_nix_file(key)
+        if file_path is None:
+            return
+        nix_file = nix.File.open(file_path, nix.FileMode.ReadOnly)
+        # Close the file even when the metadata lookup or the insert fails.
+        try:
+            subj_info = nix_file.blocks[0].metadata["Recording"]["Subject"]
+            inserts = Subject.get_template_tuple()
+            inserts["subject_id"] = subj_info["Identifier"]
+            inserts["species"] = subj_info["Species"][0]
+            # Weight, size and EOD frequency are columns of SubjectProperties,
+            # not of this table, so they must not be part of this row.
+            inserts.update(key)
+            self.insert1(inserts, skip_duplicates=True)
+        finally:
+            nix_file.close()
+
+
+@schema
+class SubjectDatasetMap(dj.Manual):
+    definition = """
+    # SubjectDatasetMap
+    -> Subject
+    -> Dataset
+    """
+
+
+@schema
+class SubjectProperties(dj.Manual):
+    definition = """
+    # SubjectProperties
+    id : int auto_increment
+    ----
+    -> Subject
+    recording_date : date
+    weight : float
+    size : float
+    eod_frequency : float
+    """
+
+    @staticmethod
+    def get_template_tuple(id=None):
+        """Return an empty insert template, or the stored row for id."""
+        if id is not None:
+            return dict((SubjectProperties() & {"id": id}).fetch1())
+        return dict(id=None, subject_id=None, recording_date=None, weight=0.0, size=0.0, eod_frequency=0.0)
+
+
 def read_info(info_file):
     if not os.path.exists(info_file):
         return None, None, False
@@ -94,7 +161,58 @@ def populate_datasets(data_path):
     Dataset().insert1(inserts, skip_duplicates=True)
 
 
+def populate_subjects(data_path):
+    """Import subject, dataset mapping and subject properties of a dataset.
+
+    Reads the "Subject" section of data_path/info.dat. The matching row is
+    fetched from Dataset, so the dataset has to be imported beforehand.
+    """
+    print("Importing subject(s) of %s" % data_path)
+    dset_name = os.path.split(data_path)[-1]
+    info_file = os.path.join(data_path, 'info.dat')
+    if not os.path.exists(info_file):
+        # NOTE(review): tuple mirrors read_info(); all other paths return None.
+        return None, None, False
+    info = read_info_file(info_file)
+    subject_path = []
+    find_key_recursive(info, "Subject", subject_path)
+    if not subject_path:
+        return
+    subj = deep_get(info, subject_path)
+    inserts = Subject.get_template_tuple()
+    inserts["subject_id"] = subj["Identifier"]
+    inserts["species"] = subj["Species"]
+    Subject().insert1(inserts, skip_duplicates=True)
+
+    # multi match entry
+    dataset = dict((Dataset() & {"dataset_id": dset_name}).fetch1())
+    mm = dict(dataset_id=dataset["dataset_id"], subject_id=subj["Identifier"])
+    SubjectDatasetMap.insert1(mm, skip_duplicates=True)
+
+    # subject properties
+    props = SubjectProperties.get_template_tuple()
+    props["subject_id"] = subj["Identifier"]
+    props["recording_date"] = dataset["recording_date"]
+    # Trailing characters (presumably the unit) are cut off before conversion.
+    if "Weight" in subj:
+        props["weight"] = np.round(float(subj["Weight"][:-1]), 1)
+    if "Size" in subj:
+        props["size"] = np.round(float(subj["Size"][:-2]), 1)
+    if "EOD Frequency" in subj:
+        props["eod_frequency"] = np.round(float(subj["EOD Frequency"][:-2]))
+    # Compare without the auto-increment id so identical rows are not stored twice.
+    restriction = props.copy()
+    restriction.pop("id")
+    if len(SubjectProperties & restriction) == 0:
+        SubjectProperties.insert1(props, skip_duplicates=True)
+
+
 if __name__ == "__main__":
     datasets = glob.glob('/data/apteronotus/2018-05-08*')
+    # Dataset.drop()
+    # Subject.drop()
+    # SubjectProperties.drop()
+    # SubjectDatasetMap.drop()
     for d in datasets:
         populate_datasets(d)
+        populate_subjects(d)