add subject props, dataset multimatch table

Jan Grewe 2019-03-08 14:34:34 +01:00
parent 358a84f435
commit 0b46108cf7


@@ -13,7 +13,7 @@ schema = dj.schema("fish_book", locals())
@schema
class Dataset(dj.Manual):
    definition = """ # Datasets
    definition = """ # Dataset
    dataset_id : varchar(256)
    ----
    data_source : varchar(512)  # path to the dataset
@@ -59,6 +59,71 @@ class Dataset(dj.Manual):
        return sane
@schema
class Subject(dj.Manual):
    definition = """
    # Subject
    subject_id : varchar(256)
    ----
    species : varchar(256)
    """
    @staticmethod
    def get_template_tuple(subject_id=None):
        tup = dict(subject_id=None, species="")
        if subject_id is not None:
            d = dict((Subject() & {"subject_id": subject_id}).fetch1())
            return d
        return tup
    def make(self, key):
        file_path = Dataset.get_nix_file(key)
        if file_path is None:
            return
        nix_file = nix.File.open(file_path, nix.FileMode.ReadOnly)
        m = nix_file.blocks[0].metadata
        inserts = Subject.get_template_tuple()
        subj_info = m["Recording"]["Subject"]
        inserts["subject_id"] = subj_info["Identifier"]
        inserts["species"] = subj_info["Species"][0]
        inserts["weight"] = subj_info["Weight"]
        inserts["size"] = subj_info["Size"]
        inserts["eod_frequency"] = np.round(subj_info["EOD Frequency"] * 10) / 10
        inserts.update(key)
        self.insert1(inserts, skip_duplicates=True)
        nix_file.close()
@schema
class SubjectDatasetMap(dj.Manual):
    definition = """
    # SubjectDatasetMap
    -> Subject
    -> Dataset
    """
@schema
class SubjectProperties(dj.Manual):
    definition = """
    # SubjectProperties
    id : int auto_increment
    ----
    -> Subject
    recording_date : date
    weight : float
    size : float
    eod_frequency : float
    """
    def get_template_tuple(id=None):
        tup = dict(id=None, subject_id=None, recording_date=None, weight=0.0, size=0.0, eod_frequency=0.0)
        if id is not None:
            return dict((SubjectProperties() & {"id": id}).fetch1())
        return tup
def read_info(info_file):
    if not os.path.exists(info_file):
        return None, None, False
@@ -94,7 +159,49 @@ def populate_datasets(data_path):
    Dataset().insert1(inserts, skip_duplicates=True)
def populate_subjects(data_path):
    print("Importing subject(s) of %s" % data_path)
    dset_name = os.path.split(data_path)[-1]
    info_file = os.path.join(data_path, 'info.dat')
    if not os.path.exists(info_file):
        return None, None, False
    info = read_info_file(info_file)
    p = []
    find_key_recursive(info, "Subject", p)
    if len(p) > 0:
        subj = deep_get(info, p)
        inserts = Subject.get_template_tuple()
        inserts["subject_id"] = subj["Identifier"]
        inserts["species"] = subj["Species"]
        Subject().insert1(inserts, skip_duplicates=True)
        # multi match entry
        dataset = dict((Dataset() & {"dataset_id": dset_name}).fetch1())
        mm = dict(dataset_id=dataset["dataset_id"], subject_id=subj["Identifier"])
        SubjectDatasetMap.insert1(mm, skip_duplicates=True)
        # subject properties
        props = SubjectProperties.get_template_tuple()
        props["subject_id"] = subj["Identifier"]
        props["recording_date"] = dataset["recording_date"]
        if "Weight" in subj.keys():
            props["weight"] = np.round(float(subj["Weight"][:-1]), 1)
        if "Size" in subj.keys():
            props["size"] = np.round(float(subj["Size"][:-2]), 1)
        if "EOD Frequency" in subj.keys():
            props["eod_frequency"] = np.round(float(subj["EOD Frequency"][:-2]))
        p = props.copy()
        p.pop("id")
        if len(SubjectProperties & p) == 0:
            SubjectProperties.insert1(props, skip_duplicates=True)
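# Illustration only: the slicing above assumes unit-suffixed strings in
# info.dat such as "25.5g", "14.0cm" or "820Hz". A hypothetical helper that
# makes that assumption explicit (not part of the committed file):
def _strip_unit(value, unit):
    value = value.strip()
    if value.endswith(unit):
        value = value[:-len(unit)]
    return float(value)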
if __name__ == "__main__":
    datasets = glob.glob('/data/apteronotus/2018-05-08*')
    # Dataset.drop()
    # Subject.drop()
    # SubjectProperties.drop()
    # SubjectDatasetMap.drop()
    for d in datasets:
        populate_datasets(d)
        populate_subjects(d)
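A quick check of a populated run, sketched with only the attributes defined above (nothing here adds new schema; the loop simply counts map entries per subject):

for subject_id, species in zip(*Subject().fetch("subject_id", "species")):
    n_sets = len(SubjectDatasetMap() & {"subject_id": subject_id})
    print("%s (%s): linked to %d dataset(s)" % (subject_id, species, n_sets))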