import os
import glob
import datetime as dt

import numpy as np
import datajoint as dj
import nixio as nix

from util import read_info_file, find_key_recursive, deep_get

data_dir = 'data'

schema = dj.schema("fish_book", locals())
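
# A database connection is assumed to be configured before the schema is
# used, e.g. (a sketch; host and credentials are placeholders):
#   dj.config["database.host"] = "localhost"
#   dj.config["database.user"] = "datajoint"
#   dj.config["database.password"] = "..."
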
@schema
class Dataset(dj.Manual):
    definition = """ # Dataset
    dataset_id : varchar(256)
    ----
    data_source : varchar(512) # path to the dataset
    experimenter : varchar(512)
    recording_date : date
    has_nix : bool
    """

    @staticmethod
    def get_template_tuple(id=None):
        if id is not None:
            return dict((Dataset() & {"dataset_id": id}).fetch1())
        return dict(dataset_id=None, data_source="", experimenter="",
                    recording_date=None, has_nix=False)

    @staticmethod
    def get_nix_file(key):
        dset = (Dataset() & key).fetch1()
        # "ignore" is not part of the Dataset definition; treat a missing
        # flag as False
        if dset.get("ignore", False):
            return None
        file_path = os.path.join(dset["data_source"], dset["dataset_id"] + ".nix")
        if not os.path.exists(file_path):
            print("\t No nix file found for path: %s" % dset["data_source"])
            return None
        if not Dataset.check_file_integrity(file_path):
            return None
        return file_path

    @staticmethod
    def check_file_integrity(nix_file):
        sane = True
        try:
            f = nix.File.open(nix_file, nix.FileMode.ReadOnly)
            b = f.blocks[0]
            m = b.metadata
            if "Recording" not in m.sections:
                print("\t Could not find Recording section in dataset: %s" % nix_file)
                sane = False
            f.close()
        except Exception:
            print("file: %s is NOT SANE!" % nix_file)
            sane = False
        return sane
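
# Example lookup (a sketch; the dataset_id is a placeholder and assumes the
# table has been populated):
#   nix_path = Dataset.get_nix_file({"dataset_id": "2018-05-08-aa"})

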
@schema
class Subject(dj.Manual):
    definition = """
    # Subject
    subject_id : varchar(256)
    ----
    species : varchar(256)
    """

    @staticmethod
    def get_template_tuple(subject_id=None):
        tup = dict(subject_id=None, species="")
        if subject_id is not None:
            return dict((Subject() & {"subject_id": subject_id}).fetch1())
        return tup

    def make(self, key):
        file_path = Dataset.get_nix_file(key)
        if file_path is None:
            return
        nix_file = nix.File.open(file_path, nix.FileMode.ReadOnly)
        m = nix_file.blocks[0].metadata
        inserts = Subject.get_template_tuple()
        subj_info = m["Recording"]["Subject"]
        inserts["subject_id"] = subj_info["Identifier"]
        inserts["species"] = subj_info["Species"][0]
        # weight, size, and EOD frequency are not part of this table's
        # definition; they belong in SubjectProperties
        inserts.update(key)
        # the key may carry extra fields (e.g. dataset_id) that are not
        # Subject attributes
        self.insert1(inserts, skip_duplicates=True, ignore_extra_fields=True)
        nix_file.close()
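
# Example (a sketch; the key is a placeholder referring to an existing
# Dataset entry):
#   Subject().make({"dataset_id": "2018-05-08-aa"})

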
@schema
class SubjectDatasetMap(dj.Manual):
    definition = """
    # SubjectDatasetMap
    -> Subject
    -> Dataset
    """
@schema
class SubjectProperties(dj.Manual):
    definition = """
    # SubjectProperties
    id : int auto_increment
    ----
    -> Subject
    recording_date : date
    weight : float
    size : float
    eod_frequency : float
    """

    @staticmethod
    def get_template_tuple(id=None):
        tup = dict(id=None, subject_id=None, recording_date=None,
                   weight=0.0, size=0.0, eod_frequency=0.0)
        if id is not None:
            return dict((SubjectProperties() & {"id": id}).fetch1())
        return tup
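
# Example lookup (a sketch; the id is a placeholder):
#   props = SubjectProperties.get_template_tuple(1)

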
def read_info(info_file):
    if not os.path.exists(info_file):
        return None, None, False
    has_nix = len(glob.glob(os.path.join(os.path.dirname(info_file), "*.nix"))) > 0
    info = read_info_file(info_file)
    exp = None
    rec_date = None
    p = []
    find_key_recursive(info, "Experimenter", p)
    if len(p) > 0:
        exp = deep_get(info, p)
    p = []
    find_key_recursive(info, "Date", p)
    if len(p) > 0:
        rec_date = dt.date.fromisoformat(deep_get(info, p))
    return exp, rec_date, has_nix
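
# Example (a sketch; the path is a placeholder):
#   experimenter, rec_date, has_nix = read_info("data/2018-05-08-aa/info.dat")

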
def populate_datasets(data_path):
    print("Importing dataset %s" % data_path)
    if not os.path.exists(data_path):
        return
    dset_name = os.path.split(data_path)[-1]
    experimenter, rec_date, has_nix = read_info(os.path.join(data_path, 'info.dat'))
    if not experimenter:
        return

    inserts = Dataset.get_template_tuple()
    inserts["dataset_id"] = dset_name
    inserts["data_source"] = data_path
    inserts["experimenter"] = experimenter
    inserts["recording_date"] = rec_date
    inserts["has_nix"] = has_nix

    Dataset().insert1(inserts, skip_duplicates=True)


def populate_subjects(data_path):
    print("Importing subject(s) of %s" % data_path)
    dset_name = os.path.split(data_path)[-1]
    info_file = os.path.join(data_path, 'info.dat')
    if not os.path.exists(info_file):
        return
    info = read_info_file(info_file)
    p = []
    find_key_recursive(info, "Subject", p)
    if len(p) == 0:
        return
    subj = deep_get(info, p)
    inserts = Subject.get_template_tuple()
    inserts["subject_id"] = subj["Identifier"]
    inserts["species"] = subj["Species"]
    Subject().insert1(inserts, skip_duplicates=True)

    # subject-dataset map entry
    dataset = dict((Dataset() & {"dataset_id": dset_name}).fetch1())
    mm = dict(dataset_id=dataset["dataset_id"], subject_id=subj["Identifier"])
    SubjectDatasetMap.insert1(mm, skip_duplicates=True)

    # subject properties; weight, size, and EOD frequency carry unit
    # suffixes (e.g. "25g", "12cm", "800Hz") that are stripped before
    # conversion
    props = SubjectProperties.get_template_tuple()
    props["subject_id"] = subj["Identifier"]
    props["recording_date"] = dataset["recording_date"]
    if "Weight" in subj.keys():
        props["weight"] = np.round(float(subj["Weight"][:-1]), 1)
    if "Size" in subj.keys():
        props["size"] = np.round(float(subj["Size"][:-2]), 1)
    if "EOD Frequency" in subj.keys():
        props["eod_frequency"] = np.round(float(subj["EOD Frequency"][:-2]))
    # insert only if an identical property tuple (ignoring the auto id)
    # does not exist yet
    p = props.copy()
    p.pop("id")
    if len(SubjectProperties & p) == 0:
        SubjectProperties.insert1(props, skip_duplicates=True)
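
# Note: populate_datasets must run before populate_subjects for a given
# path, because the subject-dataset map fetches the Dataset entry.

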
if __name__ == "__main__":
|
|
datasets = glob.glob('/data/apteronotus/2018-05-08*')
|
|
# Dataset.drop()
|
|
# Subject.drop()
|
|
# SubjectProperties.drop()
|
|
# SubjectDatasetMap.drop()
|
|
for d in datasets:
|
|
populate_datasets(d)
|
|
populate_subjects(d)
|