renaming backend entities and tables

This commit is contained in:
Jan Grewe 2019-09-17 14:15:44 +02:00
parent 67d78a8581
commit 4ec35ee97b
2 changed files with 70 additions and 70 deletions

View File

@ -3,13 +3,13 @@ import nixio as nix
import os import os
import numpy as np import numpy as np
from IPython import embed from IPython import embed
from database import Dataset, Repro from database import _Dataset, _Repro
schema = dj.schema("fish_book", locals()) schema = dj.schema("fish_book", locals())
class BaselineData(object): class BaselineData(object):
def __init__(self, dataset:Dataset): def __init__(self, dataset:_Dataset):
self.__data = [] self.__data = []
self.__dataset = dataset self.__dataset = dataset
self._get_data() self._get_data()
@ -20,11 +20,11 @@ class BaselineData(object):
self.__data = [] self.__data = []
self.__data = [] self.__data = []
repros = (Repro & self.__dataset & "repro_name like 'BaselineActivity%'") repros = (_Repro & self.__dataset & "repro_name like 'BaselineActivity%'")
for r in repros: for r in repros:
self.__data.append(self.__read_data(r)) self.__data.append(self.__read_data(r))
def __read_data(self, r:Repro): def __read_data(self, r:_Repro):
if self.__dataset["has_nix"]: if self.__dataset["has_nix"]:
return self.__read_data_from_nix(r) return self.__read_data_from_nix(r)
else: else:
@ -89,6 +89,6 @@ class BaselineData(object):
if __name__ == "__main__": if __name__ == "__main__":
print("Test") print("Test")
dataset = Dataset & "dataset_id like '2018-11-09-aa-%' " dataset = _Dataset & "dataset_id like '2018-11-09-aa-%' "
baseline = BaselineData(dataset.fetch1()) baseline = BaselineData(dataset.fetch1())
embed() embed()

View File

@ -9,12 +9,12 @@ import yaml
from IPython import embed from IPython import embed
schema = dj.schema("fish_book", locals()) schema = dj.schema("fish_book_new", locals())
@schema @schema
class Dataset(dj.Manual):
definition = """ # Dataset definition = """ # Dataset
class _Dataset(dj.Manual):
dataset_id : varchar(256) dataset_id : varchar(256)
---- ----
data_source : varchar(512) # path to the dataset data_source : varchar(512) # path to the dataset
@ -28,21 +28,21 @@ class Dataset(dj.Manual):
@staticmethod @staticmethod
def get_template_tuple(id=None): def get_template_tuple(id=None):
if id is not None: if id is not None:
d = dict((Dataset() & {"dataset_id": id}).fetch1()) d = dict((_Dataset() & {"dataset_id": id}).fetch1())
return d return d
return dict(dataset_id=None, data_source="", experimenter="", recording_date=None, return dict(dataset_id=None, data_source="", experimenter="", recording_date=None,
quality="", comment="", has_nix=False) quality="", comment="", has_nix=False)
@staticmethod @staticmethod
def get_nix_file(key): def get_nix_file(key):
dset = (Dataset() & key).fetch1() dset = (_Dataset() & key).fetch1()
if dset["ignore"]: if dset["ignore"]:
return None return None
file_path = os.path.join(dset["data_source"], dset["dataset_id"] + ".nix") file_path = os.path.join(dset["data_source"], dset["dataset_id"] + ".nix")
if not (os.path.exists(file_path)): if not (os.path.exists(file_path)):
print("\t No nix file found for path: %s" % dset["data_source"]) print("\t No nix file found for path: %s" % dset["data_source"])
return None return None
if not Dataset.check_file_integrity(file_path): if not _Dataset.check_file_integrity(file_path):
return None return None
return file_path return file_path
@ -66,19 +66,19 @@ class Dataset(dj.Manual):
@property @property
def cells(self, restrictions:dict=None): def cells(self, restrictions:dict=None):
cs = (Cell & (CellDatasetMap & self) & restrictions).fetch() cs = (_Cell & (CellDatasetMap & self) & restrictions).fetch()
return cs return cs
@property @property
def subjects(self, restrictions:dict=None): def subjects(self, restrictions:dict=None):
subjs = (Subject & (SubjectDatasetMap & self) & restrictions).fetch() subjs = (_Subject & (SubjectDatasetMap & self) & restrictions).fetch()
return subjs return subjs
@schema @schema
class Subject(dj.Manual): class _Subject(dj.Manual):
definition = """ definition = """
# Subject # _Subject
subject_id : varchar(256) subject_id : varchar(256)
---- ----
species : varchar(256) species : varchar(256)
@ -88,17 +88,17 @@ class Subject(dj.Manual):
def get_template_tuple(subject_id=None): def get_template_tuple(subject_id=None):
tup = dict(subject_id=None, species="") tup = dict(subject_id=None, species="")
if subject_id is not None: if subject_id is not None:
d = dict((Subject() & {"subject_id": subject_id}).fetch1()) d = dict((_Subject() & {"subject_id": subject_id}).fetch1())
return d return d
return tup return tup
def make(self, key): def make(self, key):
file_path = Dataset.get_nix_file(key) file_path = _Dataset.get_nix_file(key)
if file_path is None: if file_path is None:
return return
nix_file = nix.File.open(file_path, nix.FileMode.ReadOnly) nix_file = nix.File.open(file_path, nix.FileMode.ReadOnly)
m = nix_file.blocks[0].metadata m = nix_file.blocks[0].metadata
inserts = Subject.get_template_tuple() inserts = _Subject.get_template_tuple()
subj_info = m["Recording"]["Subject"] subj_info = m["Recording"]["Subject"]
inserts["subject_id"] = subj_info["Identifier"] inserts["subject_id"] = subj_info["Identifier"]
inserts["species"] = subj_info["Species"][0] inserts["species"] = subj_info["Species"][0]
@ -113,24 +113,24 @@ class Subject(dj.Manual):
def subjects(species=None): def subjects(species=None):
subjs = [] subjs = []
if species: if species:
subjs = (Subject & "species like '%{0:s}%'".format(species)).fetch() subjs = (_Subject & "species like '%{0:s}%'".format(species)).fetch()
else: else:
subjs = (Subject & True).fetch() subjs = (_Subject & True).fetch()
return subjs return subjs
@staticmethod @staticmethod
def unique_species(): def unique_species():
all_species = (Subject &True).fetch("species") all_species = (_Subject & True).fetch("species")
return np.unique(all_species) return np.unique(all_species)
@property @property
def cells(self): def cells(self):
cs = Cell & self cs = _Cell & self
return cs return cs
@property @property
def properties(self): def properties(self):
return (SubjectProperties & self).fetch1() return (_SubjectProperties & self).fetch1()
#@property #@property
#def datasets(self): #def datasets(self):
@ -140,18 +140,18 @@ class Subject(dj.Manual):
class SubjectDatasetMap(dj.Manual): class SubjectDatasetMap(dj.Manual):
definition = """ definition = """
# SubjectDatasetMap # SubjectDatasetMap
-> Subject -> _Subject
-> Dataset -> _Dataset
""" """
@schema @schema
class SubjectProperties(dj.Manual): class _SubjectProperties(dj.Manual):
definition = """ definition = """
# SubjectProperties # _SubjectProperties
id : int auto_increment id : int auto_increment
---- ----
-> Subject -> _Subject
recording_date : date recording_date : date
weight : float weight : float
size : float size : float
@ -162,17 +162,17 @@ class SubjectProperties(dj.Manual):
tup = dict(id=None, subject_id=None, recording_date=None, weight=0.0, size=0.0, tup = dict(id=None, subject_id=None, recording_date=None, weight=0.0, size=0.0,
eod_frequency=0.0) eod_frequency=0.0)
if id is not None: if id is not None:
return dict((SubjectProperties() & {"id": id}).fetch1()) return dict((_SubjectProperties() & {"id": id}).fetch1())
return tup return tup
@schema @schema
class Cell(dj.Manual): class _Cell(dj.Manual):
definition = """ definition = """
# Table that stores information about recorded cells. # Table that stores information about recorded cells.
cell_id : varchar(256) cell_id : varchar(256)
---- ----
-> Subject -> _Subject
cell_type : varchar(256) cell_type : varchar(256)
firing_rate : float firing_rate : float
structure : varchar(256) structure : varchar(256)
@ -189,21 +189,21 @@ class Cell(dj.Manual):
depth=0.0, region="", subregion="", structure="", depth=0.0, region="", subregion="", structure="",
lateral_pos=0.0, transversal_section=0.0) lateral_pos=0.0, transversal_section=0.0)
if cell_id is not None: if cell_id is not None:
d = dict((Cell() & {"cell_id": cell_id}).fetch1()) d = dict((_Cell() & {"cell_id": cell_id}).fetch1())
return d return d
return tup return tup
@property @property
def subject(self): def subject(self):
return Subject & self return _Subject & self
@staticmethod @staticmethod
def celltypes(): def celltypes():
return np.unique(Cell.fetch("cell_type")) return np.unique(_Cell.fetch("cell_type"))
@staticmethod @staticmethod
def cells(celltype=None, species=None, quality="good"): def cells(celltype=None, species=None, quality="good"):
cs = Cell * CellDatasetMap * Dataset * Subject cs = _Cell * CellDatasetMap * _Dataset * _Subject
if celltype: if celltype:
cs = cs & "cell_type like '{0:s}'".format(celltype) cs = cs & "cell_type like '{0:s}'".format(celltype)
if species: if species:
@ -217,17 +217,17 @@ class Cell(dj.Manual):
class CellDatasetMap(dj.Manual): class CellDatasetMap(dj.Manual):
definition = """ definition = """
# Table that maps recorded cells to datasets # Table that maps recorded cells to datasets
-> Dataset -> _Dataset
-> Cell -> _Cell
""" """
@schema @schema
class Repro(dj.Manual): class _Repro(dj.Manual):
definition = """ definition = """
repro_id : varchar(512) # The name that was given to the RePro run by relacs repro_id : varchar(512) # The name that was given to the RePro run by relacs
run : smallint # A counter counting the runs of the ReProp in this dataset run : smallint # A counter counting the runs of the ReProp in this dataset
-> Dataset # this is wrong! should be cell!?! In our case it is the same... -> _Cell #
---- ----
repro_name : varchar(512) # The original name of the RePro itself, not any given name by user or relacs repro_name : varchar(512) # The original name of the RePro itself, not any given name by user or relacs
settings : varchar(3000) # Yaml formatted string containing the repro settings (tag.metadata in case of a nix file) settings : varchar(3000) # Yaml formatted string containing the repro settings (tag.metadata in case of a nix file)
@ -239,16 +239,16 @@ class Repro(dj.Manual):
def get_template_tuple(repro_id=None): def get_template_tuple(repro_id=None):
tup = dict(repro_id=None, dataset_id=None, run=0, repro_name="", settings=None, start=None, duration=None) tup = dict(repro_id=None, dataset_id=None, run=0, repro_name="", settings=None, start=None, duration=None)
if repro_id is not None: if repro_id is not None:
d = dict((Repro() & {"repro_id": repro_id}).fetch1()) d = dict((_Repro() & {"repro_id": repro_id}).fetch1())
return d return d
return tup return tup
@schema @schema
class Stimulus(dj.Manual): class _Stimulus(dj.Manual):
definition = """ definition = """
stimulus_id : varchar(50) stimulus_id : varchar(50)
-> Repro -> _Repro
--- ---
stimulus_index : int stimulus_index : int
stimulus_name : varchar(512) stimulus_name : varchar(512)
@ -262,7 +262,7 @@ class Stimulus(dj.Manual):
@staticmethod @staticmethod
def get_template_tuple(stimulus_id=None): def get_template_tuple(stimulus_id=None):
if stimulus_id is not None: if stimulus_id is not None:
tup = dict((Stimulus & {"stimulus_id": stimulus_id}).fetch1()) tup = dict((_Stimulus & {"stimulus_id": stimulus_id}).fetch1())
else: else:
tup = dict(stimulus_id=None, stimulus_index=None, stimulus_name="", start_index=0, start_time=0.0, tup = dict(stimulus_id=None, stimulus_index=None, stimulus_name="", start_index=0, start_time=0.0,
duration=0.0, settings=None) duration=0.0, settings=None)
@ -277,7 +277,7 @@ def populate_datasets(data_path, update=False):
if not experimenter: if not experimenter:
return False return False
inserts = Dataset.get_template_tuple() inserts = _Dataset.get_template_tuple()
inserts["dataset_id"] = dset_name inserts["dataset_id"] = dset_name
inserts["data_source"] = data_path inserts["data_source"] = data_path
inserts["experimenter"] = experimenter inserts["experimenter"] = experimenter
@ -285,10 +285,10 @@ def populate_datasets(data_path, update=False):
inserts["quality"] = quality if not isinstance(quality, dict) else "" inserts["quality"] = quality if not isinstance(quality, dict) else ""
inserts["comment"] = comment if not isinstance(comment, dict) else "" inserts["comment"] = comment if not isinstance(comment, dict) else ""
inserts["has_nix"] = has_nix inserts["has_nix"] = has_nix
if len(Dataset & inserts) > 0 and not update: if len(_Dataset & inserts) > 0 and not update:
print('\t\t %s is already in database!' % dset_name) print('\t\t %s is already in database!' % dset_name)
return False return False
Dataset().insert1(inserts, skip_duplicates=True) _Dataset().insert1(inserts, skip_duplicates=True)
return True return True
@ -306,7 +306,7 @@ def populate_subjects(data_path):
if len(p) > 0: if len(p) > 0:
subj = ut.deep_get(info, p) subj = ut.deep_get(info, p)
inserts = Subject.get_template_tuple() inserts = _Subject.get_template_tuple()
subj_id = None subj_id = None
if "Identifier" in subj.keys(): if "Identifier" in subj.keys():
if isinstance(subj["Identifier"], dict): if isinstance(subj["Identifier"], dict):
@ -322,15 +322,15 @@ def populate_subjects(data_path):
subj_id = "unspecified_" + dset_name subj_id = "unspecified_" + dset_name
inserts["subject_id"] = subj_id inserts["subject_id"] = subj_id
inserts["species"] = subj["Species"] inserts["species"] = subj["Species"]
Subject().insert1(inserts, skip_duplicates=True) _Subject().insert1(inserts, skip_duplicates=True)
# multi match entry # multi match entry
dataset = dict((Dataset() & {"dataset_id": dset_name}).fetch1()) dataset = dict((_Dataset() & {"dataset_id": dset_name}).fetch1())
mm = dict(dataset_id=dataset["dataset_id"], subject_id=inserts["subject_id"]) mm = dict(dataset_id=dataset["dataset_id"], subject_id=inserts["subject_id"])
SubjectDatasetMap.insert1(mm, skip_duplicates=True) SubjectDatasetMap.insert1(mm, skip_duplicates=True)
# subject properties # subject properties
props = SubjectProperties.get_template_tuple() props = _SubjectProperties.get_template_tuple()
props["subject_id"] = inserts["subject_id"] props["subject_id"] = inserts["subject_id"]
props["recording_date"] = dataset["recording_date"] props["recording_date"] = dataset["recording_date"]
if "Weight" in subj.keys(): if "Weight" in subj.keys():
@ -341,8 +341,8 @@ def populate_subjects(data_path):
props["eod_frequency"] = np.round(float(subj["EOD Frequency"][:-2])) props["eod_frequency"] = np.round(float(subj["EOD Frequency"][:-2]))
p = props.copy() p = props.copy()
p.pop("id") p.pop("id")
if len(SubjectProperties & p) == 0: if len(_SubjectProperties & p) == 0:
SubjectProperties.insert1(props, skip_duplicates=True) _SubjectProperties.insert1(props, skip_duplicates=True)
def populate_cells(data_path): def populate_cells(data_path):
@ -379,12 +379,12 @@ def populate_cells(data_path):
subj_id = info["Identifier"] subj_id = info["Identifier"]
else: else:
subj_id = "unspecified_" + dset_name subj_id = "unspecified_" + dset_name
dataset = dict((Dataset & {"dataset_id": dset_name}).fetch1()) dataset = dict((_Dataset & {"dataset_id": dset_name}).fetch1())
subject = dict((Subject & {"subject_id": subj_id}).fetch1()) subject = dict((_Subject & {"subject_id": subj_id}).fetch1())
dataset_id = dataset["dataset_id"] dataset_id = dataset["dataset_id"]
cell_id = "-".join(dataset_id.split("-")[:4]) if len(dataset_id) > 4 else dataset_id cell_id = "-".join(dataset_id.split("-")[:4]) if len(dataset_id) > 4 else dataset_id
cell_props = Cell.get_template_tuple() cell_props = _Cell.get_template_tuple()
cell_props["subject_id"] = subject["subject_id"] cell_props["subject_id"] = subject["subject_id"]
cell_props["cell_id"] = cell_id cell_props["cell_id"] = cell_id
cell_props["cell_type"] = cell_info["CellType"] cell_props["cell_type"] = cell_info["CellType"]
@ -402,7 +402,7 @@ def populate_cells(data_path):
if "Transverse section" in cell_info.keys(): if "Transverse section" in cell_info.keys():
cell_props["transversal_section"] = float(cell_info["Transverse section"]) cell_props["transversal_section"] = float(cell_info["Transverse section"])
Cell.insert1(cell_props, skip_duplicates=True) _Cell.insert1(cell_props, skip_duplicates=True)
# multi mach entry # multi mach entry
mm = dict(dataset_id=dataset["dataset_id"], cell_id=cell_props["cell_id"]) mm = dict(dataset_id=dataset["dataset_id"], cell_id=cell_props["cell_id"])
@ -413,7 +413,7 @@ def scan_nix_file_for_repros(dataset):
print("\t\tscanning nix file") print("\t\tscanning nix file")
nix_files = glob.glob(os.path.join(dataset["data_source"], "*.nix")) nix_files = glob.glob(os.path.join(dataset["data_source"], "*.nix"))
for nf in nix_files: for nf in nix_files:
if not Dataset.check_file_integrity(nf): if not _Dataset.check_file_integrity(nf):
print("\t\tfile is not sane!!!") print("\t\tfile is not sane!!!")
continue continue
f = nix.File.open(nf, nix.FileMode.ReadOnly) f = nix.File.open(nf, nix.FileMode.ReadOnly)
@ -426,7 +426,7 @@ def scan_nix_file_for_repros(dataset):
rs = rs[0] rs = rs[0]
print("\t\t%s" % rs["RePro"]) print("\t\t%s" % rs["RePro"])
rp = Repro.get_template_tuple() rp = _Repro.get_template_tuple()
rp["run"] = rs["Run"] rp["run"] = rs["Run"]
rp["repro_name"] = rs["RePro"] rp["repro_name"] = rs["RePro"]
rp["dataset_id"] = dataset["dataset_id"] rp["dataset_id"] = dataset["dataset_id"]
@ -438,10 +438,10 @@ def scan_nix_file_for_repros(dataset):
rp["settings"] = ut.nix_metadata_to_yaml(t.metadata) rp["settings"] = ut.nix_metadata_to_yaml(t.metadata)
rp["start"] = t.position[0] rp["start"] = t.position[0]
rp["duration"] = t.extent[0] rp["duration"] = t.extent[0]
Repro.insert1(rp, skip_duplicates=True) _Repro.insert1(rp, skip_duplicates=True)
# import Stimuli # import Stimuli
repro = dict((Repro & dict(repro_id=rp["repro_id"], dataset_id=rp["dataset_id"])).fetch1()) repro = dict((_Repro & dict(repro_id=rp["repro_id"], dataset_id=rp["dataset_id"])).fetch1())
repro.pop("settings") repro.pop("settings")
repro.pop("repro_name") repro.pop("repro_name")
repro.pop("start") repro.pop("start")
@ -456,7 +456,7 @@ def scan_nix_file_for_repros(dataset):
stim_start = mt_positions[p, 0] stim_start = mt_positions[p, 0]
stim_duration = mt_extents[p, 0] stim_duration = mt_extents[p, 0]
stim = Stimulus.get_template_tuple() stim = _Stimulus.get_template_tuple()
stim["stimulus_id"] = str(uuid.uuid1()) stim["stimulus_id"] = str(uuid.uuid1())
stim["stimulus_index"] = p stim["stimulus_index"] = p
stim["start_time"] = stim_start stim["start_time"] = stim_start
@ -466,7 +466,7 @@ def scan_nix_file_for_repros(dataset):
stim["mtag_id"] = mt.id stim["mtag_id"] = mt.id
stim["stimulus_name"] = mt.name stim["stimulus_name"] = mt.name
stim.update(repro) stim.update(repro)
Stimulus.insert1(stim, skip_duplicates=True) _Stimulus.insert1(stim, skip_duplicates=True)
f.close() f.close()
f = None f = None
@ -476,7 +476,7 @@ def scan_folder_for_repros(dataset):
repro_settings, stim_indices = ut.read_stimuli_file(dataset["data_source"]) repro_settings, stim_indices = ut.read_stimuli_file(dataset["data_source"])
repro_counts = {} repro_counts = {}
for i, (rs, si) in enumerate(zip(repro_settings, stim_indices)): for i, (rs, si) in enumerate(zip(repro_settings, stim_indices)):
rp = Repro.get_template_tuple() rp = _Repro.get_template_tuple()
path = [] path = []
if not ut.find_key_recursive(rs, "run", path): if not ut.find_key_recursive(rs, "run", path):
ut.find_key_recursive(rs, "Run", path) ut.find_key_recursive(rs, "Run", path)
@ -501,10 +501,10 @@ def scan_folder_for_repros(dataset):
rp["start"] = 0. rp["start"] = 0.
rp["duration"] = 0. rp["duration"] = 0.
rp["settings"] = yaml.dump(rs) rp["settings"] = yaml.dump(rs)
Repro.insert1(rp, skip_duplicates=True) _Repro.insert1(rp, skip_duplicates=True)
# import stimuli # import stimuli
repro = dict((Repro & dict(repro_id=rp["repro_id"], dataset_id=rp["dataset_id"])).fetch1()) repro = dict((_Repro & dict(repro_id=rp["repro_id"], dataset_id=rp["dataset_id"])).fetch1())
repro.pop("settings") repro.pop("settings")
repro.pop("repro_name") repro.pop("repro_name")
repro.pop("start") repro.pop("start")
@ -524,7 +524,7 @@ def scan_folder_for_repros(dataset):
else: else:
stim_duration = 0.0 stim_duration = 0.0
stim = Stimulus.get_template_tuple() stim = _Stimulus.get_template_tuple()
stim["stimulus_id"] = str(uuid.uuid1()) stim["stimulus_id"] = str(uuid.uuid1())
stim["stimulus_index"] = j stim["stimulus_index"] = j
stim["start_time"] = stim_start stim["start_time"] = stim_start
@ -534,15 +534,15 @@ def scan_folder_for_repros(dataset):
stim["mtag_id"] = "" stim["mtag_id"] = ""
stim["stimulus_name"] = "" stim["stimulus_name"] = ""
stim.update(repro) stim.update(repro)
Stimulus.insert1(stim, skip_duplicates=True) _Stimulus.insert1(stim, skip_duplicates=True)
def populate_repros(data_path): def populate_repros(data_path):
print("\tImporting RePro(s) of %s" % data_path) print("\tImporting RePro(s) of %s" % data_path)
dset_name = os.path.split(data_path)[-1] dset_name = os.path.split(data_path)[-1]
if len(Dataset & {"dataset_id": dset_name}) != 1: if len(_Dataset & {"dataset_id": dset_name}) != 1:
return False return False
dataset = dict((Dataset & {"dataset_id": dset_name}).fetch1()) dataset = dict((_Dataset & {"dataset_id": dset_name}).fetch1())
if dataset["has_nix"]: if dataset["has_nix"]:
scan_nix_file_for_repros(dataset) scan_nix_file_for_repros(dataset)
@ -552,8 +552,8 @@ def populate_repros(data_path):
def drop_tables(): def drop_tables():
Dataset.drop() _Dataset.drop()
Subject.drop() _Subject.drop()
def populate(datasets, update=False): def populate(datasets, update=False):