forked from jgrewe/fishbook
Sometimes RePros do not put out stimuli, for whatever reason. Such runs are noted in stimuli.dat but not in, e.g., stimspikes.dat, which leads to conflicts when fetching the data.

import numpy as np
import datajoint as dj
import nixio as nix
import os
import glob
import socket
from fishbook.backend.util import read_info_file, read_dataset_info, read_stimuli_file
from fishbook.backend.util import find_key_recursive, deep_get, find_mtags_for_tag
from fishbook.backend.util import mtag_settings_to_yaml, nix_metadata_to_yaml, mtag_features_to_yaml, progress
import uuid
import yaml

from IPython import embed

dj.config["enable_python_native_blobs"] = True
schema = dj.schema("fish_book", locals())


|
@schema
class Datasets(dj.Manual):
    definition = """ # _Dataset
    dataset_id : varchar(256)
    ----
    data_source : varchar(512) # path to the dataset
    data_host : varchar(512) # fully qualified domain name
    experimenter : varchar(512)
    setup : varchar(128)
    recording_date : date
    quality : varchar(512)
    comment : varchar(1024)
    duration : float
    has_nix : bool
    """

    @staticmethod
    def get_template_tuple(id=None):
        if id is not None:
            return dict((Datasets() & {"dataset_id": id}).fetch1())
        return dict(dataset_id=None, data_source="", data_host="", experimenter="", setup="",
                    recording_date=None, quality="", comment="", duration=0.0, has_nix=False)

    @staticmethod
    def get_nix_file(key):
        dset = (Datasets() & key).fetch1()
        if dset.get("ignore", False):  # optional flag, not an attribute of the table definition
            return None
        file_path = os.path.join(dset["data_source"], dset["dataset_id"] + ".nix")
        if not os.path.exists(file_path):
            print("\t No nix file found for path: %s" % dset["data_source"])
            return None
        if not Datasets.check_file_integrity(file_path):
            return None
        return file_path

    @staticmethod
    def check_file_integrity(nix_file):
        sane = True
        f = None
        try:
            f = nix.File.open(nix_file, nix.FileMode.ReadOnly)
            b = f.blocks[0]
            m = b.metadata
            if "Recording" not in m.sections:
                print("\t Could not find Recording section in dataset: %s" % nix_file)
                sane = False
        except Exception:
            print("file: %s is NOT SANE!" % nix_file)
            sane = False
        finally:
            if f is not None:
                f.close()
        return sane


@schema
class Subjects(dj.Manual):
    definition = """
    # Subjects
    subject_id : varchar(256)
    ----
    species : varchar(256)
    """

    @staticmethod
    def get_template_tuple(subject_id=None):
        if subject_id is not None:
            return dict((Subjects() & {"subject_id": subject_id}).fetch1())
        return dict(subject_id=None, species="")

    def make(self, key):
        file_path = Datasets.get_nix_file(key)
        if file_path is None:
            return
        nix_file = nix.File.open(file_path, nix.FileMode.ReadOnly)
        m = nix_file.blocks[0].metadata
        inserts = Subjects.get_template_tuple()
        subj_info = m["Recording"]["Subject"]
        inserts["subject_id"] = subj_info["Identifier"]
        inserts["species"] = subj_info["Species"][0]
        # weight, size and EOD frequency are not attributes of this table;
        # they are stored in SubjectProperties instead
        inserts.update(key)
        self.insert1(inserts, skip_duplicates=True)
        nix_file.close()

    # @property
    # def datasets(self):
    #     return


@schema
class SubjectDatasetMap(dj.Manual):
    definition = """
    # SubjectDatasetMap
    -> Subjects
    -> Datasets
    """


@schema
class SubjectProperties(dj.Manual):
    definition = """
    # _SubjectProperties
    id : int auto_increment
    ----
    -> Subjects
    recording_date : date
    weight : float
    size : float
    eod_frequency : float
    """

    @staticmethod
    def get_template_tuple(id=None):
        tup = dict(id=None, subject_id=None, recording_date=None, weight=0.0, size=0.0,
                   eod_frequency=0.0)
        if id is not None:
            return dict((SubjectProperties() & {"id": id}).fetch1())
        return tup


@schema
class Cells(dj.Manual):
    definition = """
    # Table that stores information about recorded cells.
    cell_id : varchar(256)
    ----
    -> Subjects
    cell_type : varchar(256)
    firing_rate : float
    structure : varchar(256)
    region : varchar(256)
    subregion : varchar(256)
    depth : float
    lateral_pos : float
    transversal_section : float
    """

    @staticmethod
    def get_template_tuple(cell_id=None):
        tup = dict(cell_id=None, subject_id=None, cell_type="", firing_rate=0.0,
                   depth=0.0, region="", subregion="", structure="",
                   lateral_pos=0.0, transversal_section=0.0)
        if cell_id is not None:
            return dict((Cells() & {"cell_id": cell_id}).fetch1())
        return tup


@schema
class CellDatasetMap(dj.Manual):
    definition = """
    # Table that maps recorded cells to datasets
    -> Datasets
    -> Cells
    """


@schema
class Repros(dj.Manual):
    definition = """
    repro_id : varchar(512) # the name relacs gave to this RePro run
    run : smallint # counts the runs of this RePro within the dataset
    -> Cells
    ----
    repro_name : varchar(512) # the original name of the RePro itself, not the name given by the user or relacs
    settings : varchar(3000) # yaml-formatted string of the repro settings (tag.metadata in case of a nix file)
    start : float # start time of the repro run
    duration : float # duration of the repro run
    """

    @staticmethod
    def get_template_tuple(repro_id=None):
        tup = dict(repro_id=None, cell_id=None, run=0, repro_name="", settings=None, start=None, duration=None)
        if repro_id is not None:
            return dict((Repros() & {"repro_id": repro_id}).fetch1())
        return tup


@schema
class Stimuli(dj.Manual):
    definition = """
    stimulus_id : varchar(50)
    -> Repros
    ---
    stimulus_index : int
    stimulus_name : varchar(512)
    mtag_id : varchar(50)
    start_time : float
    start_index : int
    duration : float
    settings : varchar(3000)
    """

    @staticmethod
    def get_template_tuple(stimulus_id=None):
        if stimulus_id is not None:
            tup = dict((Stimuli & {"stimulus_id": stimulus_id}).fetch1())
        else:
            tup = dict(stimulus_id=None, stimulus_index=None, stimulus_name="", start_index=0, start_time=0.0,
                       duration=0.0, settings=None)
        return tup


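# A usage sketch of the schema above: once the tables are populated, related
# entries are fetched via DataJoint joins and restrictions. All table and
# attribute names come from the definitions above; any concrete cell_id passed
# in is hypothetical.
def stimuli_of_cell(cell_id):
    # all RePro runs recorded for this cell
    repros = (Repros & {"cell_id": cell_id}).fetch(as_dict=True)
    # all stimuli presented during these runs, joined via the Stimuli -> Repros dependency
    stimuli = (Stimuli * Repros & {"cell_id": cell_id}).fetch(as_dict=True)
    return repros, stimuli

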
def populate_datasets(data_path, update=False):
    if not os.path.exists(data_path):
        return False
    dset_name = os.path.split(data_path)[-1]
    experimenter, rec_date, quality, comment, has_nix, rec_duration, setup = read_dataset_info(os.path.join(data_path, 'info.dat'))
    if not experimenter:
        return False

    inserts = Datasets.get_template_tuple()
    inserts["dataset_id"] = dset_name
    inserts["data_source"] = os.path.abspath(data_path)
    inserts["data_host"] = socket.getfqdn()
    inserts["experimenter"] = experimenter
    inserts["recording_date"] = rec_date
    inserts["quality"] = quality if not isinstance(quality, dict) else ""
    inserts["comment"] = comment if not isinstance(comment, dict) else ""
    inserts["duration"] = rec_duration
    inserts["setup"] = setup
    inserts["has_nix"] = has_nix
    if len(Datasets & "dataset_id like '%s'" % inserts["dataset_id"]) > 0 and not update:
        print('\t\t %s is already in database!' % dset_name)
        return False
    Datasets().insert1(inserts, skip_duplicates=True)
    return True


def populate_subjects(data_path):
    print("\tImporting subject(s) of %s" % data_path)
    dset_name = os.path.split(data_path)[-1]
    info_file = os.path.join(data_path, 'info.dat')
    if not os.path.exists(info_file):
        return
    info = read_info_file(info_file)

    p = []
    find_key_recursive(info, "Subject", p)
    subj = {}
    if len(p) > 0:
        subj = deep_get(info, p)

    inserts = Subjects.get_template_tuple()
    if "Identifier" in subj.keys():
        if isinstance(subj["Identifier"], dict):
            subj_id = "unspecified_" + dset_name
        else:
            subj_id = subj["Identifier"]
    elif "Identifier" in info.keys():
        if isinstance(info["Identifier"], dict):
            subj_id = "unspecified_" + dset_name
        else:
            subj_id = info["Identifier"]
    else:
        subj_id = "unspecified_" + dset_name
    inserts["subject_id"] = subj_id
    inserts["species"] = subj.get("Species", "")  # guard against datasets without a Subject section
    Subjects().insert1(inserts, skip_duplicates=True)

    # multi match entry
    dataset = dict((Datasets() & {"dataset_id": dset_name}).fetch1())
    mm = dict(dataset_id=dataset["dataset_id"], subject_id=inserts["subject_id"])
    SubjectDatasetMap.insert1(mm, skip_duplicates=True)

    # subject properties
    props = SubjectProperties.get_template_tuple()
    props["subject_id"] = inserts["subject_id"]
    props["recording_date"] = dataset["recording_date"]
    if "Weight" in subj.keys():
        props["weight"] = np.round(float(subj["Weight"][:-1]), 1)  # strip the trailing unit
    if "Size" in subj.keys():
        props["size"] = np.round(float(subj["Size"][:-2]), 1)  # strip the trailing unit
    if "EOD Frequency" in subj.keys():
        props["eod_frequency"] = np.round(float(subj["EOD Frequency"][:-2]))  # strip "Hz"
    p = props.copy()
    p.pop("id")
    if len(SubjectProperties & p) == 0:
        SubjectProperties.insert1(props, skip_duplicates=True)


def populate_cells(data_path):
    print("\tImporting cell(s) of %s" % data_path)
    dset_name = os.path.split(data_path)[-1]
    info_file = os.path.join(data_path, 'info.dat')
    if not os.path.exists(info_file):
        return
    info = read_info_file(info_file)
    p = []
    find_key_recursive(info, "Subject", p)
    subject_info = deep_get(info, p)

    p = []
    find_key_recursive(info, "Cell", p)
    cell_info = deep_get(info, p)

    p = []
    res = find_key_recursive(info, "Firing Rate1", p)
    if res:
        firing_rate = deep_get(info, p, default=0.0)
    else:
        firing_rate = 0.0
    if isinstance(firing_rate, str):
        firing_rate = float(firing_rate[:-2])  # strip "Hz"

    if "Identifier" in subject_info.keys():
        if isinstance(subject_info["Identifier"], dict):
            subj_id = "unspecified_" + dset_name
        else:
            subj_id = subject_info["Identifier"]
    elif "Identifier" in info.keys():
        if isinstance(info["Identifier"], dict):
            subj_id = "unspecified_" + dset_name
        else:
            subj_id = info["Identifier"]
    else:
        subj_id = "unspecified_" + dset_name
    dataset = dict((Datasets & {"dataset_id": dset_name}).fetch1())
    subject = dict((Subjects & {"subject_id": subj_id}).fetch1())

    dataset_id = dataset["dataset_id"]
    # the cell id is the leading date part of the dataset id, e.g. "2010-06-18-ab"
    cell_id = "-".join(dataset_id.split("-")[:4]) if len(dataset_id) > 4 else dataset_id
    cell_props = Cells.get_template_tuple()
    cell_props["subject_id"] = subject["subject_id"]
    cell_props["cell_id"] = cell_id
    cell_props["cell_type"] = cell_info["CellType"]
    cell_props["firing_rate"] = firing_rate
    if "Structure" in cell_info.keys():
        cell_props["structure"] = cell_info["Structure"]
    if "BrainRegion" in cell_info.keys():
        cell_props["region"] = cell_info["BrainRegion"]
    if "BrainSubRegion" in cell_info.keys():
        cell_props["subregion"] = cell_info["BrainSubRegion"]
    if "Depth" in cell_info.keys():
        cell_props["depth"] = float(cell_info["Depth"][:-2])  # strip the trailing unit
    if "Lateral position" in cell_info.keys():
        cell_props["lateral_pos"] = float(cell_info["Lateral position"][:-2])
    if "Transverse section" in cell_info.keys():
        cell_props["transversal_section"] = float(cell_info["Transverse section"])
    Cells.insert1(cell_props, skip_duplicates=True)

    # multi match entry
    mm = dict(dataset_id=dataset["dataset_id"], cell_id=cell_props["cell_id"])
    CellDatasetMap.insert1(mm, skip_duplicates=True)


def scan_nix_file_for_repros(dataset):
    print("\t\tscanning nix file")
    cell_id = (Cells * CellDatasetMap * (Datasets & "dataset_id = '%s'" % dataset["dataset_id"])).fetch("cell_id", limit=1)[0]
    nix_files = glob.glob(os.path.join(dataset["data_source"], "*.nix"))
    for nf in nix_files:
        if not Datasets.check_file_integrity(nf):
            print("\t\tfile is not sane!!!")
            continue
        f = nix.File.open(nf, nix.FileMode.ReadOnly)
        b = f.blocks[0]
        repro_runs = [t for t in b.tags if "relacs.repro_run" in t.type]
        total = len(repro_runs)
        for i, t in enumerate(repro_runs):
            rs = t.metadata.find_sections(lambda x: "Run" in x.props)
            if len(rs) == 0:
                continue
            rs = rs[0]

            progress(i + 1, total, "Scanning repro run %s" % rs["RePro"])

            rp = Repros.get_template_tuple()
            rp["run"] = rs["Run"]
            rp["repro_name"] = rs["RePro"]
            rp["cell_id"] = cell_id
            rp["repro_id"] = t.name
            settings = t.metadata.find_sections(lambda x: "settings" in x.type)
            if len(settings) > 0:
                rp["settings"] = nix_metadata_to_yaml(settings[0])
            else:
                rp["settings"] = nix_metadata_to_yaml(t.metadata)
            rp["start"] = t.position[0]
            rp["duration"] = t.extent[0]
            Repros.insert1(rp, skip_duplicates=True)

            # import Stimuli; keep only the primary key of the freshly inserted repro
            repro = dict((Repros & dict(repro_id=rp["repro_id"], cell_id=cell_id)).fetch1())
            repro.pop("settings")
            repro.pop("repro_name")
            repro.pop("start")
            repro.pop("duration")

            mtags, positions = find_mtags_for_tag(b, t)
            mt_settings_dict = {}
            positions_dict = {}
            extents_dict = {}
            for j, mt in enumerate(mtags):
                if mt.id in positions_dict.keys():
                    mt_positions = positions_dict[mt.id]
                    mt_extents = extents_dict[mt.id]
                    mdata_yaml = mt_settings_dict[mt.id]
                else:
                    mdata_yaml = nix_metadata_to_yaml(mt.metadata)
                    mt_settings_dict[mt.id] = mdata_yaml
                    mt_positions = np.atleast_2d(mt.positions[:])
                    mt_extents = np.atleast_2d(mt.extents[:])
                    if mt.positions.shape[0] != mt_positions.shape[0]:
                        mt_positions = mt_positions.T
                        mt_extents = mt_extents.T
                    # cache positions and extents so repeated mtags are not re-read
                    positions_dict[mt.id] = mt_positions
                    extents_dict[mt.id] = mt_extents
                for p in positions[j]:
                    settings = mtag_features_to_yaml(mt, p, mdata_yaml)
                    stim_start = mt_positions[p, 0]
                    stim_duration = mt_extents[p, 0]

                    stim = Stimuli.get_template_tuple()
                    stim["stimulus_id"] = str(uuid.uuid1())
                    stim["stimulus_index"] = p
                    stim["start_time"] = stim_start
                    stim["start_index"] = -1
                    stim["duration"] = stim_duration
                    stim["settings"] = settings
                    stim["mtag_id"] = mt.id
                    stim["stimulus_name"] = mt.name
                    stim.update(repro)
                    Stimuli.insert1(stim, skip_duplicates=True)
        print(" " * 120, end="\r")
        print("\n")
        f.close()
        f = None


def scan_folder_for_repros(dataset):
    print("\t\tNo nix file, scanning directory!")
    repro_settings, stim_indices = read_stimuli_file(dataset["data_source"])
    repro_counts = {}  # internal counter of repro runs, for cases in which the repro has no run counter
    repros_skipped = {}  # counts repro runs in which no stimulus was put out; used to correct the run counter so it aligns with e.g. the stimspikes file
    repros_without_stims = ["baselineactivity"]
    cell_id = (Cells * CellDatasetMap * (Datasets & "dataset_id = '%s'" % dataset["dataset_id"])).fetch("cell_id", limit=1)[0]
    for rs, si in zip(repro_settings, stim_indices):
        rp = Repros.get_template_tuple()

        path = []
        if not find_key_recursive(rs, "repro", path):
            find_key_recursive(rs, "RePro", path)
        rp["repro_name"] = deep_get(rs, path, "None")

        if rp["repro_name"] not in repros_skipped.keys():
            repros_skipped[rp["repro_name"]] = 0

        if rp["repro_name"].lower() not in repros_without_stims and len(si) == 0:
            repros_skipped[rp["repro_name"]] += 1
            continue

        if rp["repro_name"] in repro_counts.keys():
            repro_counts[rp["repro_name"]] += 1
        else:
            repro_counts[rp["repro_name"]] = 0
        path = []
        if not find_key_recursive(rs, "run", path):
            find_key_recursive(rs, "Run", path)
        if len(path) > 0:
            rp["run"] = int(deep_get(rs, path, 0)) - repros_skipped[rp["repro_name"]]
        else:  # the run information is missing and needs to be reconstructed
            rp["run"] = repro_counts[rp["repro_name"]] - repros_skipped[rp["repro_name"]]

        rp["cell_id"] = cell_id
        rp["repro_id"] = rp["repro_name"] + str(repro_counts[rp["repro_name"]])
        rp["start"] = 0.
        rp["duration"] = 0.
        rp["settings"] = yaml.dump(rs).replace("'", "")
        Repros.insert1(rp, skip_duplicates=True)

        # import stimuli
        repro = dict((Repros & dict(repro_id=rp["repro_id"], cell_id=cell_id)).fetch1())
        repro.pop("settings")
        repro.pop("repro_name")
        repro.pop("start")
        repro.pop("duration")

        total = len(si.keys())
        for j, k in enumerate(si.keys()):
            progress(j + 1, total, "scanning repro %s" % rp["repro_name"])
            s = int(si[k])
            stim_start = 0.
            path = []
            if not find_key_recursive(rs, "duration", path):
                find_key_recursive(rs, "Duration", path)
            if len(path) > 0:
                stim_duration = deep_get(rs, path, None)
                if "sec" in stim_duration:
                    stim_duration = float(stim_duration[:stim_duration.index("sec")])
                elif "ms" in stim_duration:
                    stim_duration = float(stim_duration[:stim_duration.index("ms")]) / 1000
                else:
                    stim_duration = float(stim_duration[:stim_duration.index("s")])
            else:
                stim_duration = 0.0

            stim = Stimuli.get_template_tuple()
            stim["stimulus_id"] = str(uuid.uuid1())
            stim["stimulus_index"] = j
            stim["start_time"] = stim_start
            stim["start_index"] = s
            stim["duration"] = stim_duration
            stim["settings"] = yaml.dump(rs).replace("'", "")
            stim["mtag_id"] = ""
            stim["stimulus_name"] = ""
            stim.update(repro)
            Stimuli.insert1(stim, skip_duplicates=True)
        print(" " * 120, end='\r')


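# A minimal, self-contained sketch (with made-up stimulus counts) of the
# correction applied above: runs that put out no stimuli are skipped, and the
# run counters of later runs are shifted down so they line up with files such
# as stimspikes.dat, which never saw the empty runs.
def _aligned_run_indices(stimulus_counts):
    """Map each stimulus-emitting run to its corrected run index.

    >>> _aligned_run_indices([12, 0, 8, 10])
    {0: 0, 2: 1, 3: 2}
    """
    skipped = 0
    aligned = {}
    for run, n in enumerate(stimulus_counts):
        if n == 0:
            skipped += 1  # noted in stimuli.dat, but absent from stimspikes.dat
            continue
        aligned[run] = run - skipped
    return aligned

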
def populate_repros(data_path):
    print("\tImporting RePro(s) of %s" % data_path)
    dset_name = os.path.split(data_path)[-1]
    if len(Datasets & {"dataset_id": dset_name}) != 1:
        return False
    dataset = dict((Datasets & {"dataset_id": dset_name}).fetch1())

    if dataset["has_nix"]:
        scan_nix_file_for_repros(dataset)
    else:
        scan_folder_for_repros(dataset)
    return True


def drop_tables():
    Datasets.drop()
    Subjects.drop()


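# A rough post-import sanity check (a sketch, not part of the import flow):
# len() on a DataJoint table reports its number of entries.
def report_table_sizes():
    for table in (Datasets, Subjects, Cells, Repros, Stimuli):
        print("%s: %d entries" % (table.__name__, len(table())))

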
def populate(datasets, update=False):
    for i, d in enumerate(datasets):
        print("Importing %i of %i: %s" % (i + 1, len(datasets), d))
        if not populate_datasets(d, update):
            continue
        populate_subjects(d)
        populate_cells(d)
        try:
            populate_repros(d)
        except Exception:
            print("\t\tsomething went wrong! %s" % d)


if __name__ == "__main__":
    data_dir = "/data/apteronotus"
    # data_dir = "../high_freq_chirps/data"
    # drop_tables()
    # datasets = glob.glob("/Users/jan/zwischenlager/2012-*")
    datasets = glob.glob(os.path.join(data_dir, "2010-06-18*"))
    populate(datasets, update=False)