restructuring cell, cell-dataset multimatch table

2019-03-09 17:59:45 +01:00 · 2019-03-09 17:59:45 +01:00 · ec7d08be9c
commit ec7d08be9c
parent d5ee54ad5d
2 changed files with 60 additions and 45 deletions
--- a/database.py
+++ b/database.py
@ -3,11 +3,9 @@ import datajoint as dj
 import nixio as nix
 import os
 import glob
-from util import read_info_file, find_key_recursive, deep_get
+from util import read_info_file, find_key_recursive, deep_get, read_dataset_info
 from IPython import embed
-import datetime as dt

-data_dir = 'data'
 schema = dj.schema("fish_book", locals())


@ -62,14 +60,13 @@ class Dataset(dj.Manual):
        return sane


-
@schema
 class Subject(dj.Manual):
    definition = """
    # Subject
    subject_id : varchar(256)
    ----
-    species : varchar(256) 
+    species : varchar(256)
    """

    @staticmethod
@ -112,7 +109,7 @@ class SubjectProperties(dj.Manual):
    definition = """
    # SubjectProperties
    id : int auto_increment
-    ---- 
+    ----
    -> Subject
    recording_date : date
    weight : float
@ -121,7 +118,8 @@ class SubjectProperties(dj.Manual):
    """

    def get_template_tuple(id=None):
-        tup = dict(id=None, subject_id=None, recording_date=None, weight=0.0, size=0.0, eod_frequency=0.0)
+        tup = dict(id=None, subject_id=None, recording_date=None, weight=0.0, size=0.0,
+                   eod_frequency=0.0)
        if id is not None:
            return dict((SubjectProperties() & {"id": id}).fetch1())
        return tup
@ -131,11 +129,9 @@ class SubjectProperties(dj.Manual):
 class Cell(dj.Manual):
    definition = """
    # Table that stores information about recorded cells.
-    id : int auto_increment
+    cell_id : varchar(256)
    ----
    -> Subject
-    -> Dataset
-    cell_name : varchar(256)
    cell_type : varchar(256)
    firing_rate : float
    structure : varchar(256)
@ -148,41 +144,22 @@ class Cell(dj.Manual):

    @staticmethod
    def get_template_tuple(cell_id=None):
-        tup = dict(id=None, dataset_id=None, subject_id=None, cell_name="", cell_type="", firing_rate=0.0, depth=0.0,
-                   region="", subregion="", structure="", lateral_pos=0.0, transversal_section=0.0)
+        tup = dict(cell_id=None, subject_id=None, cell_type="", firing_rate=0.0,
+                   depth=0.0, region="", subregion="", structure="",
+                   lateral_pos=0.0, transversal_section=0.0)
        if cell_id is not None:
            d = dict((Cell() & {"cell_id": cell_id}).fetch1())
            return d
        return tup


-def read_info(info_file):
-    exp = ""
-    quality = ""
-    comment = ""
-    rec_date = None
-    has_nix = False
-    if not os.path.exists(info_file):
-        return exp, rec_date, quality, comment, has_nix
-    has_nix = len(glob.glob(os.path.sep.join(info_file.split(os.path.sep)[:-1]) + os.path.sep + "*.nix")) > 0
-    info = read_info_file(info_file)
-    p = []
-    find_key_recursive(info, "Experimenter", p)
-    if len(p) > 0:
-        exp = deep_get(info, p)
-    p = []
-    find_key_recursive(info, "Date", p)
-    if len(p) > 0:
-        rec_date = dt.date.fromisoformat(deep_get(info, p))
-    p = []
-    find_key_recursive(info, "Recording quality", p)
-    if len(p) > 0:
-        quality = deep_get(info, p)
-    find_key_recursive(info, "Comment", p)
-    if len(p) > 0:
-        comment = deep_get(info, p, default="")
-
-    return exp, rec_date, quality, comment, has_nix
+@schema
+class CellDatasetMap(dj.Manual):
+    definition = """
+    # Table that maps recorded cells to datasets
+    -> Dataset
+    -> Cell
+    """


 def populate_datasets(data_path):
@ -190,7 +167,7 @@ def populate_datasets(data_path):
    if not os.path.exists(data_path):
        return
    dset_name = os.path.split(data_path)[-1]
-    experimenter, rec_date, quality, comment, has_nix = read_info(os.path.join(data_path, 'info.dat'))
+    experimenter, rec_date, quality, comment, has_nix = read_dataset_info(os.path.join(data_path, 'info.dat'))
    if not experimenter:
        return

@ -243,7 +220,7 @@ def populate_subjects(data_path):


 def populate_cells(data_path):
-    print("Importing subject(s) of %s" % data_path)
+    print("Importing cell(s) of %s" % data_path)
    dset_name = os.path.split(data_path)[-1]
    info_file = os.path.join(data_path, 'info.dat')
    if not os.path.exists(info_file):
@ -266,10 +243,11 @@ def populate_cells(data_path):
    dataset = dict((Dataset & {"dataset_id": dset_name}).fetch1())
    subject = dict((Subject & {"subject_id": subject_info["Identifier"]}).fetch1())

+    dataset_id = dataset["dataset_id"]
+    cell_id = "-".join(dataset_id.split("-")[:4]) if len(dataset_id) > 4 else dataset_id
    cell_props = Cell.get_template_tuple()
    cell_props["subject_id"] = subject["subject_id"]
-    cell_props["dataset_id"] = dataset["dataset_id"]
-    cell_props["cell_name"] = dataset["dataset_id"]
+    cell_props["cell_id"] = cell_id
    cell_props["cell_type"] = cell_info["CellType"]
    cell_props["firing_rate"] = firing_rate
    if "Structure" in cell_info.keys():
@ -287,6 +265,10 @@ def populate_cells(data_path):

    Cell.insert1(cell_props, skip_duplicates=True)

+    # multi match entry
+    mm = dict(dataset_id=dataset["dataset_id"], cell_id=cell_props["cell_id"])
+    CellDatasetMap.insert1(mm, skip_duplicates=True)
+

 def drop_tables():
    Dataset.drop()
@ -301,6 +283,7 @@ def populate(datasets):


 if __name__ == "__main__":
-    datasets = glob.glob('/data/apteronotus/2018-05-08*')
+    data_dir = "../../science/high_frequency_chirps/data"
+    datasets = glob.glob(os.path.join(data_dir, '2018*'))
    # drop_tables()
    populate(datasets)
--- a/util.py
+++ b/util.py
@ -1,5 +1,8 @@
-from IPython import embed
 from functools import reduce
+import os
+import glob
+import datetime as dt
+

 def read_info_file(file_name):
    """
@ -43,3 +46,32 @@ def deep_get(dictionary, keys, default=None):
    assert(isinstance(keys, list))
    return reduce(lambda d, key: d.get(key, default) if isinstance(d, dict) else default, keys, dictionary)

+
+def read_dataset_info(info_file):
+    """
+    Reads the dataset-level metadata from an info file.
+
+    Looks up the "Experimenter", "Date", "Recording quality" and "Comment"
+    entries of the parsed info file and checks whether the folder that
+    contains the info file also holds at least one "*.nix" file.
+
+    :param info_file: path of the info file of a dataset.
+    :return: the tuple (experimenter, rec_date, quality, comment, has_nix).
+             If the file does not exist this is ("", None, "", "", False);
+             entries that are not found keep these defaults.
+    """
+    exp = ""
+    quality = ""
+    comment = ""
+    rec_date = None
+    has_nix = False
+    if not os.path.exists(info_file):
+        return exp, rec_date, quality, comment, has_nix
+    # dirname/join instead of splitting on os.path.sep: for a bare file name
+    # the split-based pattern became "/*.nix" and searched the root folder.
+    has_nix = len(glob.glob(os.path.join(os.path.dirname(info_file), "*.nix"))) > 0
+    info = read_info_file(info_file)
+    # find_key_recursive fills the passed list in place, therefore every
+    # lookup has to start with a fresh, empty list.
+    p = []
+    find_key_recursive(info, "Experimenter", p)
+    if len(p) > 0:
+        exp = deep_get(info, p)
+    p = []
+    find_key_recursive(info, "Date", p)
+    if len(p) > 0:
+        rec_date = dt.date.fromisoformat(deep_get(info, p))
+    p = []
+    find_key_recursive(info, "Recording quality", p)
+    if len(p) > 0:
+        quality = deep_get(info, p)
+    # reset the path; otherwise the "Recording quality" path leaks into the
+    # "Comment" lookup and a missing comment is not detected.
+    p = []
+    find_key_recursive(info, "Comment", p)
+    if len(p) > 0:
+        comment = deep_get(info, p, default="")
+
+    return exp, rec_date, quality, comment, has_nix
+