# gp_neurobio/code/NixFrame.py

import nixio as nix
from IPython import embed
import numpy as np
import os
import pandas as pd
import pickle

def DataFrame(nixfile, savefile=False, saveto='./'):
    '''
    opens a nix file, extracts the data and converts it to a pandas.DataFrame

    Every sweep of every protocol (a data array whose name contains ' onset times')
    becomes one row. A row holds the protocol name/number, the per-sweep values of the
    arrays stored with the protocol, the cut-out traces and events of the sweep, a time
    axis relative to the sweep onset and the block/tag metadata. Protocols whose name
    contains 'VC=' are skipped. If a '<protocol>_pn' array exists, only every
    (|pn|+1)-th sweep gets a row and the preceding |pn| sweeps are used for a
    P/N subtraction of 'Current-1', stored as 'Current-2'.

    :param nixfile (string): path and name of .nix file
    :param savefile (bool or string): if not False, the dataframe will be saved as
        <savefile>_dataframe.pickle; True uses the name of the nix file as <savefile>
    :param saveto (string): folder the pickle file is written to
    :return dataframe (pandas.DataFrame): pandas.DataFrame with available nix data
    '''

    nix_handle = nix.File.open(nixfile, 'r')
    try:
        block = nix_handle.blocks[0]

        data_arrays = block.data_arrays
        names = [data_arrays[i].name for i in range(len(data_arrays))]
        shapes = [x.shape for x in data_arrays]

        # traces are all arrays that are (almost) as long as the first data array
        trace_idcs = [i for i in range(len(names)) if shapes[i][0] >= 0.999*shapes[0][0]]
        trace_names = [names[i] for i in trace_idcs]
        # kept as a list of arrays: the length tolerance above admits traces of
        # slightly different length, which cannot be stacked into one 2-D array
        data_traces = [data_arrays[name][:] for name in trace_names]
        time = np.asarray(data_arrays[1].dimensions[0].axis(data_arrays[1].shape[0]))
        dt = time[1]-time[0]

        block_id = block.id
        block_meta = getMetadataDict(block.metadata)

        # metadata and [start, stop] time of every tag, keyed by the id of the tag's metadata
        tag_metadata = {}
        tag_id_times = {}
        for tag in block.tags:
            meta = tag.metadata
            tag_metadata[meta.id] = getMetadataDict(meta)
            tag_id_times[meta.id] = [tag.position[0], tag.position[0]+tag.extent[0]]

        protocol_idcs = np.where([' onset times' in name for name in names])[0]

        # event arrays are the arrays between the last trace and the first protocol;
        # they are read once here instead of twice per sweep
        if len(protocol_idcs) > 0:
            event_names = names[trace_idcs[-1]+1:int(protocol_idcs[0])]
        else:
            event_names = []
        events = {name: np.asarray(data_arrays[name][:]) for name in event_names}

        data = []
        for i in range(len(protocol_idcs)):
            start = int(protocol_idcs[i])
            protocol = names[start].split(' onset times')[0]

            # skip certain protocols
            if 'VC=' in protocol:
                continue

            # number of data arrays that belong to this protocol
            if i == len(protocol_idcs)-1:
                meta_len = len(names) - start
            else:
                meta_len = int(protocol_idcs[i+1]) - start

            # one row per sweep; with pn only every (|pn|+1)-th sweep is a real sweep
            if (protocol + '_pn') in names[start:start+meta_len]:
                pn = data_arrays[protocol + '_pn'][0]
                sweeps = np.arange(np.abs(pn), len(data_arrays[start][:]), (np.abs(pn)+1), dtype=int)
            else:
                pn = None
                sweeps = np.arange(len(data_arrays[start][:]), dtype=int)

            for sweep in sweeps:
                row = {}

                # save protocol names
                # NOTE: the protocol number is an int for names with more than one '-'
                # and a string otherwise; kept as is because users of the dataframe
                # may rely on it
                split_vec = protocol.split('-')
                if len(split_vec) > 2:
                    prot_name = '-'.join(split_vec[:-1])
                    prot_num = int(split_vec[-1])
                else:
                    prot_name = split_vec[0]
                    prot_num = split_vec[-1]
                row['protocol'] = prot_name
                row['protocol_number'] = prot_num

                # save id
                row['id'] = data_arrays[start].id

                # save rest of stored data
                for j in range(start, start+meta_len):
                    if (' durations' in names[j]) or (' onset times' in names[j]):
                        continue
                    value = data_arrays[j][sweep]
                    key = names[j].split(protocol + '_')[-1]
                    row[key] = value[0] if len(value) == 1 else value
                row['samplingrate'] = 1/dt

                # save data arrays
                onset = data_arrays[protocol + ' onset times'][sweep]
                dur = data_arrays[protocol + ' durations'][sweep]
                t0 = int(onset/dt)
                t1 = int((onset+dur)/dt+1)
                row['onset time'] = onset
                row['duration'] = dur

                for name, trace in zip(trace_names, data_traces):
                    row[name] = trace[t0:t1]

                for name, values in events.items():
                    in_sweep = (values >= onset) & (values <= onset+dur)
                    row[name] = values[in_sweep]

                row['time'] = time[t0:t1] - row['onset time']

                # pn-subtraction (if necessary)
                # TODO: change the location of the pn (it is already in the metadata,
                # it is not needed as an option)
                if pn is not None and np.abs(pn) > 0:
                    current = data_traces[trace_names.index('Current-1')]
                    n_samples = len(row['Current-1'])
                    pn_curr = np.zeros(n_samples)
                    # sum the |pn| sweeps that precede the actual sweep
                    for j in range(int(np.abs(pn))):
                        pn_onset = data_arrays[protocol + ' onset times'][sweep-j-1]
                        pn_t0 = int(pn_onset / dt)
                        pn_t1 = int(pn_onset/dt + n_samples)
                        pn_curr += current[pn_t0:pn_t1]

                    row['Current-2'] = row['Current-1'] - pn/np.abs(pn)*pn_curr

                # TODO: this saves the complete metadata in EVERY line,
                # think of something better

                # the last tag whose time span contains the sweep onset wins
                tag_id = None
                for key in tag_id_times.keys():
                    if (row['onset time'] >= tag_id_times[key][0]) and (row['onset time'] <= tag_id_times[key][1]):
                        tag_id = key

                # save metadata
                row['block_meta'] = block_meta
                row['tag_meta'] = tag_metadata[tag_id]

                # add block id
                row['block_id'] = block_id
                row['tag_id'] = tag_id

                data.append(row)
    finally:
        # everything needed has been copied into memory, release the file handle
        nix_handle.close()

    data = pd.DataFrame(data)
    if savefile != False:
        if savefile == True:
            savefile = nixfile.split('/')[-1].split('.nix')[0]

        with open(os.path.join(saveto, savefile + '_dataframe.pickle'), 'wb') as f:
            pickle.dump(data, f, -1)  # create pickle-files, using the highest pickle-protocol
    return data

def NixToFrame(folder):
    '''
    walks through the direct subfolders of folder and calls DataFrame() with saving
    enabled on every file whose name contains '.nix'; the name of each such file is printed

    :param folder: path to folder that contains subfolders of year-month-day-aa style that contain .nix files
                   (it is also handed to DataFrame() as its saveto argument)
    '''
    # make sure the path ends with a slash so names can simply be appended
    root = folder if folder[-1] == '/' else folder + '/'

    for entry in os.listdir(root):
        subfolder = root + entry
        if not os.path.isdir(subfolder):
            continue

        for filename in os.listdir(subfolder):
            if '.nix' not in filename:
                continue
            print(filename)
            DataFrame(subfolder + '/' + filename, True, root)
def load_data(filename):
    '''
    loads a pickled object (e.g. a dataframe saved by DataFrame()) from disk

    :param filename (string): path and name of the pickle file
    :return data: the unpickled object
    '''
    # NOTE: unpickling can execute arbitrary code, only load files you trust
    with open(filename, 'rb') as handle:
        return pickle.load(handle)

def getMetadataDict(metadata):
    '''
    converts a metadata section into a (nested) dictionary

    :param metadata: section object providing props, item access by property name and (optionally) sections
    :return (dict): property names mapped to their values, subsection names mapped to their own dictionaries
    '''
    def section_to_dict(section):
        unpacked = {}
        for prop in section.props:
            unpacked[prop.name] = section[prop.name]

        # nothing to descend into
        if not hasattr(section, 'sections') or len(section.sections) == 0:
            return unpacked

        # convert all subsections first, then merge: a subsection replaces a property of the same name
        nested = {}
        for child in section.sections:
            nested[child.name] = section_to_dict(child)
        unpacked.update(nested)
        return unpacked

    return section_to_dict(metadata)