gp_neurobio/code/NixFrame.py
2018-11-15 11:01:41 +01:00

189 lines
7.5 KiB
Python

import nixio as nix
from IPython import embed
import numpy as np
import os
import pandas as pd
import pickle
def DataFrame(nixfile, savefile=False, saveto='./'):
    '''
    opens a nix file, extracts the data and converts it to a pandas.DataFrame

    Only the first block of the file is read. Every stimulus of every protocol (every data array whose
    name contains ' onset times') becomes one row. Protocols with 'VC=' in their name are skipped. If a
    protocol has a '<protocol>_pn' array, only every (|pn|+1)-th stimulus becomes a row and the |pn|
    stimuli before it are used for the pn-subtraction, stored as 'Current-2'.

    :param nixfile (string): path and name of .nix file
    :param savefile (string or bool): if False (default) nothing is saved; if True the dataframe is
        pickled as <name of the nix file>_dataframe.pickle; if a string, as <savefile>_dataframe.pickle
    :param saveto (string): folder the pickle file is written to (default: current working directory)
    :return dataframe (pandas.DataFrame): pandas.DataFrame with available nix data
    '''
    nix_file = nix.File.open(nixfile, 'r')
    try:
        block = nix_file.blocks[0]
        data_arrays = block.data_arrays
        names = [array.name for array in data_arrays]
        shapes = [array.shape for array in data_arrays]

        # continuously sampled traces: every array that is (almost) as long as the first one.
        # np.array stores the index as a string next to the name, hence the int() conversions below
        data_names = np.array([[name, i] for i, name in enumerate(names)
                               if shapes[i][0] >= 0.999*shapes[0][0]])
        data_traces = np.array([data_arrays[name][:] for name, _ in data_names])

        # time axis and sampling interval are taken from the second data array
        time = data_arrays[1].dimensions[0].axis(data_arrays[1].shape[0])
        dt = time[1] - time[0]

        block_id = block.id
        block_meta = getMetadataDict(block.metadata)

        # metadata and [start, end] time of every tag, keyed by the id of the tag's metadata section
        tag_metadata = {}
        tag_id_times = {}
        for tag in block.tags:
            meta = tag.metadata
            tag_metadata[meta.id] = getMetadataDict(meta)
            tag_start = tag.position[0]
            tag_id_times[meta.id] = [tag_start, tag_start + tag.extent[0]]

        protocol_idcs = np.where([' onset times' in name for name in names])[0]

        # arrays between the last trace and the first protocol array are handled as event times;
        # they do not change between sweeps, so they are read from the file only once
        # NOTE(review): assumes this part was not nested in the trace loop - confirm
        event_arrays = {}
        if len(protocol_idcs) > 0:
            first_event = int(data_names[-1][1]) + 1
            for j in range(first_event, int(protocol_idcs[0])):
                event_arrays[names[j]] = np.array(data_arrays[names[j]][:])

        rows = []
        for i, start in enumerate(protocol_idcs):
            start = int(start)
            protocol = names[start].split(' onset times')[0]

            # skip certain protocols
            if 'VC=' in protocol:
                continue

            # the arrays of one protocol reach up to the onset times of the next protocol
            if i == len(protocol_idcs) - 1:
                stop = len(names)
            else:
                stop = int(protocol_idcs[i+1])

            # get new line for every sweep and save the data, make a pn subtraction if necessary
            n_stimuli = len(data_arrays[start][:])
            if protocol + '_pn' in names[start:stop]:
                pn = data_arrays[protocol + '_pn'][0]
                sweeps = np.arange(np.abs(pn), n_stimuli, np.abs(pn) + 1, dtype=int)
            else:
                # None instead of nan: a comparison with nan can not be used to detect it
                pn = None
                sweeps = np.arange(n_stimuli, dtype=int)

            for sweep in sweeps:
                row = {}
                rows.append(row)

                # save protocol names: everything in front of the last '-' is the name
                name_parts = protocol.split('-')
                if len(name_parts) > 2:
                    row['protocol'] = '-'.join(name_parts[:-1])
                    row['protocol_number'] = int(name_parts[-1])
                else:
                    # NOTE(review): in this case the number is kept as a string
                    row['protocol'] = name_parts[0]
                    row['protocol_number'] = name_parts[-1]

                # save id
                row['id'] = data_arrays[start].id

                # save rest of stored data
                for j in range(start, stop):
                    if (' durations' in names[j]) or (' onset times' in names[j]):
                        continue
                    value = data_arrays[j][sweep]
                    key = names[j].split(protocol + '_')[-1]
                    row[key] = value[0] if len(value) == 1 else value
                row['samplingrate'] = 1/dt

                # save data arrays
                onset = data_arrays[protocol + ' onset times'][sweep]
                dur = data_arrays[protocol + ' durations'][sweep]
                t0 = int(onset/dt)
                t1 = int((onset+dur)/dt+1)
                row['onset time'] = onset
                row['duration'] = dur

                # data_traces was built in the order of data_names, so it is indexed by the
                # position within data_names and not by the index of the array in the file
                for trace_row, (trace_name, _) in enumerate(data_names):
                    row[trace_name] = data_traces[trace_row][t0:t1]

                # events that lie within the sweep (absolute times)
                for event_name, events in event_arrays.items():
                    in_sweep = (events >= onset) & (events <= onset+dur)
                    row[event_name] = events[in_sweep]

                row['time'] = time[t0:t1] - row['onset time']

                # pn-subtraction (if necessary)
                # TODO: change the location of the pn (it is already in the metadata,
                # it is not needed as an option)
                if pn is not None and np.abs(pn) > 0:
                    current_row = np.where(data_names[:, 0] == 'Current-1')[0][0]
                    n_samples = len(row['Current-1'])
                    pn_curr = np.zeros(n_samples)
                    # sum up the current of the |pn| stimuli right before this sweep
                    for k in range(int(np.abs(pn))):
                        pn_onset = data_arrays[protocol + ' onset times'][sweep-k-1]
                        pn_t0 = int(pn_onset / dt)
                        pn_t1 = int(pn_onset/dt + n_samples)
                        pn_curr += data_traces[int(current_row), pn_t0:pn_t1]
                    # pn/|pn| is the sign of pn
                    row['Current-2'] = row['Current-1'] - pn/np.abs(pn)*pn_curr

                # this one saves the complete metadata in EVERY line
                # TODO: think of something better
                tag_id = None
                for key, (tag_start, tag_stop) in tag_id_times.items():
                    # no break: if several tags contain the onset, the last one is used
                    if (row['onset time'] >= tag_start) and (row['onset time'] <= tag_stop):
                        tag_id = key

                # save metadata; tag_meta is None if no tag contains the onset time
                row['block_meta'] = block_meta
                row['tag_meta'] = tag_metadata.get(tag_id)

                # add block id
                row['block_id'] = block_id
                row['tag_id'] = tag_id
    finally:
        # everything needed was read above, do not leave the file open
        nix_file.close()

    data = pd.DataFrame(rows)

    if savefile:
        if not isinstance(savefile, str):
            # no name given: use the name of the nix file
            savefile = os.path.basename(nixfile).split('.nix')[0]
        target = os.path.join(saveto if saveto else '.', savefile + '_dataframe.pickle')
        with open(target, 'wb') as f:
            pickle.dump(data, f, -1)  # create pickle-files, using the highest pickle-protocol
    return data
def NixToFrame(folder):
    '''
    searches the direct subfolders of folder for .nix files and converts each of them with DataFrame

    Every file whose name ends with '.nix' is printed and handed to DataFrame with savefile=True and
    saveto=folder. Files that lie directly in folder are not converted.

    :param folder: path to folder that contains subfolders of year-month-day-aa style that contain .nix files
    '''
    # make sure the folder ends with a path separator before it is passed on as saveto
    folder = os.path.join(folder, '')

    for subfolder in os.listdir(folder):
        subfolder_path = os.path.join(folder, subfolder)
        if not os.path.isdir(subfolder_path):
            continue
        for filename in os.listdir(subfolder_path):
            # only real .nix files, not every name that contains '.nix' somewhere
            if not filename.endswith('.nix'):
                continue
            print(filename)
            DataFrame(os.path.join(subfolder_path, filename), True, folder)
def load_data(filename):
    '''
    loads a pickled object (e.g. a dataframe saved by DataFrame) from a file

    Unpickling can execute arbitrary code, so only open files from a trusted source.

    :param filename (string): path and name of the pickle file
    :return data: the object that was stored in the file
    '''
    with open(filename, 'rb') as pickle_file:
        return pickle.load(pickle_file)
def getMetadataDict(metadata):
    '''
    converts a metadata section into a (nested) dictionary

    Every property of the section is stored under its name. Every subsection is converted the same
    way and stored under the name of the subsection.

    :param metadata: section with props and, optionally, sections
    :return (dict): dictionary with the properties and subsections of the section
    '''
    def _unpack(section):
        unpacked = {}
        for prop in section.props:
            unpacked[prop.name] = section[prop.name]

        # subsections come after the properties and win if a name is used twice
        subsections = getattr(section, 'sections', None)
        if subsections is not None and len(subsections) > 0:
            for subsection in subsections:
                unpacked[subsection.name] = _unpack(subsection)
        return unpacked

    return _unpack(metadata)