import nixio as nix
import numpy as np
import os
import pandas as pd
import pickle
from copy import deepcopy

'''
This file converts .nix files into a more readable format (pandas.DataFrame).

NixToFrame(<folder>) searches all subfolders of <folder> for .nix files,
converts them with DataFrame(<nixfile>) and saves them as pickle files.

df = load_data(<filename>) loads these pickle files again.
'''

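# Minimal usage sketch (the paths are made-up examples):
#
#     NixToFrame('/data/recordings/')  # writes <name>_dataframe.pickle next to each .nix file
#     df = load_data('/data/recordings/2020-01-01-aa/2020-01-01-aa_dataframe.pickle')
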
def DataFrame(nixfile, before=0.001, after=0.001, savefile=False, saveto='./', mindepth=0, delimiter=';'):
    '''
    Opens a nix file, extracts the data and converts it to a pandas.DataFrame.

    :param nixfile (string): path and name of the .nix file
    :param before (float): time before each sweep that is saved as well (in the unit given in the file, usually [s])
    :param after (float): time after each sweep that is saved as well (in the unit given in the file, usually [s])
    :param savefile (string, bool): if a string, the dataframe is saved as <savefile>_dataframe.pickle;
                                    if True, the name is derived from nixfile via nixfile.split('.nix')[0]
    :param saveto (string): path of the directory the files are saved in
    :param mindepth (int): minimum number of nested levels in the dataframe columns
    :param delimiter (string): internally used to create the nested dataframe; use a delimiter that
                               does not naturally occur in your protocol names

    :return dataframe (pandas.DataFrame): pandas.DataFrame with the available nix data
    '''

    try:
        block = nix.File.open(nixfile, 'r').blocks[0]
    except Exception:
        print('cannot open ' + nixfile.split('/')[-1] + ', skipping it')
        return None

    data_arrays = block.data_arrays
    names = [data_arrays[i].name for i in range(len(data_arrays))]
    shapes = [x.shape for x in data_arrays]
    # keep only data arrays that are (nearly) as long as the first one
    data_names = np.array([[x, i] for i, x in enumerate(names) if (shapes[i][0] >= 0.999 * shapes[0][0])])
    header_names = [mt.name for mt in block.multi_tags]

    try:
        # loading all traces once also serves as a check for corrupted data arrays
        data_traces = np.array([data_arrays[name][:] for name, idx in data_names])
    except Exception:
        print(nixfile.split('/')[-1] + ' has corrupted data_arrays, no file will be saved')
        return None

    time = data_arrays[1].dimensions[0].axis(data_arrays[1].shape[0])
    dt = time[1] - time[0]

    block_metadata = {}
    block_metadata[block.id] = GetMetadataDict(block.metadata)

    tag = block.tags
    tag_metadata = {}
    tag_id_times = {}
    protocols = [[]] * len(tag)
    for i, t in enumerate(tag):
        protocols[i] = {}
        protocols[i]['id'] = t.id
        protocols[i]['name'] = t.name
        protocols[i]['position'] = t.position[0]
    protocols = pd.DataFrame(protocols)

    for i in range(len(tag)):
        try:
            meta = tag[i].metadata
            tag_metadata[meta.id] = GetMetadataDict(meta)
            tag_id_times[meta.id] = [tag[i].position[0], tag[i].position[0] + tag[i].extent[0]]
        except Exception:
            print(nixfile.split('/')[-1] + ' has no tags or tag metadata, no file will be saved')
            return None

    # older files use ' onset times'/' durations' in the array names; normalize to '_onset_times'/'_durations'
    protocol_idcs_old = np.where([(' onset times' in name) for name in names])[0]
    durations_idcs_old = np.where([(' durations' in name) for name in names])[0]
    for idx in protocol_idcs_old:
        names[idx] = names[idx].split(' onset times')[0] + '_onset_times'
    for idx in durations_idcs_old:
        names[idx] = names[idx].split(' durations')[0] + '_durations'

    protocol_idcs = np.where([('_onset_times' in name) for name in names])[0]

    if len(protocol_idcs) == 0:
        print(nixfile.split('/')[-1] + ' is empty, no file will be saved')
        return None

    data_df = []
    for hn in header_names:
        # all data arrays whose name starts with this multi_tag name
        idcs = np.where([name.startswith(hn) for name in names])[0]
        for j, idx in enumerate(idcs):
            data = data_arrays[int(idx)][:]
            if j == 0:
                i0 = len(data_df)
                protname = hn.split('-')[0]
                typename = hn.split('-')[1]
                if protname == 'VC=':
                    continue

                data_df.extend([[]] * len(data))
                for jj in range(len(data)):
                    data_df[i0 + jj] = {}
                    data_df[i0 + jj]['type'] = typename
                    # correct known typos in the recorded type names
                    if typename == 'PNSubatraction':
                        data_df[i0 + jj]['type'] = 'PNSubtraction'
                    elif typename == 'Qualitycontrol':
                        data_df[i0 + jj]['type'] = 'QualityControl'

            dataname = names[idx].split(hn)[1][1:]
            if protname == 'VC=':
                continue
            if dataname == 'Current-1':
                continue
            for jj in range(len(data)):
                data_df[i0 + jj][dataname] = data[jj]

    # collect all keys that occur in any of the row dictionaries
    keys = np.unique([key for d in data_df for key in d.keys()])
    if 'repro_tag_id' not in keys:
        for i in range(len(data_df)):
            # assign each sweep the last protocol that started before its onset
            data_df[i]['repro_tag_id'] = protocols[~(protocols.position >= data_df[i]['onset_times'])]['id'].iloc[-1]

    traces_idx = np.where([("data.sampled" in d.type) or ("data.events" in d.type) for d in data_arrays])[0]
    traces_df = []

    for i, idx in enumerate(traces_idx):
        for j in range(len(data_df)):
            if i == 0:
                traces_df.append({})

            if "data.sampled" in data_arrays[names[int(idx)]].type:
                idx0 = int((data_df[j]['onset_times'] - before) / dt)
                idx1 = int((data_df[j]['onset_times'] + data_df[j]['durations'] + after) / dt)
                if idx0 >= idx1:
                    traces_df[j][names[idx]] = np.array([np.nan])
                else:
                    traces_df[j][names[idx]] = data_arrays[names[int(idx)]][idx0:idx1]
            elif "data.events" in data_arrays[names[int(idx)]].type:
                idx0 = int((data_df[j]['onset_times'] - before) / dt)
                idx1 = int((data_df[j]['onset_times'] + data_df[j]['durations'] + after) / dt)
                t0 = data_df[j]['onset_times'] - before
                t1 = data_df[j]['onset_times'] + data_df[j]['durations'] + after
                if t0 >= t1:
                    traces_df[j][names[idx]] = np.array([np.nan])
                else:
                    arr = data_arrays[names[int(idx)]][:]
                    # event times relative to the stimulus onset
                    traces_df[j][names[idx]] = arr[(arr >= t0) & (arr <= t1)] - data_df[j]['onset_times']

            if i == 0:
                traces_df[j]['time'] = time[idx0:idx1] - data_df[j]['onset_times']
                traces_df[j]['time_before_stimulus'] = before
                traces_df[j]['time_after_stimulus'] = after
                traces_df[j]['samplingrate'] = 1 / dt
                traces_df[j]['meta_id'] = list(block_metadata.keys())[0]

                if isinstance(data_df[j]['repro_tag_id'], bytes):
                    data_df[j]['repro_tag_id'] = data_df[j]['repro_tag_id'].decode("utf-8")
                traces_df[j]['protocol'] = np.array(protocols[protocols.id == data_df[j]['repro_tag_id']].name)[0].split('_')[0]
                traces_df[j]['protocol_number'] = np.array(protocols[protocols.id == str(data_df[j]['repro_tag_id'])].name)[0].split('_')[1]
                traces_df[j]['id'] = data_df[j]['repro_tag_id']

    metadic = {}
    for key in protocols.id:
        d = GetMetadataDict(tag[key].metadata)
        if (len(d.keys()) == 1) and ('RePro-Info' in list(d.keys())[0]):
            d = d['RePro-Info']
        metadic[key] = DicCrawl(d, key='tag_meta', delimiter=delimiter)
        metadic[key].update(DicCrawl(block_metadata[list(block_metadata.keys())[0]], key='block_meta', delimiter=delimiter))

    meta_df = [[]] * len(data_df)
    for i in range(len(data_df)):
        meta_df[i] = metadic[str(data_df[i]['repro_tag_id'])]

    dics = [meta_df, data_df, traces_df]
    old_maxcount = mindepth

    for k in range(len(dics)):
        for j in range(len(dics[k])):
            keys = list(dics[k][j].keys())

            counts = np.array([key.count(delimiter) for key in keys])
            maxcount = np.max(counts)
            if maxcount < mindepth:
                maxcount = mindepth
            if maxcount < old_maxcount:
                maxcount = old_maxcount

            # append delimiters to the keys until every key has the same number of delimiter-separated levels
            for i, key in enumerate(keys):
                add = maxcount - counts[i]
                newkey = key
                for ii in range(add):
                    newkey += delimiter
                dics[k][j][newkey] = dics[k][j].pop(key)
            old_maxcount = maxcount

    data_df = pd.DataFrame(data_df)
    traces_df = pd.DataFrame(traces_df)
    meta_df = pd.DataFrame(meta_df)

    data = pd.concat([meta_df, data_df, traces_df], axis=1)
    # split the padded keys into a MultiIndex (use the delimiter argument, not a hardcoded ';')
    data.columns = data.columns.str.split(delimiter, expand=True)

    if savefile:
        if savefile is True:
            savefile = nixfile.split('.nix')[0]
        if saveto is not None:
            savefile = savefile.split('/')[-1]
            savefile = saveto + savefile

        with open(savefile + '_dataframe.pickle', 'wb') as f:
            pickle.dump(data, f, -1)  # create pickle files using the highest pickle protocol
    return data


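# Sketch of how to work with the returned nested DataFrame (the column names
# depend on the recording; 'V-1' and 'tag_meta'/'settings' are names used elsewhere
# in this file):
#
#     df = DataFrame('2020-01-01-aa.nix')
#     sweeps = df[df.type == 'Trace']          # stimulus sweeps only
#     volt = sweeps['V-1']                     # cut-out membrane potential traces
#     settings = df['tag_meta']['settings']    # RePro settings per sweep
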
def NixToFrame(folder, before=0.0, after=0.0, skipold=True, saveto=None, mindepth=0):
    '''
    Searches the subfolders of folder for .nix files, converts them to pandas DataFrames
    and saves those as pickle files.

    :param folder: path to a folder that contains subfolders of year-month-day-aa style with .nix files in them
    :param before (float): time before each sweep that is saved as well (see DataFrame)
    :param after (float): time after each sweep that is saved as well (see DataFrame)
    :param skipold (bool): skip creation of a dataframe if one already exists
    :param saveto (string): path where the files should be saved; if None, they are saved in the folder of the .nix file
    '''
    if folder[-1] != '/':
        folder = folder + '/'

    dirlist = os.listdir(folder)
    for subdir in dirlist:
        if not os.path.isdir(folder + subdir):
            continue
        for file in os.listdir(folder + subdir):
            if '.nix' not in file:
                continue
            if skipold and np.sum(['_dataframe.pickle' in x for x in os.listdir(folder + subdir)]) >= 1:
                print('skip ' + file)
                continue
            print(file)
            DataFrame(folder + subdir + '/' + file, before, after, True, saveto, mindepth=mindepth)


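# Expected folder layout (a sketch; the recording names are made up):
#
#     <folder>/2020-01-01-aa/2020-01-01-aa.nix
#     <folder>/2020-01-02-ab/2020-01-02-ab.nix
#
# After NixToFrame(<folder>), each subfolder additionally contains a
# <recording>_dataframe.pickle (or it is written to saveto, if given).
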
def load_data(filename):
    with open(filename, 'rb') as f:
        data = pickle.load(f)  # load the pickled DataFrame
    return data


def GetMetadataDict(metadata):
    # recursively turn a nix metadata section into nested dicts:
    # properties become key/value pairs, subsections become nested dicts
    def unpackMetadata(sec):
        metadata = {prop.name: sec[prop.name] for prop in sec.props}
        if hasattr(sec, 'sections') and len(sec.sections) > 0:
            metadata.update({subsec.name: unpackMetadata(subsec) for subsec in sec.sections})
        return metadata
    return unpackMetadata(metadata)


def DicCrawl(dic, key='', delimiter=';'):
    # flatten a nested dict into a single level with delimiter-joined keys
    keylist = GetKeyList(dic, key=key, delimiter=delimiter)
    newdic = KeylistToDic(keylist)
    return newdic


def GetKeyList(dic, keylist=None, key='', delimiter=';'):
    # NB: a mutable default argument ([]) would accumulate entries across calls
    if keylist is None:
        keylist = []
    for name, value in dic.items():
        if isinstance(value, dict):
            if len(key) == 0:
                keylist = GetKeyList(value, keylist, str(name), delimiter)
            else:
                keylist = GetKeyList(value, keylist, key + delimiter + str(name), delimiter)
        else:
            if len(key) == 0:
                keylist.append([str(name), value])
            else:
                keylist.append([key + delimiter + str(name), value])
    return keylist


def KeylistToDic(keylist):
    dic = {}
    for item in keylist:
        dic[item[0]] = item[1]
    return dic


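# Sketch of the flattening the three helpers above implement (the example dict is made up):
#
#     DicCrawl({'settings': {'gain': 2, 'mode': 'CC'}}, key='tag_meta')
#     # -> {'tag_meta;settings;gain': 2, 'tag_meta;settings;mode': 'CC'}
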
def PNSubtraction(df, currenttrace='Current-1', newcurrenttrace='Current-2', kernelcurrenttrace='Current-3', delimiter=';'):
    '''
    Only for VoltageClamp experiments and WILL BE OBSOLETE, SOON.

    Subtracts the scaled P/N leak sweeps from each trace.

    :param df (pandas.DataFrame): NixFrame.DataFrame
    :param currenttrace: name of the recorded current trace
    :param newcurrenttrace: column name for the P/N-subtracted current
    :param kernelcurrenttrace: column name for the experimental kernel-based correction
                               (computed below but not yet merged into df)
    :param delimiter: delimiter used for the nested column names
    :return: df with the corrected current column added and the PNSubtraction rows dropped
    '''

    df = deepcopy(df)
    if 'TraceId' not in df.columns:
        # older files: reconstruct which sweeps are P/N sweeps from the pn count
        for id in np.unique(df.repro_tag_id):
            dfp = df[df.repro_tag_id == id]
            if np.isnan(dfp.pn.iloc[0]) or dfp.pn.iloc[0] == 0:
                continue

            pn = int(dfp.pn.iloc[0])
            for i in np.arange(0, len(dfp), np.abs(pn) + 1, dtype=int):
                locs_pn = df[df.repro_tag_id == id].iloc[i:i + np.abs(pn)].index
                loc = locs_pn[-1] + 1

                if loc <= df[df.repro_tag_id == id].index[-1]:
                    df.loc[locs_pn, 'type'] = 'PNSubtraction'
                    df.loc[loc, 'type'] = 'Trace'

    trace_idx = np.where(df.type == 'Trace')[0]
    currentdic = [[]] * len(trace_idx)
    kerneldic = [[]] * len(trace_idx)
    # pad the new column names with delimiters to match the depth of the existing MultiIndex
    affix = ''
    affix_num = len(list(df)[0]) - 1
    for i in range(affix_num):
        affix += delimiter

    for idx, i in enumerate(trace_idx):
        if np.isnan(df.pn.iloc[i]) or df.pn.iloc[i] == 0:
            continue

        # collect the P/N sweeps that belong to this trace
        if 'TraceId' in df.columns:
            trace_id = df.TraceId.iloc[i]
            idxvec = np.where((df.TraceId == trace_id) & (df.type == 'PNSubtraction'))[0]
        else:
            delays = (df['onset_times'][df.type == 'PNSubtraction']) - df['onset_times'].iloc[i]
            maxidx = delays[np.array(delays) < 0].index[-1]
            idxvec = np.arange(maxidx - np.abs(df.pn.iloc[i]) + 1, maxidx + .1, dtype=int)

        # NOTE: hf is an external helper module that is not imported in this file;
        # hf.get_traces(name, rows) is expected to return the stacked traces of the given rows
        pn_traces = hf.get_traces(currenttrace, df.iloc[idxvec])
        I_trace = np.array(df[currenttrace].iloc[i])

        # average the P/N sweeps, remove their baseline and scale them back up by pn
        pn_traces = np.mean(pn_traces, axis=1)
        pn_traces -= np.mean(pn_traces[:int(df.iloc[i].time_before_stimulus * df.iloc[i].samplingrate)])
        pn_trace = pn_traces * df.pn.iloc[i]
        I_trace -= np.mean(I_trace[:int(df.iloc[i].time_before_stimulus * df.iloc[i].samplingrate)])

        # --- try kernels (experimental: kerneldic is computed here but not yet merged into df) ---
        Vpn_trace = hf.get_traces('V-1', df.iloc[idxvec])
        tpn = hf.get_traces('time', df.iloc[idxvec])[:, 0]
        Vtr_trace = np.array(df['V-1'].iloc[i])
        ttr = np.array(df['time'].iloc[i])

        kernel_length = 3  # [ms]
        idx0 = np.nanargmin(np.abs(tpn)) - 1
        idx1 = np.nanargmin(np.abs(tpn - kernel_length / 1000))
        idx40 = np.nanargmin(np.abs(tpn - 0.04))
        idx60 = np.nanargmin(np.abs(tpn - 0.06))

        # idealized step traces of the P/N and stimulus potentials
        Vpn = tpn * 0.0 + np.mean(np.mean(Vpn_trace, axis=1)[:idx0])
        Vpn[tpn >= 0] = np.mean(np.mean(Vpn_trace, axis=1)[idx40:idx60])
        Vtr = ttr * 0.0 + np.mean(Vtr_trace[:idx0])
        Vtr[ttr >= 0] = np.mean(Vtr_trace[idx40:idx60])

        # estimate the current-response kernel by deconvolution in Fourier space
        ftinput = np.fft.fft(np.diff(Vpn[idx0:idx1]))
        ftoutput = np.fft.fft(pn_traces[idx0:idx1 - 1])
        kernel = np.fft.ifft(ftoutput / ftinput)
        kernel = np.append(kernel, np.zeros(len(I_trace) - len(kernel)) + np.mean(kernel[-20:]))
        kerneldic[idx] = {}
        kerneldic[idx][kernelcurrenttrace + affix] = I_trace - np.convolve(np.diff(Vtr), kernel)[:len(I_trace)]
        # --- end of kernels ---

        # crop both traces to a common length before subtracting
        if len(pn_trace) < len(I_trace):
            I_trace = I_trace[:len(pn_trace)]
        elif len(pn_trace) > len(I_trace):
            pn_trace = pn_trace[:len(I_trace)]

        currentdic[idx] = {}
        currentdic[idx][newcurrenttrace + affix] = I_trace - pn_trace

    currentdf = pd.DataFrame(currentdic, index=df[df.type == 'Trace'].index)
    currentdf.columns = currentdf.columns.str.split(delimiter, expand=True)
    df = pd.concat([df, currentdf], axis=1)
    df = df.drop(index=df[df.type == 'PNSubtraction'].index)
    return df


def QualityControl(df, currenttrace='Current-1', potentialtrace='V-1', delimiter=';'):
    '''
    Only for VoltageClamp experiments.

    Estimates a resistance from the QualityControl sweeps (20 mV step divided by the
    current difference), attaches the results to the matching Trace rows and drops
    the QualityControl rows.
    '''
    qc = [[]] * len(df[df.type == 'QualityControl'])
    prefix = 'qualitycontrol' + delimiter
    # pad the new column names with delimiters to match the depth of the existing MultiIndex
    affix = ''
    affix_num = len(list(df)[0]) - 2
    for i in range(affix_num):
        affix += delimiter

    # assume that the stimulus has 10 ms at the holding potential followed by 10 ms at holdingpotential - 20 mV
    samplingrate = df.samplingrate.iloc[0]
    idx10ms = int(0.01 * samplingrate)
    idx20ms = int(0.02 * samplingrate)
    idx1ms = int(0.001 * samplingrate)

    for i in range(len(qc)):
        qc[i] = {}
        qc[i][prefix + currenttrace + affix] = df[df.type == 'QualityControl'][currenttrace].iloc[i]
        qc[i][prefix + potentialtrace + affix] = df[df.type == 'QualityControl'][potentialtrace].iloc[i]
        qc[i][prefix + 'onset_times' + affix] = df[df.type == 'QualityControl']['onset_times'].iloc[i]
        qc[i][prefix + 'holdingpotential' + affix] = df[df.type == 'QualityControl']['tag_meta']['settings']['holdingpotential'].iloc[i]
        if 'TraceId' in df.columns:
            qc[i][prefix + 'TraceId' + affix] = df[df.type == 'QualityControl']['TraceId'].iloc[i]

        # mean currents at the end of the two 10 ms steps: R = 20 mV / (I1 - I2)
        I1 = np.mean(df[df.type == 'QualityControl'][currenttrace].iloc[i][idx10ms - idx1ms: idx10ms - 2])
        I2 = np.mean(df[df.type == 'QualityControl'][currenttrace].iloc[i][idx20ms - idx1ms: idx20ms - 2])
        R = 20 / (I1 - I2)
        qc[i][prefix + 'resistance' + affix] = R
    qc_df = pd.DataFrame(qc)

    if len(qc) == 0:
        return df

    qc_empty = {}
    for key in qc[0].keys():
        qc_empty[key] = np.nan
    qc_dic = [dict(qc_empty) for _ in range(len(df))]  # independent copies, one per row

    for idx in df[df.type == 'Trace'].index:
        i = np.where(df.index == idx)[0][0]
        if 'TraceId' in df.columns:
            qc_dic[i] = qc[np.where(qc_df[prefix + 'TraceId' + affix] == df.TraceId.loc[idx])[0][0]]
        else:
            # use the last quality control sweep that started before this trace
            delays = qc_df[prefix + 'onset_times' + affix] - df['onset_times'].loc[idx]
            maxidx = np.where(np.array(delays) < 0)[0][-1]
            qc_dic[i] = qc[maxidx]

    qc = pd.DataFrame(qc_dic, index=df.index)
    qc.columns = qc.columns.str.split(delimiter, expand=True)

    df = pd.concat([df, qc], axis=1)
    df = df.drop(index=df[df.type == 'QualityControl'].index)
    return df

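
# Typical post-processing pipeline for VoltageClamp recordings (a sketch; the file name is made up):
#
#     df = load_data('2020-01-01-aa_dataframe.pickle')
#     df = PNSubtraction(df)   # replaces the P/N leak sweeps by a corrected 'Current-2' column
#     df = QualityControl(df)  # attaches 'qualitycontrol' columns, drops the QC sweeps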