import nixio as nix from IPython import embed import numpy as np import os import pandas as pd import pickle import helperfunctions as hf from copy import deepcopy import gc def DataFrame(nixfile, before=0.001, after=0.001, savefile=False, saveto='./', mindepth=0, delimiter=';'): ''' opens a nix file, extracts the data and converts it to a pandas.DataFrame :param nixfile (string): path and name of .nix file :param overlap (float): time before and after sweeps that is saved, too :param savefile (string): if not False, the dataframe will be saved as .pickle :param saveto (string): path to save the files in NOT IMPLEMENTED YET :return dataframe (pandas.DataFrame): pandas.DataFrame with available nix data ''' try: block = nix.File.open(nixfile,'r').blocks[0] except: print('cannot open ' + nixfile.split('/')[-1] + ', skip it') return None data_arrays = block.data_arrays names = [data_arrays[i].name for i in range(len(data_arrays))] shapes = [x.shape for x in data_arrays] data_names = np.array([[x, i] for i, x in enumerate(names) if (shapes[i][0] >= 0.999 * shapes[0][0])]) header_names = [mt.name for mt in block.multi_tags] try: data_traces = np.array([data_arrays[name][:] for name, idx in data_names]) except: print(nixfile.split('/')[-1] + ' has corrputed data_arrays, no file will be saved') return None time = data_arrays[1].dimensions[0].axis(data_arrays[1].shape[0]) dt = time[1]-time[0] block_metadata = {} block_metadata[block.id] = GetMetadataDict(block.metadata) tag = block.tags tag_metadata = {} tag_id_times = {} protocols = [[]] * len(tag) for i,t in enumerate(tag): protocols[i] = {} protocols[i]['id'] = t.id protocols[i]['name'] = t.name protocols[i]['position'] = t.position[0] protocols = pd.DataFrame(protocols) for i in range(len(tag)): try: meta = tag[i].metadata tag_metadata[meta.id] = GetMetadataDict(meta) # tag_metadata[meta.id].update(DicCrawl(GetMetadataDict(meta), mindepth=mindepth)) tag_id_times[meta.id] = [tag[i].position[0], tag[i].position[0]+tag[i].extent[0]] except: print(nixfile.split('/')[-1] + ' has no tags, no file will be saved') return None protocol_idcs = np.where([(' onset times' in name) for name in names])[0] if len(protocol_idcs) == 0: print(nixfile.split('/')[-1] + ' is empty, no file will be saved') return None data_df = [] for hn in header_names: idcs = np.where(np.array([hn in name[:len(hn)] for name in names], dtype=int))[0] for j,idx in enumerate(idcs): data = data_arrays[int(idx)][:] if j == 0: i0 = len(data_df) protname = hn.split('-')[0] typename = hn.split('-')[1] if protname == 'VC=': continue data_df.extend([[]]*len(data)) for jj in range(len(data)): data_df[i0 + jj] = {} data_df[i0 + jj]['type'] = typename # data_df[i0 + jj]['row_id'] = uuid.uuid4() if typename == 'PNSubatraction': data_df[i0 + jj]['type'] = 'PNSubtraction' elif typename == 'Qualitycontrol': data_df[i0 + jj]['type'] = 'QualityControl' dataname = names[idx].split(hn)[1][1:] if protname == 'VC=': continue if dataname == 'Current-1': continue for jj in range(len(data)): data_df[i0 + jj][dataname] = data[jj] keys = np.unique([list(d.keys()) for d in data_df]) k = [] for key in keys: k.extend(key) keys = np.unique(k) if 'repro_tag_id' not in keys: for i in range(len(data_df)): data_df[i]['repro_tag_id'] = protocols[~(protocols.position >= data_df[i]['onset times'])]['id'].iloc[-1] traces_idx = np.where([("data.sampled" in d.type) or ("data.events" in d.type) for d in data_arrays])[0] traces_df = [] for i,idx in enumerate(traces_idx): # print(i,i/len(traces_idx)) for j in range(len(data_df)): if i == 0: traces_df.append({}) if "data.sampled" in data_arrays[names[int(idx)]].type: idx0 = int((data_df[j]['onset times'] - before) / dt) idx1 = int((data_df[j]['onset times'] + data_df[j]['durations'] + after) / dt + 1) # traces_df[names[idx]] = data_arrays[int(idx)][:][idx0:idx1] traces_df[j][names[idx]] = data_arrays[names[int(idx)]][idx0:idx1] elif "data.events" in data_arrays[names[int(idx)]].type: t0 = data_df[j]['onset times'] - before t1 = data_df[j]['onset times'] + data_df[j]['durations'] + after arr = data_arrays[names[int(idx)]][:] traces_df[j][names[idx]] = arr[(arr>=t0) & (arr<=t1)] if i == 0: traces_df[j]['time'] = time[idx0:idx1] - data_df[j]['onset times'] traces_df[j]['time_before_stimulus'] = before traces_df[j]['time_after_stimulus'] = after traces_df[j]['samplingrate'] = 1 / dt traces_df[j]['meta_id'] = list(block_metadata.keys())[0] if type(data_df[j]['repro_tag_id']) == bytes: data_df[j]['repro_tag_id'] = data_df[j]['repro_tag_id'].decode("utf-8") traces_df[j]['protocol'] = np.array(protocols[protocols.id == data_df[j]['repro_tag_id']].name)[0].split('_')[0] traces_df[j]['protocol_number'] = np.array(protocols[protocols.id == str(data_df[j]['repro_tag_id'])].name)[0].split('_')[1] traces_df[j]['id'] = data_df[j]['repro_tag_id'] metadic = {} print('meta') for i,key in enumerate(protocols.id): d = GetMetadataDict(tag[key].metadata) if (len(d.keys()) == 1) and ('RePro-Info' in list(d.keys())[0]): d = d['RePro-Info'] metadic[key] = DicCrawl(d, key='tag_meta', delimiter=delimiter) metadic[key].update(DicCrawl(block_metadata[list(block_metadata.keys())[0]], key='block_meta', delimiter=delimiter)) meta_df = [[]]*len(data_df) for i in range(len(data_df)): meta_df[i] = metadic[str(data_df[i]['repro_tag_id'])] dics = [meta_df, data_df, traces_df] # old_maxcount = mindepth # for i in range(len(protocol_idcs)): # protocol = names[protocol_idcs[i]].split(' onset times')[0] # # #skip certain protocols # if 'VC=' in protocol: # # print('skip this protocol') # continue # #number of meta data entries # if i == len(protocol_idcs)-1: # meta_len = len(names) - protocol_idcs[i] # else: # meta_len = protocol_idcs[i+1] - protocol_idcs[i] # # #get new line for every sweep and save the data, make a pn subtraction if necessary # if any([protocol + '_pn' == string for string in names[protocol_idcs[i]:protocol_idcs[i]+meta_len]]): # pn = data_arrays[protocol + '_pn'][0] # sweeps = np.arange(np.abs(pn),len(data_arrays[int(protocol_idcs[i])][:]),(np.abs(pn)+1), dtype=int) # else: # pn = np.nan # sweeps = np.arange(len(data_arrays[int(protocol_idcs[i])][:]), dtype=int) # # for sweep in sweeps: # stim_num +=1 # data.append({}) # # # save protocol names # split_vec = protocol.split('-') # if len(split_vec)>2: # prot_name = split_vec[0] # prot_num = int(split_vec[-1]) # for j in range(len(split_vec)-2): # prot_name += '-' + split_vec[j+1] # else: # prot_name = split_vec[0] # prot_num = split_vec[-1] # data[stim_num]['protocol'] = prot_name # data[stim_num]['protocol_number'] = prot_num # data[stim_num]['time_before_stimulus'] = before # data[stim_num]['time_after_stimulus'] = after # # #save id # data[stim_num]['id'] = data_arrays[int(protocol_idcs[i])].id # # #save rest of stored data # for idx in range(meta_len): # j = int(protocol_idcs[i] + idx) # if (' durations' in names[j]) or (' onset times' in names[j]): # continue # if len(data_arrays[j][sweep]) == 1: # data[stim_num][names[j].split(protocol + '_')[-1]] = data_arrays[j][sweep][0] # else: # data[stim_num][names[j].split(protocol+'_')[-1]] = data_arrays[j][sweep] # data[stim_num]['samplingrate'] = 1/dt # # #save data arrays # onset = data_arrays[protocol + ' onset times'][sweep] # dur = data_arrays[protocol + ' durations'][sweep] # t0 = int((onset-before)/dt) # t1 = int((onset+after+dur)/dt+1) # data[stim_num]['onset time'] = onset # data[stim_num]['duration'] = dur # # for name,idx in data_names: # data[stim_num][name] = data_traces[int(idx)][t0:t1] # # for j in np.arange(int(idx)+1,protocol_idcs[0]): # bool_vec = (data_arrays[names[j]][:]>=onset) & (data_arrays[names[j]][:]<=onset+dur) # data[stim_num][names[j]] = np.array(data_arrays[names[j]])[bool_vec] # # data[stim_num]['time'] = time[t0:t1] - data[stim_num]['onset time'] # # #pn-subtraction (if necessary) # ''' # change the location of the pn (its already in the metadata, you dont need it as option # ''' # if pn != np.nan and np.abs(pn)>0: # pn_curr = np.zeros(len(data[stim_num][name])) # idx = np.where(data_names[:,0] == 'Current-1')[0][0] # for j in range(int(np.abs(pn))): # onset = data_arrays[protocol + ' onset times'][sweep-j-1] # t0 = int((onset-before) / dt) # t1 = int(t0 + len(pn_curr)) # # pn_curr += data_traces[int(idx),t0:t1] - np.mean(data_traces[int(idx),t0:int(t0+before/dt)]) # import matplotlib.pyplot as plt # plt.plot(data_traces[int(idx),t0:t1] - np.mean(data_traces[int(idx),t0:int(t0+before/dt)])) # # data[stim_num]['Current-2'] = data[stim_num]['Current-1'] - pn/np.abs(pn)*pn_curr - np.mean(data[stim_num]['Current-1'][0:int(before/dt)]) # # for mt in block.tags: # # if 'Trace' in mt.metadata.name: # print(mt.name) # # mt.metadata.pprint() # embed() # # ''' # this one saves the complete metadata in EVERY line # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!THINK OF SOMETHING BETTER!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # ''' # # tag_id = None # for key in tag_id_times.keys(): # if (data[stim_num]['onset time'] >= tag_id_times[key][0]) and (data[stim_num]['onset time'] <= tag_id_times[key][1]): # tag_id = key # # # save metadata # # data[stim_num]['block_meta'] = block_metadata[list(block_metadata.keys())[0]] # data[stim_num].update(DicCrawl(block_metadata[list(block_metadata.keys())[0]], key='block_meta', delimiter=delimiter)) # # if tag_id != None: # tagmetadata = tag_metadata[tag_id] # if (len(tagmetadata)==1) & (list(tagmetadata.keys())[0] == 'RePro-Info'): # # data[stim_num]['tag_meta'] = tag_metadata[tag_id]['RePro-Info'] # data[stim_num].update(DicCrawl(tag_metadata[tag_id]['RePro-Info'], key='tag_meta', delimiter=delimiter)) # else: # # data[stim_num]['tag_meta'] = tag_metadata[tag_id] # data[stim_num].update(DicCrawl(tag_metadata[tag_id], delimiter=delimiter)) # # elif tag_id == None: # print('no tag metadata for ' + nixfile.split('/')[-1] + ', ' + protocol) # # # add block id # data[stim_num]['block_id'] = list(block_metadata.keys())[0] # data[stim_num]['tag_id'] = tag_id # add delimiters at end of keys to be able to correctly split dataframe # count maximum depth of the keylist (count max. of delimiters in a row) old_maxcount = mindepth for k in range(len(dics)): for j in range(len(dics[k])): keys = list(dics[k][j].keys()) counts = np.array([key.count(delimiter) for key in keys]) maxcount = np.max(counts) if maxcount < mindepth: maxcount = mindepth if maxcount < old_maxcount: maxcount = old_maxcount # append delimiters to the keys until each key contains the same number of keys for i, key in enumerate(keys): add = maxcount - counts[i] newkey=key for ii in range(add): newkey += delimiter dics[k][j][newkey] = dics[k][j].pop(key) old_maxcount = maxcount data_df = pd.DataFrame(data_df) traces_df = pd.DataFrame(traces_df) meta_df = pd.DataFrame(meta_df) data = pd.concat([meta_df, data_df, traces_df], axis=1, sort=False) data.columns = data.columns.str.split(';', expand=True) if savefile != False: if savefile == True: # savefile = nixfile.split('/')[-1].split('.nix')[0] savefile = nixfile.split('.nix')[0] if saveto != None: savefile = savefile.split('/')[-1] savefile = saveto+savefile with open(savefile + '_dataframe.pickle', 'wb') as f: pickle.dump(data, f, -1) # create pickle-files, using the highest pickle-protocol return data def NixToFrame(folder, before=0.0, after=0.0, skipold=True, saveto=None, mindepth=0): ''' searches subfolders of folder to convert .nix files to a pandas dataframe and saves them in the folder :param folder: path to folder that contains subfolders of year-month-day-aa style that contain .nix files :param skipold (bool): skip creation of dataframe if it already eists :param saveto (string): path where files should be saved, if None it saves in the folder, where the .nix file is ''' if folder[-1] != '/': folder = folder + '/' dirlist = os.listdir(folder) for dir in dirlist: if os.path.isdir(folder + dir): for file in os.listdir(folder+dir): if ('.nix' in file): if skipold == True: if np.sum(['_dataframe.pickle' in x for x in os.listdir(folder+dir)]) >= 1: print('skip ' + file) continue print(file) DataFrame(folder+dir+'/'+file, before, after, True, saveto, mindepth=mindepth) def GetMetadataDict(metadata): def unpackMetadata(sec): metadata = dict() metadata = {prop.name: sec[prop.name] for prop in sec.props} if hasattr(sec, 'sections') and len(sec.sections) > 0: metadata.update({subsec.name: unpackMetadata(subsec) for subsec in sec.sections}) return metadata return unpackMetadata(metadata) def DicCrawl(dic, key='', delimiter=';'): keylist = GetKeyList(dic, key=key, delimiter=delimiter) newdic = KeylistToDic(keylist) return newdic def GetKeyList(dic, keylist=[], key='', delimiter=';'): items = dic.items() for item in items: if type(item[1]) == dict: if len(key) == 0: keylist = GetKeyList(item[1], keylist, str(item[0]), delimiter) else: keylist = GetKeyList(item[1], keylist, key + delimiter + str(item[0]), delimiter) else: if len(key) == 0: keylist.append([str(item[0]), item[1]]) else: keylist.append([key + delimiter + str(item[0]), item[1]]) return keylist def KeylistToDic(keylist): dic = {} for item in keylist: dic[item[0]] = item[1] return dic def load_data(filename): with open(filename, 'rb') as f: data = pickle.load(f) # load data with pickle return data def PNSubtraction(df, currenttrace='Current-1', newcurrenttrace='Current-2', kernelcurrenttrace='Current-3', delimiter=';'): df = deepcopy(df) embed() if 'TraceId' not in df.columns: for id in np.unique(df.repro_tag_id): dfp = df[df.repro_tag_id == id] if np.isnan(dfp.pn.iloc[0]) or dfp.pn.iloc[0] == 0: continue pn = int(dfp.pn.iloc[0]) for i in np.arange(0,len(dfp),np.abs(pn)+1, dtype=int): locs_pn = df[df.repro_tag_id == id].iloc[i:i+np.abs(pn)].index loc = locs_pn[-1] + 1 if loc <= df[df.repro_tag_id == id].index[-1]: df.loc[locs_pn, 'type'] = 'PNSubtraction' df.loc[loc, 'type'] = 'Trace' trace_idx = np.where(df.type == 'Trace')[0] currentdic = [[]] * len(trace_idx) kerneldic = [[]] * len(trace_idx) affix = '' affix_num = len(list(df)[0]) - 1 for i in range(affix_num): affix += delimiter for idx,i in enumerate(trace_idx): if np.isnan(df.pn.iloc[i]) or df.pn.iloc[i] == 0: continue if 'TraceId' in df.columns: trace_id = df.TraceId.iloc[i] idxvec = np.where((df.TraceId == trace_id) & (df.type == 'PNSubtraction'))[0] else: delays = (df['onset times'][df.type == 'PNSubtraction']) - df['onset times'].iloc[i] maxidx = delays[np.array(delays)<0].index[-1] idxvec = np.arange(maxidx-np.abs(df.pn.iloc[i])+1, maxidx+.1, dtype=int) pn_traces = hf.get_traces(currenttrace, df.iloc[idxvec]) I_trace = np.array(df[currenttrace].iloc[i]) pn_traces = np.mean(pn_traces, axis=1) pn_traces -= np.mean(pn_traces[:int(df.iloc[i].time_before_stimulus * df.iloc[i].samplingrate)]) pn_trace = pn_traces*df.pn.iloc[i] I_trace -= np.mean(I_trace[:int(df.iloc[i].time_before_stimulus * df.iloc[i].samplingrate)]) ''' try kernels ''' Vpn_trace = hf.get_traces('V-1', df.iloc[idxvec]) tpn = hf.get_traces('time', df.iloc[idxvec])[:, 0] Vtr_trace = np.array(df['V-1'].iloc[i]) ttr = np.array(df['time'].iloc[i]) kernel_length = 3 idx0 = np.nanargmin(np.abs(tpn)) - 1 idx1 = np.nanargmin(np.abs(tpn - kernel_length / 1000)) idx40 = np.nanargmin(np.abs(tpn - 0.04)) idx60 = np.nanargmin(np.abs(tpn - 0.06)) Vpn = tpn * 0.0 + np.mean(np.mean(Vpn_trace, axis=1)[:idx0]) Vpn[tpn >= 0] = np.mean(np.mean(Vpn_trace, axis=1)[idx40:idx60]) Vtr = ttr * 0.0 + np.mean(Vtr_trace[:idx0]) Vtr[ttr >= 0] = np.mean(Vtr_trace[idx40:idx60]) ftinput = np.fft.fft(np.diff(Vpn[idx0:idx1])) ftoutput = np.fft.fft(pn_traces[idx0:idx1 - 1]) kernel = np.fft.ifft(ftoutput / ftinput) kernel = np.append(kernel, np.zeros(len(I_trace) - len(kernel))+np.mean(kernel[-20:])) # kernel = kernel - np.mean(kernel[-20:]) # kernel2 = ttr*0.0 # kernel2[ttr>=0] = np.mean(kernel[-20:]) kerneldic[idx] = {} kerneldic[idx][kernelcurrenttrace + affix] = I_trace - np.convolve(np.diff(Vtr), kernel)[:len(I_trace)] #- np.convolve(np.diff(Vtr),kernel2)[:len(I_trace)] ''' end of kernels ''' if len(pn_trace) < len(I_trace): # print(len(pn_traces) - len(I_trace)) I_trace = I_trace[:len(pn_trace)] elif len(pn_trace) > len(I_trace): # print(len(pn_traces) - len(I_trace)) pn_trace = pn_trace[:len(I_trace)] currentdic[idx] = {} currentdic[idx][newcurrenttrace+affix] = I_trace - pn_trace if df.protocol.iloc[i] == 'Recovery': import matplotlib.pyplot as plt plt.plot(I_trace,'k', label='original') plt.plot(kerneldic[idx][kernelcurrenttrace + affix], label='kernel') plt.plot(currentdic[idx][newcurrenttrace+affix],label='pn') plt.legend() plt.show() embed() # import matplotlib.pyplot as plt # if df.protocol.iloc[i] == 'Activation': # plt.figure() # plt.title(df.step.iloc[i]) # # plt.plot(hf.get_traces(currenttrace, df.iloc[idxvec]), 'k') # # plt.plot(np.array(df[currenttrace].iloc[i])) # plt.plot(pn_traces, 'k') # plt.plot(I_trace) # df[i][newcurrenttrace] = I_trace # plt.show() currentdf = pd.DataFrame(currentdic, index=df[df.type == 'Trace'].index) currentdf.columns = currentdf.columns.str.split(';', expand=True) df = pd.concat([df, currentdf], axis=1) df = df.drop(index = df[df.type == 'PNSubtraction'].index) return df def QualityControl(df, currenttrace='Current-1', potentialtrace='V-1', delimiter=';'): qc = [[]]*len(df[df.type == 'QualityControl']) prefix = 'qualitycontrol' + delimiter affix = '' affix_num = len(list(df)[0]) - 2 for i in range(affix_num): affix += delimiter # assume that stimulus has 10ms at holdingpotential followed by 10ms at holdingpotential-20 samplingrate = df.samplingrate.iloc[0] idx10ms = int(0.01 * samplingrate) idx20ms = int(0.02 * samplingrate) idx1ms = int(0.001 * samplingrate) for i in range(len(qc)): qc[i] = {} qc[i][prefix + currenttrace + affix] = df[df.type == 'QualityControl'][currenttrace].iloc[i] qc[i][prefix + potentialtrace + affix] = df[df.type == 'QualityControl'][potentialtrace].iloc[i] qc[i][prefix + 'onset times' + affix] = df[df.type == 'QualityControl']['onset times'].iloc[i] qc[i][prefix + 'holdingpotential' + affix] = df[df.type == 'QualityControl']['tag_meta']['settings']['holdingpotential'].iloc[i] if 'TraceId' in df.columns: qc[i][prefix + 'TraceId' + affix] = df[df.type == 'QualityControl']['TraceId'].iloc[i] I1 = np.mean(df[df.type == 'QualityControl'][currenttrace].iloc[i][idx10ms-idx1ms : idx10ms - 2]) I2 = np.mean(df[df.type == 'QualityControl'][currenttrace].iloc[i][idx20ms-idx1ms : idx20ms - 2]) R = 20/(I1-I2) qc[i][prefix + 'resistance' + affix] = R qc_df = pd.DataFrame(qc) if len(qc) == 0: return df qc_empty = [{}] for key in qc[0].keys(): qc_empty[0][key] = np.nan qc_dic = qc_empty * len(df) for idx in df[df.type == 'Trace'].index: i = np.where(df.index == idx)[0][0] if 'TraceId' in df.columns: qc_dic[i] = qc[np.where(qc_df[prefix + 'TraceId' + affix] == df.TraceId.loc[idx])[0][0]] else: None delays = qc_df[prefix + 'onset times' + affix] - df['onset times'].loc[idx] maxidx = np.where(delays[np.array(delays) < 0])[0][0] qc_dic[i] = qc[maxidx] qc = pd.DataFrame(qc_dic, index=df.index) qc.columns = qc.columns.str.split(';', expand=True) # try: df = pd.concat([df, qc], axis=1) # qc = qc.drop(index=np.where(df.type == 'QualityControl')[0]) df = df.drop(index=df[df.type == 'QualityControl'].index) # except: embed() return df