import nixio as nix
import numpy as np
import os
import pandas as pd
import pickle
from copy import deepcopy

'''
This file converts .nix files into a more readable format (pandas.DataFrame).

NixToFrame() searches all subfolders of a folder for .nix files, converts each
with DataFrame() and saves the result as a pickle file.
df = load_data(picklefile) loads such a pickle file again.
'''


def DataFrame(nixfile, before=0.001, after=0.001, savefile=False, saveto='./', mindepth=0, delimiter=';'):
    '''
    opens a nix file, extracts the data and converts it to a pandas.DataFrame

    :param nixfile (string): path and name of .nix file
    :param before (float): time before sweeps that is saved, too (in the unit that is
                           given in the file. Usually [s])
    :param after (float): time after sweeps that is saved, too (in the unit that is
                          given in the file. Usually [s])
    :param savefile (string, bool): if not False and a string, the dataframe will be saved
                                    as .pickle, if True it will be saved as
                                    nixfile.split('.nix')[0]
    :param saveto (string): path to save the files in path defined by saveto
    :param mindepth (int): minimum number of nested entries in dataframe
    :param delimiter (string): internally used to create the nested dataframe, use a
                               delimiter that does not naturally occur in your protocol names
    :return dataframe (pandas.DataFrame): pandas.DataFrame with available nix data,
                                          or None if the file cannot be converted
    '''
    try:
        block = nix.File.open(nixfile, 'r').blocks[0]
    except Exception:
        print('cannot open ' + nixfile.split('/')[-1] + ', skip it')
        return None
    data_arrays = block.data_arrays
    names = [data_arrays[i].name for i in range(len(data_arrays))]
    shapes = [x.shape for x in data_arrays]
    # keep only arrays roughly as long as the first one (continuously sampled traces)
    data_names = np.array([[x, i] for i, x in enumerate(names) if (shapes[i][0] >= 0.999 * shapes[0][0])])
    header_names = [mt.name for mt in block.multi_tags]
    # read every candidate array once purely as a corruption check
    try:
        data_traces = np.array([data_arrays[name][:] for name, idx in data_names])
    except Exception:
        print(nixfile.split('/')[-1] + ' has corrupted data_arrays, no file will be saved')
        return None
    # time axis and sampling interval taken from the second data array
    time = data_arrays[1].dimensions[0].axis(data_arrays[1].shape[0])
    dt = time[1] - time[0]
    block_metadata = {}
    block_metadata[block.id] = GetMetadataDict(block.metadata)

    tag = block.tags
    tag_metadata = {}
    tag_id_times = {}
    protocols = [[]] * len(tag)
    for i, t in enumerate(tag):
        protocols[i] = {}
        protocols[i]['id'] = t.id
        protocols[i]['name'] = t.name
        protocols[i]['position'] = t.position[0]
    protocols = pd.DataFrame(protocols)
    # collect tag metadata; a failure here means the file has no usable tags
    for i in range(len(tag)):
        try:
            meta = tag[i].metadata
            tag_metadata[meta.id] = GetMetadataDict(meta)
            tag_id_times[meta.id] = [tag[i].position[0], tag[i].position[0] + tag[i].extent[0]]
        except Exception:
            print(nixfile.split('/')[-1] + ' has no tags, no file will be saved')
            return None

    # normalize legacy array names (' onset times' / ' durations') to the
    # underscore style used below
    protocol_idcs_old = np.where([(' onset times' in name) for name in names])[0]
    durations_idcs_old = np.where([(' durations' in name) for name in names])[0]
    for idx in protocol_idcs_old:
        names[idx] = names[idx].split(' onset times')[0] + '_onset_times'
    for idx in durations_idcs_old:
        names[idx] = names[idx].split(' durations')[0] + '_durations'
    protocol_idcs = np.where([('_onset_times' in name) for name in names])[0]
    if len(protocol_idcs) == 0:
        print(nixfile.split('/')[-1] + ' is empty, no file will be saved')
        return None

    # one dict per stimulus sweep, filled from the per-protocol feature arrays
    data_df = []
    for hn in header_names:
        idcs = np.where(np.array([hn in name[:len(hn)] for name in names], dtype=int))[0]
        for j, idx in enumerate(idcs):
            data = data_arrays[int(idx)][:]
            if j == 0:
                i0 = len(data_df)
                protname = hn.split('-')[0]
                typename = hn.split('-')[1]
                if protname == 'VC=':
                    continue
                data_df.extend([[]] * len(data))
                for jj in range(len(data)):
                    data_df[i0 + jj] = {}
                    data_df[i0 + jj]['type'] = typename
                    # data_df[i0 + jj]['row_id'] = uuid.uuid4()
                    # map known typos/casing of the recording software to canonical names
                    if typename == 'PNSubatraction':
                        data_df[i0 + jj]['type'] = 'PNSubtraction'
                    elif typename == 'Qualitycontrol':
                        data_df[i0 + jj]['type'] = 'QualityControl'
            dataname = names[idx].split(hn)[1][1:]
            if protname == 'VC=':
                continue
            if dataname == 'Current-1':
                continue
            for jj in range(len(data)):
                data_df[i0 + jj][dataname] = data[jj]

    # FIX: flatten the key sets with a set union. The old
    # np.unique(...)/k.extend(key) dance extended character-by-character when
    # all sweep dicts shared one key set, so 'repro_tag_id' was never found and
    # ids stored in the file were silently overwritten below.
    keys = set()
    for d in data_df:
        keys.update(d.keys())
    if 'repro_tag_id' not in keys:
        # derive the repro tag from the last protocol starting before the sweep onset
        for i in range(len(data_df)):
            data_df[i]['repro_tag_id'] = protocols[~(protocols.position >= data_df[i]['onset_times'])]['id'].iloc[-1]

    # cut the raw traces around each sweep
    traces_idx = np.where([("data.sampled" in d.type) or ("data.events" in d.type) for d in data_arrays])[0]
    traces_df = []
    for i, idx in enumerate(traces_idx):
        for j in range(len(data_df)):
            if i == 0:
                traces_df.append({})
            if "data.sampled" in data_arrays[names[int(idx)]].type:
                idx0 = int((data_df[j]['onset_times'] - before) / dt)
                idx1 = int((data_df[j]['onset_times'] + data_df[j]['durations'] + after) / dt)
                if idx0 >= idx1:
                    traces_df[j][names[idx]] = np.array([np.nan])
                else:
                    traces_df[j][names[idx]] = data_arrays[names[int(idx)]][idx0:idx1]
            elif "data.events" in data_arrays[names[int(idx)]].type:
                idx0 = int((data_df[j]['onset_times'] - before) / dt)
                idx1 = int((data_df[j]['onset_times'] + data_df[j]['durations'] + after) / dt)
                t0 = data_df[j]['onset_times'] - before
                t1 = data_df[j]['onset_times'] + data_df[j]['durations'] + after
                if t0 >= t1:
                    traces_df[j][names[idx]] = np.array([np.nan])
                else:
                    # event times are shifted so 0 is the stimulus onset
                    arr = data_arrays[names[int(idx)]][:]
                    traces_df[j][names[idx]] = arr[(arr >= t0) & (arr <= t1)] - data_df[j]['onset_times']
            if i == 0:
                # bookkeeping columns are written once, with the first trace
                traces_df[j]['time'] = time[idx0:idx1] - data_df[j]['onset_times']
                traces_df[j]['time_before_stimulus'] = before
                traces_df[j]['time_after_stimulus'] = after
                traces_df[j]['samplingrate'] = 1 / dt
                traces_df[j]['meta_id'] = list(block_metadata.keys())[0]
                if isinstance(data_df[j]['repro_tag_id'], bytes):
                    data_df[j]['repro_tag_id'] = data_df[j]['repro_tag_id'].decode("utf-8")
                traces_df[j]['protocol'] = np.array(protocols[protocols.id == data_df[j]['repro_tag_id']].name)[0].split('_')[0]
                traces_df[j]['protocol_number'] = np.array(protocols[protocols.id == str(data_df[j]['repro_tag_id'])].name)[0].split('_')[1]
                traces_df[j]['id'] = data_df[j]['repro_tag_id']

    # per-tag metadata, flattened to delimiter-joined keys
    metadic = {}
    for i, key in enumerate(protocols.id):
        d = GetMetadataDict(tag[key].metadata)
        if (len(d.keys()) == 1) and ('RePro-Info' in list(d.keys())[0]):
            d = d['RePro-Info']
        metadic[key] = DicCrawl(d, key='tag_meta', delimiter=delimiter)
        metadic[key].update(DicCrawl(block_metadata[list(block_metadata.keys())[0]], key='block_meta', delimiter=delimiter))
    meta_df = [[]] * len(data_df)
    for i in range(len(data_df)):
        meta_df[i] = metadic[str(data_df[i]['repro_tag_id'])]

    # pad every key with delimiters so all keys have the same nesting depth,
    # which lets str.split(...) below build a rectangular MultiIndex
    dics = [meta_df, data_df, traces_df]
    old_maxcount = mindepth
    for k in range(len(dics)):
        for j in range(len(dics[k])):
            keys = list(dics[k][j].keys())
            counts = np.array([key.count(delimiter) for key in keys])
            maxcount = np.max(counts)
            if maxcount < mindepth:
                maxcount = mindepth
            if maxcount < old_maxcount:
                maxcount = old_maxcount
            # append delimiters to the keys until each key contains the same number of keys
            for i, key in enumerate(keys):
                add = maxcount - counts[i]
                newkey = key
                for ii in range(add):
                    newkey += delimiter
                dics[k][j][newkey] = dics[k][j].pop(key)
            old_maxcount = maxcount

    data_df = pd.DataFrame(data_df)
    traces_df = pd.DataFrame(traces_df)
    meta_df = pd.DataFrame(meta_df)
    data = pd.concat([meta_df, data_df, traces_df], axis=1)
    data.columns = data.columns.str.split(';', expand=True)

    if savefile is not False:
        if savefile is True:
            savefile = nixfile.split('.nix')[0]
        if saveto is not None:
            savefile = savefile.split('/')[-1]
            savefile = saveto + savefile
        with open(savefile + '_dataframe.pickle', 'wb') as f:
            pickle.dump(data, f, -1)  # create pickle-files, using the highest pickle-protocol
    return data


def NixToFrame(folder, before=0.0, after=0.0, skipold=True, saveto=None, mindepth=0):
    '''
    searches subfolders of folder to convert .nix files to a pandas dataframe and
    saves them in the folder

    :param folder: path to folder that contains subfolders of year-month-day-aa style
                   that contain .nix files
    :param before (float): passed to DataFrame(); time saved before each sweep
    :param after (float): passed to DataFrame(); time saved after each sweep
    :param skipold (bool): skip creation of dataframe if it already exists
    :param saveto (string): path where files should be saved, if None it saves in the
                            folder, where the .nix file is
    :param mindepth (int): passed to DataFrame(); minimum nesting depth of columns
    '''
    if folder[-1] != '/':
        folder = folder + '/'
    for subdir in os.listdir(folder):
        if not os.path.isdir(folder + subdir):
            continue
        for fname in os.listdir(folder + subdir):
            if '.nix' not in fname:
                continue
            if skipold:
                # a previously written pickle in the same subfolder means this
                # dataset was already converted
                if np.sum(['_dataframe.pickle' in x for x in os.listdir(folder + subdir)]) >= 1:
                    print('skip ' + fname)
                    continue
            print(fname)
            DataFrame(folder + subdir + '/' + fname, before, after, True, saveto, mindepth=mindepth)


def load_data(filename):
    '''
    loads a pickled dataframe written by DataFrame()

    :param filename (string): path of the .pickle file
    :return: the unpickled object (pandas.DataFrame)
    '''
    with open(filename, 'rb') as f:
        data = pickle.load(f)  # load data with pickle
    return data


def GetMetadataDict(metadata):
    '''
    recursively converts a nix metadata section into a nested plain dict
    (property name -> value, subsection name -> sub-dict)
    '''
    def unpackMetadata(sec):
        metadata = {prop.name: sec[prop.name] for prop in sec.props}
        if hasattr(sec, 'sections') and len(sec.sections) > 0:
            metadata.update({subsec.name: unpackMetadata(subsec) for subsec in sec.sections})
        return metadata
    return unpackMetadata(metadata)


def DicCrawl(dic, key='', delimiter=';'):
    '''
    flattens a nested dict into a flat dict whose keys are the nesting path
    joined with delimiter, optionally prefixed with key
    '''
    keylist = GetKeyList(dic, key=key, delimiter=delimiter)
    newdic = KeylistToDic(keylist)
    return newdic


def GetKeyList(dic, keylist=None, key='', delimiter=';'):
    '''
    recursively collects [path, value] pairs of a nested dict, where path is
    the delimiter-joined chain of keys prefixed with key

    FIX: keylist defaulted to a mutable [] that was shared between top-level
    calls, so every DicCrawl() invocation accumulated the entries of all
    previous ones; use a None sentinel instead.
    '''
    if keylist is None:
        keylist = []
    for item in dic.items():
        if type(item[1]) == dict:
            if len(key) == 0:
                keylist = GetKeyList(item[1], keylist, str(item[0]), delimiter)
            else:
                keylist = GetKeyList(item[1], keylist, key + delimiter + str(item[0]), delimiter)
        else:
            if len(key) == 0:
                keylist.append([str(item[0]), item[1]])
            else:
                keylist.append([key + delimiter + str(item[0]), item[1]])
    return keylist


def KeylistToDic(keylist):
    '''
    converts a list of [key, value] pairs into a dict (later pairs win)
    '''
    dic = {}
    for item in keylist:
        dic[item[0]] = item[1]
    return dic


def PNSubtraction(df, currenttrace='Current-1', newcurrenttrace='Current-2', kernelcurrenttrace='Current-3', delimiter=';'):
    '''
    Only for VoltageClamp experiments and WILL BE OBSOLETE, SOON

    subtracts the scaled average of the p/n leak pulses from each stimulus trace
    and adds the corrected currents as new columns; the PNSubtraction rows are
    dropped from the returned frame.

    NOTE(review): this function calls hf.get_traces(), but no module named `hf`
    is imported in this file — as it stands this raises NameError at runtime.
    Presumably a project-local helper module; confirm and import it.

    :param df (pandas.DataFrame): NixFrame.DataFrame
    :param currenttrace: column with the raw current traces
    :param newcurrenttrace: column name for the p/n-corrected current
    :param kernelcurrenttrace: column name for the kernel-corrected current
    :param delimiter: delimiter used for the nested column names
    :return: dataframe with corrected currents, PNSubtraction rows removed
    '''
    df = deepcopy(df)
    if 'TraceId' not in df.columns:
        # no trace ids: reconstruct which rows are leak pulses from the row
        # order — pn leak sweeps are recorded directly before their trace
        for id in np.unique(df.repro_tag_id):
            dfp = df[df.repro_tag_id == id]
            if np.isnan(dfp.pn.iloc[0]) or dfp.pn.iloc[0] == 0:
                continue
            pn = int(dfp.pn.iloc[0])
            for i in np.arange(0, len(dfp), np.abs(pn) + 1, dtype=int):
                locs_pn = df[df.repro_tag_id == id].iloc[i:i + np.abs(pn)].index
                loc = locs_pn[-1] + 1
                if loc <= df[df.repro_tag_id == id].index[-1]:
                    df.loc[locs_pn, 'type'] = 'PNSubtraction'
                    df.loc[loc, 'type'] = 'Trace'
    trace_idx = np.where(df.type == 'Trace')[0]
    currentdic = [[]] * len(trace_idx)
    kerneldic = [[]] * len(trace_idx)
    # pad new column names with delimiters to the depth of the MultiIndex
    affix = ''
    affix_num = len(list(df)[0]) - 1
    for i in range(affix_num):
        affix += delimiter
    for idx, i in enumerate(trace_idx):
        if np.isnan(df.pn.iloc[i]) or df.pn.iloc[i] == 0:
            continue
        if 'TraceId' in df.columns:
            trace_id = df.TraceId.iloc[i]
            idxvec = np.where((df.TraceId == trace_id) & (df.type == 'PNSubtraction'))[0]
        else:
            # the |pn| leak sweeps that ended right before this trace started
            delays = (df['onset_times'][df.type == 'PNSubtraction']) - df['onset_times'].iloc[i]
            maxidx = delays[np.array(delays) < 0].index[-1]
            idxvec = np.arange(maxidx - np.abs(df.pn.iloc[i]) + 1, maxidx + .1, dtype=int)
        pn_traces = hf.get_traces(currenttrace, df.iloc[idxvec])
        I_trace = np.array(df[currenttrace].iloc[i])
        # baseline-correct both the averaged leak pulses and the trace itself
        pn_traces = np.mean(pn_traces, axis=1)
        pn_traces -= np.mean(pn_traces[:int(df.iloc[i].time_before_stimulus * df.iloc[i].samplingrate)])
        pn_trace = pn_traces * df.pn.iloc[i]
        I_trace -= np.mean(I_trace[:int(df.iloc[i].time_before_stimulus * df.iloc[i].samplingrate)])

        ''' try kernels '''
        Vpn_trace = hf.get_traces('V-1', df.iloc[idxvec])
        tpn = hf.get_traces('time', df.iloc[idxvec])[:, 0]
        Vtr_trace = np.array(df['V-1'].iloc[i])
        ttr = np.array(df['time'].iloc[i])
        kernel_length = 3  # kernel window in ms
        idx0 = np.nanargmin(np.abs(tpn)) - 1
        idx1 = np.nanargmin(np.abs(tpn - kernel_length / 1000))
        idx40 = np.nanargmin(np.abs(tpn - 0.04))
        idx60 = np.nanargmin(np.abs(tpn - 0.06))
        # idealized step voltages: baseline level before 0, plateau level after
        Vpn = tpn * 0.0 + np.mean(np.mean(Vpn_trace, axis=1)[:idx0])
        Vpn[tpn >= 0] = np.mean(np.mean(Vpn_trace, axis=1)[idx40:idx60])
        Vtr = ttr * 0.0 + np.mean(Vtr_trace[:idx0])
        Vtr[ttr >= 0] = np.mean(Vtr_trace[idx40:idx60])
        # deconvolve the leak response with the voltage step to get a kernel
        ftinput = np.fft.fft(np.diff(Vpn[idx0:idx1]))
        ftoutput = np.fft.fft(pn_traces[idx0:idx1 - 1])
        kernel = np.fft.ifft(ftoutput / ftinput)
        kernel = np.append(kernel, np.zeros(len(I_trace) - len(kernel)) + np.mean(kernel[-20:]))
        kerneldic[idx] = {}
        kerneldic[idx][kernelcurrenttrace + affix] = I_trace - np.convolve(np.diff(Vtr), kernel)[:len(I_trace)]  # - np.convolve(np.diff(Vtr),kernel2)[:len(I_trace)]
        ''' end of kernels '''

        if len(pn_trace) < len(I_trace):
            I_trace = I_trace[:len(pn_trace)]
        elif len(pn_trace) > len(I_trace):
            pn_trace = pn_trace[:len(I_trace)]
        currentdic[idx] = {}
        currentdic[idx][newcurrenttrace + affix] = I_trace - pn_trace
    currentdf = pd.DataFrame(currentdic, index=df[df.type == 'Trace'].index)
    currentdf.columns = currentdf.columns.str.split(';', expand=True)
    df = pd.concat([df, currentdf], axis=1)
    df = df.drop(index=df[df.type == 'PNSubtraction'].index)
    return df


def QualityControl(df, currenttrace='Current-1', potentialtrace='V-1', delimiter=';'):
    '''
    only for VoltageClamp experiments

    extracts the QualityControl sweeps, estimates the access resistance from
    the current response to a -20 mV step and attaches the matching quality
    control data to every Trace row; QualityControl rows are dropped.

    :param df (pandas.DataFrame): NixFrame.DataFrame
    :param currenttrace: column with the current traces
    :param potentialtrace: column with the membrane potential traces
    :param delimiter: delimiter used for the nested column names
    :return: dataframe with qualitycontrol columns, QualityControl rows removed
    '''
    qc = [[]] * len(df[df.type == 'QualityControl'])
    prefix = 'qualitycontrol' + delimiter
    # pad new column names with delimiters to the depth of the MultiIndex
    affix = ''
    affix_num = len(list(df)[0]) - 2
    for i in range(affix_num):
        affix += delimiter
    # assume that stimulus has 10ms at holdingpotential followed by 10ms at holdingpotential-20
    samplingrate = df.samplingrate.iloc[0]
    idx10ms = int(0.01 * samplingrate)
    idx20ms = int(0.02 * samplingrate)
    idx1ms = int(0.001 * samplingrate)
    for i in range(len(qc)):
        qc[i] = {}
        qc[i][prefix + currenttrace + affix] = df[df.type == 'QualityControl'][currenttrace].iloc[i]
        qc[i][prefix + potentialtrace + affix] = df[df.type == 'QualityControl'][potentialtrace].iloc[i]
        qc[i][prefix + 'onset_times' + affix] = df[df.type == 'QualityControl']['onset_times'].iloc[i]
        qc[i][prefix + 'holdingpotential' + affix] = df[df.type == 'QualityControl']['tag_meta']['settings']['holdingpotential'].iloc[i]
        if 'TraceId' in df.columns:
            qc[i][prefix + 'TraceId' + affix] = df[df.type == 'QualityControl']['TraceId'].iloc[i]
        # mean steady-state current over the last ms of each 10 ms step;
        # R = dV/dI with dV = 20 (mV)
        I1 = np.mean(df[df.type == 'QualityControl'][currenttrace].iloc[i][idx10ms - idx1ms: idx10ms - 2])
        I2 = np.mean(df[df.type == 'QualityControl'][currenttrace].iloc[i][idx20ms - idx1ms: idx20ms - 2])
        R = 20 / (I1 - I2)
        qc[i][prefix + 'resistance' + affix] = R
    qc_df = pd.DataFrame(qc)
    if len(qc) == 0:
        return df
    # template row of NaNs for traces without a matching quality control sweep
    qc_empty = [{}]
    for key in qc[0].keys():
        qc_empty[0][key] = np.nan
    qc_dic = qc_empty * len(df)
    for idx in df[df.type == 'Trace'].index:
        i = np.where(df.index == idx)[0][0]
        if 'TraceId' in df.columns:
            qc_dic[i] = qc[np.where(qc_df[prefix + 'TraceId' + affix] == df.TraceId.loc[idx])[0][0]]
        else:
            # no ids: take the quality control sweep recorded closest before this trace
            delays = qc_df[prefix + 'onset_times' + affix] - df['onset_times'].loc[idx]
            maxidx = np.where(delays[np.array(delays) < 0])[0][0]
            qc_dic[i] = qc[maxidx]
    qc = pd.DataFrame(qc_dic, index=df.index)
    qc.columns = qc.columns.str.split(';', expand=True)
    df = pd.concat([df, qc], axis=1)
    df = df.drop(index=df[df.type == 'QualityControl'].index)
    return df