diff --git a/NixFrame.py b/NixFrame.py
index bfcc85b..eb875be 100644
--- a/NixFrame.py
+++ b/NixFrame.py
@@ -1,568 +1,446 @@
-import nixio as nix
-from IPython import embed
-import numpy as np
-import os
-import pandas as pd
-import pickle
-import helperfunctions as hf
-from copy import deepcopy
-import gc
-
-
-def DataFrame(nixfile, before=0.001, after=0.001, savefile=False, saveto='./', mindepth=0, delimiter=';'):
-    '''
-    opens a nix file, extracts the data and converts it to a pandas.DataFrame
-
-    :param nixfile (string): path and name of .nix file
-    :param overlap (float): time before and after sweeps that is saved, too
-    :param savefile (string): if not False, the dataframe will be saved as .pickle
-    :param saveto (string): path to save the files in NOT IMPLEMENTED YET
-    :return dataframe (pandas.DataFrame): pandas.DataFrame with available nix data
-    '''
-
-    try:
-        block = nix.File.open(nixfile,'r').blocks[0]
-    except:
-        print('cannot open ' + nixfile.split('/')[-1] + ', skip it')
-        return None
-
-    data_arrays = block.data_arrays
-    names = [data_arrays[i].name for i in range(len(data_arrays))]
-    shapes = [x.shape for x in data_arrays]
-    data_names = np.array([[x, i] for i, x in enumerate(names) if (shapes[i][0] >= 0.999 * shapes[0][0])])
-    header_names = [mt.name for mt in block.multi_tags]
-
-    try:
-        data_traces = np.array([data_arrays[name][:] for name, idx in data_names])
-    except:
-        print(nixfile.split('/')[-1] + ' has corrputed data_arrays, no file will be saved')
-        return None
-
-    time = data_arrays[1].dimensions[0].axis(data_arrays[1].shape[0])
-    dt = time[1]-time[0]
-
-    block_metadata = {}
-    block_metadata[block.id] = GetMetadataDict(block.metadata)
-
-    tag = block.tags
-    tag_metadata = {}
-    tag_id_times = {}
-    protocols = [[]] * len(tag)
-    for i,t in enumerate(tag):
-        protocols[i] = {}
-        protocols[i]['id'] = t.id
-        protocols[i]['name'] = t.name
-        protocols[i]['position'] = t.position[0]
-    protocols = pd.DataFrame(protocols)
-
-    for i in range(len(tag)):
-        try:
-            meta = tag[i].metadata
-            tag_metadata[meta.id] = GetMetadataDict(meta)
-            # tag_metadata[meta.id].update(DicCrawl(GetMetadataDict(meta), mindepth=mindepth))
-            tag_id_times[meta.id] = [tag[i].position[0], tag[i].position[0]+tag[i].extent[0]]
-        except:
-            print(nixfile.split('/')[-1] + ' has no tags, no file will be saved')
-            return None
-
-    protocol_idcs = np.where([(' onset times' in name) for name in names])[0]
-
-    if len(protocol_idcs) == 0:
-        print(nixfile.split('/')[-1] + ' is empty, no file will be saved')
-        return None
-
-    data_df = []
-    for hn in header_names:
-        idcs = np.where(np.array([hn in name[:len(hn)] for name in names], dtype=int))[0]
-        for j,idx in enumerate(idcs):
-            data = data_arrays[int(idx)][:]
-            if j == 0:
-                i0 = len(data_df)
-                protname = hn.split('-')[0]
-                typename = hn.split('-')[1]
-                if protname == 'VC=':
-                    continue
-
-                data_df.extend([[]]*len(data))
-                for jj in range(len(data)):
-                    data_df[i0 + jj] = {}
-                    data_df[i0 + jj]['type'] = typename
-                    # data_df[i0 + jj]['row_id'] = uuid.uuid4()
-                    if typename == 'PNSubatraction':
-                        data_df[i0 + jj]['type'] = 'PNSubtraction'
-                    elif typename == 'Qualitycontrol':
-                        data_df[i0 + jj]['type'] = 'QualityControl'
-
-            dataname = names[idx].split(hn)[1][1:]
-            if protname == 'VC=':
-                continue
-            if dataname == 'Current-1':
-                continue
-            for jj in range(len(data)):
-                data_df[i0 + jj][dataname] = data[jj]
-
-    keys = np.unique([list(d.keys()) for d in data_df])
-    k = []
-    for key in keys:
-        k.extend(key)
-    keys = np.unique(k)
-    if 'repro_tag_id' not in keys:
-        for i in range(len(data_df)):
-            data_df[i]['repro_tag_id'] = protocols[~(protocols.position >= data_df[i]['onset times'])]['id'].iloc[-1]
-
-    traces_idx = np.where([("data.sampled" in d.type) or ("data.events" in d.type) for d in data_arrays])[0]
-    traces_df = []
-
-
-    for i,idx in enumerate(traces_idx):
-        # print(i,i/len(traces_idx))
-        for j in range(len(data_df)):
-            if i == 0:
-                traces_df.append({})
-
-            if "data.sampled" in data_arrays[names[int(idx)]].type:
-                idx0 = int((data_df[j]['onset times'] - before) / dt)
-                idx1 = int((data_df[j]['onset times'] + data_df[j]['durations'] + after) / dt + 1)
-                # traces_df[names[idx]] = data_arrays[int(idx)][:][idx0:idx1]
-                traces_df[j][names[idx]] = data_arrays[names[int(idx)]][idx0:idx1]
-            elif "data.events" in data_arrays[names[int(idx)]].type:
-                t0 = data_df[j]['onset times'] - before
-                t1 = data_df[j]['onset times'] + data_df[j]['durations'] + after
-                arr = data_arrays[names[int(idx)]][:]
-                traces_df[j][names[idx]] = arr[(arr>=t0) & (arr<=t1)]
-
-            if i == 0:
-                traces_df[j]['time'] = time[idx0:idx1] - data_df[j]['onset times']
-                traces_df[j]['time_before_stimulus'] = before
-                traces_df[j]['time_after_stimulus'] = after
-                traces_df[j]['samplingrate'] = 1 / dt
-                traces_df[j]['meta_id'] = list(block_metadata.keys())[0]
-
-                if type(data_df[j]['repro_tag_id']) == bytes:
-                    data_df[j]['repro_tag_id'] = data_df[j]['repro_tag_id'].decode("utf-8")
-                traces_df[j]['protocol'] = np.array(protocols[protocols.id == data_df[j]['repro_tag_id']].name)[0].split('_')[0]
-                traces_df[j]['protocol_number'] = np.array(protocols[protocols.id == str(data_df[j]['repro_tag_id'])].name)[0].split('_')[1]
-                traces_df[j]['id'] = data_df[j]['repro_tag_id']
-
-
-    metadic = {}
-    print('meta')
-    for i,key in enumerate(protocols.id):
-        d = GetMetadataDict(tag[key].metadata)
-        if (len(d.keys()) == 1) and ('RePro-Info' in list(d.keys())[0]):
-            d = d['RePro-Info']
-        metadic[key] = DicCrawl(d, key='tag_meta', delimiter=delimiter)
-        metadic[key].update(DicCrawl(block_metadata[list(block_metadata.keys())[0]], key='block_meta', delimiter=delimiter))
-    meta_df = [[]]*len(data_df)
-    for i in range(len(data_df)):
-        meta_df[i] = metadic[str(data_df[i]['repro_tag_id'])]
-
-    dics = [meta_df, data_df, traces_df]
-    # old_maxcount = mindepth
-    # for i in range(len(protocol_idcs)):
-    #     protocol = names[protocol_idcs[i]].split(' onset times')[0]
-    #
-    #     #skip certain protocols
-    #     if 'VC=' in protocol:
-    #         # print('skip this protocol')
-    #         continue
-    #     #number of meta data entries
-    #     if i == len(protocol_idcs)-1:
-    #         meta_len = len(names) - protocol_idcs[i]
-    #     else:
-    #         meta_len = protocol_idcs[i+1] - protocol_idcs[i]
-    #
-    #     #get new line for every sweep and save the data, make a pn subtraction if necessary
-    #     if any([protocol + '_pn' == string for string in names[protocol_idcs[i]:protocol_idcs[i]+meta_len]]):
-    #         pn = data_arrays[protocol + '_pn'][0]
-    #         sweeps = np.arange(np.abs(pn),len(data_arrays[int(protocol_idcs[i])][:]),(np.abs(pn)+1), dtype=int)
-    #     else:
-    #         pn = np.nan
-    #         sweeps = np.arange(len(data_arrays[int(protocol_idcs[i])][:]), dtype=int)
-    #
-    #     for sweep in sweeps:
-    #         stim_num +=1
-    #         data.append({})
-    #
-    #         # save protocol names
-    #         split_vec = protocol.split('-')
-    #         if len(split_vec)>2:
-    #             prot_name = split_vec[0]
-    #             prot_num = int(split_vec[-1])
-    #             for j in range(len(split_vec)-2):
-    #                 prot_name += '-' + split_vec[j+1]
-    #         else:
-    #             prot_name = split_vec[0]
-    #             prot_num = split_vec[-1]
-    #         data[stim_num]['protocol'] = prot_name
-    #         data[stim_num]['protocol_number'] = prot_num
-    #         data[stim_num]['time_before_stimulus'] = before
-    #         data[stim_num]['time_after_stimulus'] = after
-    #
-    #         #save id
-    #         data[stim_num]['id'] = data_arrays[int(protocol_idcs[i])].id
-    #
-    #         #save rest of stored data
-    #         for idx in range(meta_len):
-    #             j = int(protocol_idcs[i] + idx)
-    #             if (' durations' in names[j]) or (' onset times' in names[j]):
-    #                 continue
-    #             if len(data_arrays[j][sweep]) == 1:
-    #                 data[stim_num][names[j].split(protocol + '_')[-1]] = data_arrays[j][sweep][0]
-    #             else:
-    #                 data[stim_num][names[j].split(protocol+'_')[-1]] = data_arrays[j][sweep]
-    #         data[stim_num]['samplingrate'] = 1/dt
-    #
-    #         #save data arrays
-    #         onset = data_arrays[protocol + ' onset times'][sweep]
-    #         dur = data_arrays[protocol + ' durations'][sweep]
-    #         t0 = int((onset-before)/dt)
-    #         t1 = int((onset+after+dur)/dt+1)
-    #         data[stim_num]['onset time'] = onset
-    #         data[stim_num]['duration'] = dur
-    #
-    #         for name,idx in data_names:
-    #             data[stim_num][name] = data_traces[int(idx)][t0:t1]
-    #
-    #         for j in np.arange(int(idx)+1,protocol_idcs[0]):
-    #             bool_vec = (data_arrays[names[j]][:]>=onset) & (data_arrays[names[j]][:]<=onset+dur)
-    #             data[stim_num][names[j]] = np.array(data_arrays[names[j]])[bool_vec]
-    #
-    #         data[stim_num]['time'] = time[t0:t1] - data[stim_num]['onset time']
-    #
-    #         #pn-subtraction (if necessary)
-    #         '''
-    #         change the location of the pn (its already in the metadata, you dont need it as option
-    #         '''
-    #         if pn != np.nan and np.abs(pn)>0:
-    #             pn_curr = np.zeros(len(data[stim_num][name]))
-    #             idx = np.where(data_names[:,0] == 'Current-1')[0][0]
-    #             for j in range(int(np.abs(pn))):
-    #                 onset = data_arrays[protocol + ' onset times'][sweep-j-1]
-    #                 t0 = int((onset-before) / dt)
-    #                 t1 = int(t0 + len(pn_curr))
-    #
-    #                 pn_curr += data_traces[int(idx),t0:t1] - np.mean(data_traces[int(idx),t0:int(t0+before/dt)])
-    #                 import matplotlib.pyplot as plt
-    #                 plt.plot(data_traces[int(idx),t0:t1] - np.mean(data_traces[int(idx),t0:int(t0+before/dt)]))
-    #
-    #             data[stim_num]['Current-2'] = data[stim_num]['Current-1'] - pn/np.abs(pn)*pn_curr - np.mean(data[stim_num]['Current-1'][0:int(before/dt)])
-    #
-    # for mt in block.tags:
-    #     # if 'Trace' in mt.metadata.name:
-    #     print(mt.name)
-    #     # mt.metadata.pprint()
-    # embed()
-    #
-    # '''
-    # this one saves the complete metadata in EVERY line
-    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!THINK OF SOMETHING BETTER!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-    # '''
-    #
-    #     tag_id = None
-    #     for key in tag_id_times.keys():
-    #         if (data[stim_num]['onset time'] >= tag_id_times[key][0]) and (data[stim_num]['onset time'] <= tag_id_times[key][1]):
-    #             tag_id = key
-    #     # # save metadata
-    #     # data[stim_num]['block_meta'] = block_metadata[list(block_metadata.keys())[0]]
-    #     data[stim_num].update(DicCrawl(block_metadata[list(block_metadata.keys())[0]], key='block_meta', delimiter=delimiter))
-    #
-    #     if tag_id != None:
-    #         tagmetadata = tag_metadata[tag_id]
-    #         if (len(tagmetadata)==1) & (list(tagmetadata.keys())[0] == 'RePro-Info'):
-    #             # data[stim_num]['tag_meta'] = tag_metadata[tag_id]['RePro-Info']
-    #             data[stim_num].update(DicCrawl(tag_metadata[tag_id]['RePro-Info'], key='tag_meta', delimiter=delimiter))
-    #         else:
-    #             # data[stim_num]['tag_meta'] = tag_metadata[tag_id]
-    #             data[stim_num].update(DicCrawl(tag_metadata[tag_id], delimiter=delimiter))
-    #
-    #     elif tag_id == None:
-    #         print('no tag metadata for ' + nixfile.split('/')[-1] + ', ' + protocol)
-    #
-    #     # add block id
-    #     data[stim_num]['block_id'] = list(block_metadata.keys())[0]
-    #     data[stim_num]['tag_id'] = tag_id
-
-    # add delimiters at end of keys to be able to correctly split dataframe
-    # count maximum depth of the keylist (count max. of delimiters in a row)
-    old_maxcount = mindepth
-    for k in range(len(dics)):
-        for j in range(len(dics[k])):
-            keys = list(dics[k][j].keys())
-
-            counts = np.array([key.count(delimiter) for key in keys])
-            maxcount = np.max(counts)
-            if maxcount < mindepth:
-                maxcount = mindepth
-
-            if maxcount < old_maxcount:
-                maxcount = old_maxcount
-
-            # append delimiters to the keys until each key contains the same number of keys
-            for i, key in enumerate(keys):
-                add = maxcount - counts[i]
-                newkey=key
-                for ii in range(add):
-                    newkey += delimiter
-                dics[k][j][newkey] = dics[k][j].pop(key)
-            old_maxcount = maxcount
-
-    data_df = pd.DataFrame(data_df)
-    traces_df = pd.DataFrame(traces_df)
-    meta_df = pd.DataFrame(meta_df)
-
-    data = pd.concat([meta_df, data_df, traces_df], axis=1, sort=False)
-    data.columns = data.columns.str.split(';', expand=True)
-
-    if savefile != False:
-        if savefile == True:
-            # savefile = nixfile.split('/')[-1].split('.nix')[0]
-            savefile = nixfile.split('.nix')[0]
-        if saveto != None:
-            savefile = savefile.split('/')[-1]
-            savefile = saveto+savefile
-
-        with open(savefile + '_dataframe.pickle', 'wb') as f:
-            pickle.dump(data, f, -1)  # create pickle-files, using the highest pickle-protocol
-    return data
-
-
-def NixToFrame(folder, before=0.0, after=0.0, skipold=True, saveto=None, mindepth=0):
-    '''
-    searches subfolders of folder to convert .nix files to a pandas dataframe and saves them in the folder
-
-    :param folder: path to folder that contains subfolders of year-month-day-aa style that contain .nix files
-    :param skipold (bool): skip creation of dataframe if it already eists
-    :param saveto (string): path where files should be saved, if None it saves in the folder, where the .nix file is
-    '''
-    if folder[-1] != '/':
-        folder = folder + '/'
-
-    dirlist = os.listdir(folder)
-    for dir in dirlist:
-        if os.path.isdir(folder + dir):
-            for file in os.listdir(folder+dir):
-                if ('.nix' in file):
-                    if skipold == True:
-                        if np.sum(['_dataframe.pickle' in x for x in os.listdir(folder+dir)]) >= 1:
-                            print('skip ' + file)
-                            continue
-                    print(file)
-                    DataFrame(folder+dir+'/'+file, before, after, True, saveto, mindepth=mindepth)
-
-
-def GetMetadataDict(metadata):
-    def unpackMetadata(sec):
-        metadata = dict()
-        metadata = {prop.name: sec[prop.name] for prop in sec.props}
-        if hasattr(sec, 'sections') and len(sec.sections) > 0:
-            metadata.update({subsec.name: unpackMetadata(subsec) for subsec in sec.sections})
-        return metadata
-
-    return unpackMetadata(metadata)
-
-
-def DicCrawl(dic, key='', delimiter=';'):
-    keylist = GetKeyList(dic, key=key, delimiter=delimiter)
-    newdic = KeylistToDic(keylist)
-    return newdic
-
-
-def GetKeyList(dic, keylist=[], key='', delimiter=';'):
-    items = dic.items()
-    for item in items:
-        if type(item[1]) == dict:
-            if len(key) == 0:
-                keylist = GetKeyList(item[1], keylist, str(item[0]), delimiter)
-            else:
-                keylist = GetKeyList(item[1], keylist, key + delimiter + str(item[0]), delimiter)
-        else:
-            if len(key) == 0:
-                keylist.append([str(item[0]), item[1]])
-            else:
-                keylist.append([key + delimiter + str(item[0]), item[1]])
-    return keylist
-
-
-def KeylistToDic(keylist):
-    dic = {}
-    for item in keylist:
-        dic[item[0]] = item[1]
-    return dic
-
-
-def load_data(filename):
-    with open(filename, 'rb') as f:
-        data = pickle.load(f)  # load data with pickle
-    return data
-
-
-def PNSubtraction(df, currenttrace='Current-1', newcurrenttrace='Current-2', kernelcurrenttrace='Current-3', delimiter=';'):
-    df = deepcopy(df)
-    embed()
-    if 'TraceId' not in df.columns:
-        for id in np.unique(df.repro_tag_id):
-            dfp = df[df.repro_tag_id == id]
-            if np.isnan(dfp.pn.iloc[0]) or dfp.pn.iloc[0] == 0:
-                continue
-
-            pn = int(dfp.pn.iloc[0])
-            for i in np.arange(0,len(dfp),np.abs(pn)+1, dtype=int):
-                locs_pn = df[df.repro_tag_id == id].iloc[i:i+np.abs(pn)].index
-                loc = locs_pn[-1] + 1
-
-                if loc <= df[df.repro_tag_id == id].index[-1]:
-                    df.loc[locs_pn, 'type'] = 'PNSubtraction'
-                    df.loc[loc, 'type'] = 'Trace'
-
-
-    trace_idx = np.where(df.type == 'Trace')[0]
-    currentdic = [[]] * len(trace_idx)
-    kerneldic = [[]] * len(trace_idx)
-    affix = ''
-    affix_num = len(list(df)[0]) - 1
-    for i in range(affix_num):
-        affix += delimiter
-
-    for idx,i in enumerate(trace_idx):
-        if np.isnan(df.pn.iloc[i]) or df.pn.iloc[i] == 0:
-            continue
-
-        if 'TraceId' in df.columns:
-            trace_id = df.TraceId.iloc[i]
-            idxvec = np.where((df.TraceId == trace_id) & (df.type == 'PNSubtraction'))[0]
-        else:
-            delays = (df['onset times'][df.type == 'PNSubtraction']) - df['onset times'].iloc[i]
-            maxidx = delays[np.array(delays)<0].index[-1]
-            idxvec = np.arange(maxidx-np.abs(df.pn.iloc[i])+1, maxidx+.1, dtype=int)
-
-        pn_traces = hf.get_traces(currenttrace, df.iloc[idxvec])
-        I_trace = np.array(df[currenttrace].iloc[i])
-
-        pn_traces = np.mean(pn_traces, axis=1)
-        pn_traces -= np.mean(pn_traces[:int(df.iloc[i].time_before_stimulus * df.iloc[i].samplingrate)])
-        pn_trace = pn_traces*df.pn.iloc[i]
-        I_trace -= np.mean(I_trace[:int(df.iloc[i].time_before_stimulus * df.iloc[i].samplingrate)])
-
-        ''' try kernels '''
-        Vpn_trace = hf.get_traces('V-1', df.iloc[idxvec])
-        tpn = hf.get_traces('time', df.iloc[idxvec])[:, 0]
-        Vtr_trace = np.array(df['V-1'].iloc[i])
-        ttr = np.array(df['time'].iloc[i])
-        kernel_length = 3
-        idx0 = np.nanargmin(np.abs(tpn)) - 1
-        idx1 = np.nanargmin(np.abs(tpn - kernel_length / 1000))
-        idx40 = np.nanargmin(np.abs(tpn - 0.04))
-        idx60 = np.nanargmin(np.abs(tpn - 0.06))
-
-        Vpn = tpn * 0.0 + np.mean(np.mean(Vpn_trace, axis=1)[:idx0])
-        Vpn[tpn >= 0] = np.mean(np.mean(Vpn_trace, axis=1)[idx40:idx60])
-        Vtr = ttr * 0.0 + np.mean(Vtr_trace[:idx0])
-        Vtr[ttr >= 0] = np.mean(Vtr_trace[idx40:idx60])
-
-        ftinput = np.fft.fft(np.diff(Vpn[idx0:idx1]))
-        ftoutput = np.fft.fft(pn_traces[idx0:idx1 - 1])
-        kernel = np.fft.ifft(ftoutput / ftinput)
-        kernel = np.append(kernel, np.zeros(len(I_trace) - len(kernel))+np.mean(kernel[-20:]))
-        # kernel = kernel - np.mean(kernel[-20:])
-        # kernel2 = ttr*0.0
-        # kernel2[ttr>=0] = np.mean(kernel[-20:])
-        kerneldic[idx] = {}
-        kerneldic[idx][kernelcurrenttrace + affix] = I_trace - np.convolve(np.diff(Vtr), kernel)[:len(I_trace)] #- np.convolve(np.diff(Vtr),kernel2)[:len(I_trace)]
-        ''' end of kernels '''
-
-
-        if len(pn_trace) < len(I_trace):
-            # print(len(pn_traces) - len(I_trace))
-            I_trace = I_trace[:len(pn_trace)]
-        elif len(pn_trace) > len(I_trace):
-            # print(len(pn_traces) - len(I_trace))
-            pn_trace = pn_trace[:len(I_trace)]
-
-        currentdic[idx] = {}
-        currentdic[idx][newcurrenttrace+affix] = I_trace - pn_trace
-
-        if df.protocol.iloc[i] == 'Recovery':
-            import matplotlib.pyplot as plt
-            plt.plot(I_trace,'k', label='original')
-            plt.plot(kerneldic[idx][kernelcurrenttrace + affix], label='kernel')
-            plt.plot(currentdic[idx][newcurrenttrace+affix],label='pn')
-            plt.legend()
-            plt.show()
-            embed()
-
-        # import matplotlib.pyplot as plt
-        # if df.protocol.iloc[i] == 'Activation':
-        #     plt.figure()
-        #     plt.title(df.step.iloc[i])
-        #     # plt.plot(hf.get_traces(currenttrace, df.iloc[idxvec]), 'k')
-        #     # plt.plot(np.array(df[currenttrace].iloc[i]))
-        #     plt.plot(pn_traces, 'k')
-        #     plt.plot(I_trace)
-
-        # df[i][newcurrenttrace] = I_trace
-        # plt.show()
-    currentdf = pd.DataFrame(currentdic, index=df[df.type == 'Trace'].index)
-    currentdf.columns = currentdf.columns.str.split(';', expand=True)
-    df = pd.concat([df, currentdf], axis=1)
-    df = df.drop(index = df[df.type == 'PNSubtraction'].index)
-    return df
-
-
-def QualityControl(df, currenttrace='Current-1', potentialtrace='V-1', delimiter=';'):
-    qc = [[]]*len(df[df.type == 'QualityControl'])
-    prefix = 'qualitycontrol' + delimiter
-    affix = ''
-    affix_num = len(list(df)[0]) - 2
-    for i in range(affix_num):
-        affix += delimiter
-
-    # assume that stimulus has 10ms at holdingpotential followed by 10ms at holdingpotential-20
-    samplingrate = df.samplingrate.iloc[0]
-    idx10ms = int(0.01 * samplingrate)
-    idx20ms = int(0.02 * samplingrate)
-    idx1ms = int(0.001 * samplingrate)
-
-    for i in range(len(qc)):
-        qc[i] = {}
-        qc[i][prefix + currenttrace + affix] = df[df.type == 'QualityControl'][currenttrace].iloc[i]
-        qc[i][prefix + potentialtrace + affix] = df[df.type == 'QualityControl'][potentialtrace].iloc[i]
-        qc[i][prefix + 'onset times' + affix] = df[df.type == 'QualityControl']['onset times'].iloc[i]
-        qc[i][prefix + 'holdingpotential' + affix] = df[df.type == 'QualityControl']['tag_meta']['settings']['holdingpotential'].iloc[i]
-        if 'TraceId' in df.columns:
-            qc[i][prefix + 'TraceId' + affix] = df[df.type == 'QualityControl']['TraceId'].iloc[i]
-
-        I1 = np.mean(df[df.type == 'QualityControl'][currenttrace].iloc[i][idx10ms-idx1ms : idx10ms - 2])
-        I2 = np.mean(df[df.type == 'QualityControl'][currenttrace].iloc[i][idx20ms-idx1ms : idx20ms - 2])
-        R = 20/(I1-I2)
-        qc[i][prefix + 'resistance' + affix] = R
-    qc_df = pd.DataFrame(qc)
-
-    if len(qc) == 0:
-        return df
-
-    qc_empty = [{}]
-    for key in qc[0].keys():
-        qc_empty[0][key] = np.nan
-    qc_dic = qc_empty * len(df)
-
-    for idx in df[df.type == 'Trace'].index:
-        i = np.where(df.index == idx)[0][0]
-        if 'TraceId' in df.columns:
-            qc_dic[i] = qc[np.where(qc_df[prefix + 'TraceId' + affix] == df.TraceId.loc[idx])[0][0]]
-        else:
-            None
-            delays = qc_df[prefix + 'onset times' + affix] - df['onset times'].loc[idx]
-            maxidx = np.where(delays[np.array(delays) < 0])[0][0]
-            qc_dic[i] = qc[maxidx]
-
-    qc = pd.DataFrame(qc_dic, index=df.index)
-    qc.columns = qc.columns.str.split(';', expand=True)
-
-    # try:
-    df = pd.concat([df, qc], axis=1)
-    # qc = qc.drop(index=np.where(df.type == 'QualityControl')[0])
-    df = df.drop(index=df[df.type == 'QualityControl'].index)
-    # except: embed()
-    return df
-
+import nixio as nix
+import numpy as np
+import os
+import pandas as pd
+import pickle
+import helperfunctions as hf
+from copy import deepcopy
+
+'''
+This file converts .nix files into a more convenient format (pandas.DataFrame).
+
+NixToFrame() searches all subfolders of a folder for .nix files, converts each of
+them with DataFrame() and saves the result as a pickle file.
+
+df = load_data(picklefile) loads such a pickle file again.
+'''
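+
+# A minimal usage sketch of the workflow above (paths are placeholders, not part of
+# this repository):
+#
+#     import NixFrame as nf
+#     nf.NixToFrame('/path/to/data/')  # pickles one dataframe per .nix file
+#     df = nf.load_data('/path/to/data/2019-01-01-aa/2019-01-01-aa_dataframe.pickle')
+#     df['protocol'], df['samplingrate']  # top-level columns of the nested dataframe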
+
+
+def DataFrame(nixfile, before=0.001, after=0.001, savefile=False, saveto='./', mindepth=0, delimiter=';'):
+    '''
+    opens a nix file, extracts the data and converts it to a pandas.DataFrame
+
+    :param nixfile (string): path and name of the .nix file
+    :param before (float): extra time saved before each sweep (in the unit given in the file, usually [s])
+    :param after (float): extra time saved after each sweep (in the unit given in the file, usually [s])
+    :param savefile (string, bool): if a string, the dataframe is pickled under that name; if True, the name is derived from the nix file (nixfile.split('.nix')[0])
+    :param saveto (string): directory the pickle file is written to; if not None, it replaces the directory of the nix file
+    :param mindepth (int): minimum nesting depth of the dataframe columns
+    :param delimiter (string): internally used to build the nested dataframe; use a delimiter that does not naturally occur in your protocol names
+
+    :return dataframe (pandas.DataFrame): pandas.DataFrame with the available nix data
+    '''
+
+    try:
+        block = nix.File.open(nixfile,'r').blocks[0]
+    except:
+        print('cannot open ' + nixfile.split('/')[-1] + ', skip it')
+        return None
+
+    data_arrays = block.data_arrays
+    names = [data_arrays[i].name for i in range(len(data_arrays))]
+    shapes = [x.shape for x in data_arrays]
+    data_names = np.array([[x, i] for i, x in enumerate(names) if (shapes[i][0] >= 0.999 * shapes[0][0])])
+    header_names = [mt.name for mt in block.multi_tags]
+
+    try:
+        data_traces = np.array([data_arrays[name][:] for name, idx in data_names])
+    except:
+        print(nixfile.split('/')[-1] + ' has corrupted data_arrays, no file will be saved')
+        return None
+
+    time = data_arrays[1].dimensions[0].axis(data_arrays[1].shape[0])
+    dt = time[1]-time[0]
+
+    block_metadata = {}
+    block_metadata[block.id] = GetMetadataDict(block.metadata)
+
+    tag = block.tags
+    tag_metadata = {}
+    tag_id_times = {}
+    protocols = [[]] * len(tag)
+    for i,t in enumerate(tag):
+        protocols[i] = {}
+        protocols[i]['id'] = t.id
+        protocols[i]['name'] = t.name
+        protocols[i]['position'] = t.position[0]
+    protocols = pd.DataFrame(protocols)
+
+    for i in range(len(tag)):
+        try:
+            meta = tag[i].metadata
+            tag_metadata[meta.id] = GetMetadataDict(meta)
+            tag_id_times[meta.id] = [tag[i].position[0], tag[i].position[0]+tag[i].extent[0]]
+        except:
+            print(nixfile.split('/')[-1] + ' has no tags, no file will be saved')
+            return None
+
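+    # data arrays named '<protocol> onset times' / '<protocol> durations' are renamed
+    # with underscores, so that the per-sweep keys extracted below become the uniform
+    # column names 'onset_times' and 'durations'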
+    protocol_idcs_old = np.where([(' onset times' in name) for name in names])[0]
+    durations_idcs_old = np.where([(' durations' in name) for name in names])[0]
+    for idx in protocol_idcs_old:
+        names[idx] = names[idx].split(' onset times')[0] + '_onset_times'
+    for idx in durations_idcs_old:
+        names[idx] = names[idx].split(' durations')[0] + '_durations'
+
+    protocol_idcs = np.where([('_onset_times' in name) for name in names])[0]
+
+    if len(protocol_idcs) == 0:
+        print(nixfile.split('/')[-1] + ' is empty, no file will be saved')
+        return None
+
+    data_df = []
+    for hn in header_names:
+        idcs = np.where(np.array([hn in name[:len(hn)] for name in names], dtype=int))[0]
+        for j,idx in enumerate(idcs):
+            data = data_arrays[int(idx)][:]
+            if j == 0:
+                i0 = len(data_df)
+                protname = hn.split('-')[0]
+                typename = hn.split('-')[1]
+                if protname == 'VC=':
+                    continue
+
+                data_df.extend([[]]*len(data))
+                for jj in range(len(data)):
+                    data_df[i0 + jj] = {}
+                    data_df[i0 + jj]['type'] = typename
+                    # data_df[i0 + jj]['row_id'] = uuid.uuid4()
+                    if typename == 'PNSubatraction':
+                        data_df[i0 + jj]['type'] = 'PNSubtraction'
+                    elif typename == 'Qualitycontrol':
+                        data_df[i0 + jj]['type'] = 'QualityControl'
+
+            dataname = names[idx].split(hn)[1][1:]
+            if protname == 'VC=':
+                continue
+            if dataname == 'Current-1':
+                continue
+            for jj in range(len(data)):
+                data_df[i0 + jj][dataname] = data[jj]
+
+    keys = np.unique([list(d.keys()) for d in data_df])
+    k = []
+    for key in keys:
+        k.extend(key)
+    keys = np.unique(k)
+    if 'repro_tag_id' not in keys:
+        for i in range(len(data_df)):
+            data_df[i]['repro_tag_id'] = protocols[~(protocols.position >= data_df[i]['onset_times'])]['id'].iloc[-1]
+
+    traces_idx = np.where([("data.sampled" in d.type) or ("data.events" in d.type) for d in data_arrays])[0]
+    traces_df = []
+
+
+    for i,idx in enumerate(traces_idx):
+        for j in range(len(data_df)):
+            if i == 0:
+                traces_df.append({})
+
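+            # sampled traces are cut to [onset - before, onset + duration + after];
+            # event arrays (e.g. spike times) are masked to the same window and
+            # shifted so that t = 0 is the stimulus onset; empty windows become NaN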
+            if "data.sampled" in data_arrays[names[int(idx)]].type:
+                idx0 = int((data_df[j]['onset_times'] - before) / dt)
+                idx1 = int((data_df[j]['onset_times'] + data_df[j]['durations'] + after) / dt)
+                if idx0>=idx1:
+                    traces_df[j][names[idx]] = np.array([np.nan])
+                else:
+                    traces_df[j][names[idx]] = data_arrays[names[int(idx)]][idx0:idx1]
+            elif "data.events" in data_arrays[names[int(idx)]].type:
+                idx0 = int((data_df[j]['onset_times'] - before) / dt)
+                idx1 = int((data_df[j]['onset_times'] + data_df[j]['durations'] + after) / dt)
+                t0 = data_df[j]['onset_times'] - before
+                t1 = data_df[j]['onset_times'] + data_df[j]['durations'] + after
+                if t0>=t1:
+                    traces_df[j][names[idx]] = np.array([np.nan])
+                else:
+                    arr = data_arrays[names[int(idx)]][:]
+                    traces_df[j][names[idx]] = arr[(arr>=t0) & (arr<=t1)] - data_df[j]['onset_times']
+
+            if i == 0:
+                traces_df[j]['time'] = time[idx0:idx1] - data_df[j]['onset_times']
+                traces_df[j]['time_before_stimulus'] = before
+                traces_df[j]['time_after_stimulus'] = after
+                traces_df[j]['samplingrate'] = 1 / dt
+                traces_df[j]['meta_id'] = list(block_metadata.keys())[0]
+
+                if type(data_df[j]['repro_tag_id']) == bytes:
+                    data_df[j]['repro_tag_id'] = data_df[j]['repro_tag_id'].decode("utf-8")
+                traces_df[j]['protocol'] = np.array(protocols[protocols.id == data_df[j]['repro_tag_id']].name)[0].split('_')[0]
+                traces_df[j]['protocol_number'] = np.array(protocols[protocols.id == str(data_df[j]['repro_tag_id'])].name)[0].split('_')[1]
+                traces_df[j]['id'] = data_df[j]['repro_tag_id']
+
+    metadic = {}
+    for i,key in enumerate(protocols.id):
+        d = GetMetadataDict(tag[key].metadata)
+        if (len(d.keys()) == 1) and ('RePro-Info' in list(d.keys())[0]):
+            d = d['RePro-Info']
+        metadic[key] = DicCrawl(d, key='tag_meta', delimiter=delimiter)
+        metadic[key].update(DicCrawl(block_metadata[list(block_metadata.keys())[0]], key='block_meta', delimiter=delimiter))
+    meta_df = [[]]*len(data_df)
+    for i in range(len(data_df)):
+        meta_df[i] = metadic[str(data_df[i]['repro_tag_id'])]
+
+    dics = [meta_df, data_df, traces_df]
+    old_maxcount = mindepth
+
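+    # pad every key with trailing delimiters up to the maximum nesting depth, so that
+    # str.split(delimiter, expand=True) below yields a rectangular column MultiIndex;
+    # e.g. at depth 2, 'type' becomes 'type;;' next to 'tag_meta;settings;pn'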
+    for k in range(len(dics)):
+        for j in range(len(dics[k])):
+            keys = list(dics[k][j].keys())
+
+            counts = np.array([key.count(delimiter) for key in keys])
+            maxcount = np.max(counts)
+            if maxcount < mindepth:
+                maxcount = mindepth
+
+            if maxcount < old_maxcount:
+                maxcount = old_maxcount
+
+            # append delimiters to the keys until each key contains the same number of keys
+            for i, key in enumerate(keys):
+                add = maxcount - counts[i]
+                newkey=key
+                for ii in range(add):
+                    newkey += delimiter
+                dics[k][j][newkey] = dics[k][j].pop(key)
+            old_maxcount = maxcount
+
+    data_df = pd.DataFrame(data_df)
+    traces_df = pd.DataFrame(traces_df)
+    meta_df = pd.DataFrame(meta_df)
+
+    data = pd.concat([meta_df, data_df, traces_df], axis=1)
+    data.columns = data.columns.str.split(';', expand=True)
+
+    if savefile != False:
+        if savefile == True:
+            savefile = nixfile.split('.nix')[0]
+        if saveto != None:
+            savefile = savefile.split('/')[-1]
+            savefile = saveto+savefile
+
+        with open(savefile + '_dataframe.pickle', 'wb') as f:
+            pickle.dump(data, f, -1)  # create pickle files, using the highest pickle protocol
+    return data
+
+
+def NixToFrame(folder, before=0.0, after=0.0, skipold=True, saveto=None, mindepth=0):
+    '''
+    searches subfolders of folder to convert .nix files to pandas dataframes and saves them
+
+    :param folder: path to a folder that contains subfolders of year-month-day-aa style which contain .nix files
+    :param before (float): extra time saved before each sweep
+    :param after (float): extra time saved after each sweep
+    :param skipold (bool): skip the creation of a dataframe if it already exists
+    :param saveto (string): path where the files should be saved; if None they are saved in the folder of the .nix file
+    :param mindepth (int): minimum nesting depth of the dataframe columns
+    '''
+    if folder[-1] != '/':
+        folder = folder + '/'
+
+    dirlist = os.listdir(folder)
+    for dir in dirlist:
+        if os.path.isdir(folder + dir):
+            for file in os.listdir(folder+dir):
+                if ('.nix' in file):
+                    if skipold == True:
+                        if np.sum(['_dataframe.pickle' in x for x in os.listdir(folder+dir)]) >= 1:
+                            print('skip ' + file)
+                            continue
+                    print(file)
+                    DataFrame(folder+dir+'/'+file, before, after, True, saveto, mindepth=mindepth)
+
+
+def load_data(filename):
+    with open(filename, 'rb') as f:
+        data = pickle.load(f)  # load data with pickle
+    return data
+
+
+def GetMetadataDict(metadata):
+    def unpackMetadata(sec):
+        metadata = {prop.name: sec[prop.name] for prop in sec.props}
+        if hasattr(sec, 'sections') and len(sec.sections) > 0:
+            metadata.update({subsec.name: unpackMetadata(subsec) for subsec in sec.sections})
+        return metadata
+    return unpackMetadata(metadata)
+
+
+def DicCrawl(dic, key='', delimiter=';'):
+    keylist = GetKeyList(dic, key=key, delimiter=delimiter)
+    newdic = KeylistToDic(keylist)
+    return newdic
+
+
+def GetKeyList(dic, keylist=[], key='', delimiter=';'):
+    items = dic.items()
+    for item in items:
+        if type(item[1]) == dict:
+            if len(key) == 0:
+                keylist = GetKeyList(item[1], keylist, str(item[0]), delimiter)
+            else:
+                keylist = GetKeyList(item[1], keylist, key + delimiter + str(item[0]), delimiter)
+        else:
+            if len(key) == 0:
+                keylist.append([str(item[0]), item[1]])
+            else:
+                keylist.append([key + delimiter + str(item[0]), item[1]])
+    return keylist
+
+
+def KeylistToDic(keylist):
+    dic = {}
+    for item in keylist:
+        dic[item[0]] = item[1]
+    return dic
+
+
+def PNSubtraction(df, currenttrace='Current-1', newcurrenttrace='Current-2', kernelcurrenttrace='Current-3', delimiter=';'):
+    '''
+    Only for VoltageClamp experiments; WILL BE OBSOLETE SOON
+
+    :param df (pandas.DataFrame): NixFrame.DataFrame
+    :param currenttrace (string): column holding the raw current trace
+    :param newcurrenttrace (string): column that receives the P/N-subtracted current
+    :param kernelcurrenttrace (string): column that receives the kernel-corrected current
+    :param delimiter (string): delimiter used for the nested column names
+    :return: dataframe with the corrected currents added and the PNSubtraction rows dropped
+    '''
+
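+    # P/N leak subtraction (inferred from the code, not documented in the file): each
+    # test pulse is preceded by |pn| scaled copies of the stimulus; their averaged,
+    # baseline-corrected current estimates the passive leak/capacitive response,
+    # which is scaled by pn and subtracted from the test sweep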
+    df = deepcopy(df)
+    if 'TraceId' not in df.columns:
+        for id in np.unique(df.repro_tag_id):
+            dfp = df[df.repro_tag_id == id]
+            if np.isnan(dfp.pn.iloc[0]) or dfp.pn.iloc[0] == 0:
+                continue
+
+            pn = int(dfp.pn.iloc[0])
+            for i in np.arange(0,len(dfp),np.abs(pn)+1, dtype=int):
+                locs_pn = df[df.repro_tag_id == id].iloc[i:i+np.abs(pn)].index
+                loc = locs_pn[-1] + 1
+
+                if loc <= df[df.repro_tag_id == id].index[-1]:
+                    df.loc[locs_pn, 'type'] = 'PNSubtraction'
+                    df.loc[loc, 'type'] = 'Trace'
+
+
+    trace_idx = np.where(df.type == 'Trace')[0]
+    currentdic = [[]] * len(trace_idx)
+    kerneldic = [[]] * len(trace_idx)
+    affix = ''
+    affix_num = len(list(df)[0]) - 1
+    for i in range(affix_num):
+        affix += delimiter
+
+    for idx,i in enumerate(trace_idx):
+        if np.isnan(df.pn.iloc[i]) or df.pn.iloc[i] == 0:
+            continue
+
+        if 'TraceId' in df.columns:
+            trace_id = df.TraceId.iloc[i]
+            idxvec = np.where((df.TraceId == trace_id) & (df.type == 'PNSubtraction'))[0]
+        else:
+            delays = (df['onset_times'][df.type == 'PNSubtraction']) - df['onset_times'].iloc[i]
+            maxidx = delays[np.array(delays)<0].index[-1]
+            idxvec = np.arange(maxidx-np.abs(df.pn.iloc[i])+1, maxidx+.1, dtype=int)
+
+        pn_traces = hf.get_traces(currenttrace, df.iloc[idxvec])
+        I_trace = np.array(df[currenttrace].iloc[i])
+
+        pn_traces = np.mean(pn_traces, axis=1)
+        pn_traces -= np.mean(pn_traces[:int(df.iloc[i].time_before_stimulus * df.iloc[i].samplingrate)])
+        pn_trace = pn_traces*df.pn.iloc[i]
+        I_trace -= np.mean(I_trace[:int(df.iloc[i].time_before_stimulus * df.iloc[i].samplingrate)])
+
+        ''' try kernels '''
+        Vpn_trace = hf.get_traces('V-1', df.iloc[idxvec])
+        tpn = hf.get_traces('time', df.iloc[idxvec])[:, 0]
+        Vtr_trace = np.array(df['V-1'].iloc[i])
+        ttr = np.array(df['time'].iloc[i])
+        kernel_length = 3
+        idx0 = np.nanargmin(np.abs(tpn)) - 1
+        idx1 = np.nanargmin(np.abs(tpn - kernel_length / 1000))
+        idx40 = np.nanargmin(np.abs(tpn - 0.04))
+        idx60 = np.nanargmin(np.abs(tpn - 0.06))
+
+        Vpn = tpn * 0.0 + np.mean(np.mean(Vpn_trace, axis=1)[:idx0])
+        Vpn[tpn >= 0] = np.mean(np.mean(Vpn_trace, axis=1)[idx40:idx60])
+        Vtr = ttr * 0.0 + np.mean(Vtr_trace[:idx0])
+        Vtr[ttr >= 0] = np.mean(Vtr_trace[idx40:idx60])
+
+        ftinput = np.fft.fft(np.diff(Vpn[idx0:idx1]))
+        ftoutput = np.fft.fft(pn_traces[idx0:idx1 - 1])
+        kernel = np.fft.ifft(ftoutput / ftinput)
+        kernel = np.append(kernel, np.zeros(len(I_trace) - len(kernel))+np.mean(kernel[-20:]))
+        kerneldic[idx] = {}
+        kerneldic[idx][kernelcurrenttrace + affix] = I_trace - np.convolve(np.diff(Vtr), kernel)[:len(I_trace)] #- np.convolve(np.diff(Vtr),kernel2)[:len(I_trace)]
+        ''' end of kernels '''
+
+
+        if len(pn_trace) < len(I_trace):
+            I_trace = I_trace[:len(pn_trace)]
+        elif len(pn_trace) > len(I_trace):
+            pn_trace = pn_trace[:len(I_trace)]
+
+        currentdic[idx] = {}
+        currentdic[idx][newcurrenttrace+affix] = I_trace - pn_trace
+
+    currentdf = pd.DataFrame(currentdic, index=df[df.type == 'Trace'].index)
+    currentdf.columns = currentdf.columns.str.split(';', expand=True)
+    df = pd.concat([df, currentdf], axis=1)
+    df = df.drop(index = df[df.type == 'PNSubtraction'].index)
+    return df
+
+
+def QualityControl(df, currenttrace='Current-1', potentialtrace='V-1', delimiter=';'):
+    '''
+    only for VoltageClamp experiments
+    '''
+
+    qc = [[]]*len(df[df.type == 'QualityControl'])
+    prefix = 'qualitycontrol' + delimiter
+    affix = ''
+    affix_num = len(list(df)[0]) - 2
+    for i in range(affix_num):
+        affix += delimiter
+
+    # assume that stimulus has 10ms at holdingpotential followed by 10ms at holdingpotential-20
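+    # the -20 mV step gives the resistance below via Ohm's law, R = 20/(I1-I2);
+    # the bare factor of 20 assumes current units consistent with mV/MOhm (e.g. nA)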
+    samplingrate = df.samplingrate.iloc[0]
+    idx10ms = int(0.01 * samplingrate)
+    idx20ms = int(0.02 * samplingrate)
+    idx1ms = int(0.001 * samplingrate)
+
+    for i in range(len(qc)):
+        qc[i] = {}
+        qc[i][prefix + currenttrace + affix] = df[df.type == 'QualityControl'][currenttrace].iloc[i]
+        qc[i][prefix + potentialtrace + affix] = df[df.type == 'QualityControl'][potentialtrace].iloc[i]
+        qc[i][prefix + 'onset_times' + affix] = df[df.type == 'QualityControl']['onset_times'].iloc[i]
+        qc[i][prefix + 'holdingpotential' + affix] = df[df.type == 'QualityControl']['tag_meta']['settings']['holdingpotential'].iloc[i]
+        if 'TraceId' in df.columns:
+            qc[i][prefix + 'TraceId' + affix] = df[df.type == 'QualityControl']['TraceId'].iloc[i]
+
+        I1 = np.mean(df[df.type == 'QualityControl'][currenttrace].iloc[i][idx10ms-idx1ms : idx10ms - 2])
+        I2 = np.mean(df[df.type == 'QualityControl'][currenttrace].iloc[i][idx20ms-idx1ms : idx20ms - 2])
+        R = 20/(I1-I2)
+        qc[i][prefix + 'resistance' + affix] = R
+    qc_df = pd.DataFrame(qc)
+
+    if len(qc) == 0:
+        return df
+
+    qc_empty = [{}]
+    for key in qc[0].keys():
+        qc_empty[0][key] = np.nan
+    qc_dic = qc_empty * len(df)
+
+    for idx in df[df.type == 'Trace'].index:
+        i = np.where(df.index == idx)[0][0]
+        if 'TraceId' in df.columns:
+            qc_dic[i] = qc[np.where(qc_df[prefix + 'TraceId' + affix] == df.TraceId.loc[idx])[0][0]]
+        else:
+            delays = qc_df[prefix + 'onset_times' + affix] - df['onset_times'].loc[idx]
+            maxidx = np.where(delays[np.array(delays) < 0])[0][0]
+            qc_dic[i] = qc[maxidx]
+
+    qc = pd.DataFrame(qc_dic, index=df.index)
+    qc.columns = qc.columns.str.split(';', expand=True)
+
+    df = pd.concat([df, qc], axis=1)
+    df = df.drop(index=df[df.type == 'QualityControl'].index)
+    return df
+
diff --git a/ephys_analysis.py b/ephys_analysis.py
index 882ae9f..1af0b94 100644
--- a/ephys_analysis.py
+++ b/ephys_analysis.py
@@ -7,17 +7,12 @@ from IPython import embed
 
 
 def stimulus_collector(filename):
-    try:
-        stims = []
-        f = nix.File.open(filename, nix.FileMode.ReadOnly)
-        b = f.blocks[0]
-        for g in b.groups:
-            # embed()
-            # exit()
-            stims.append(g.multi_tags[0].name)
-    except KeyError:
-        embed()
-        exit()
+    stims = []
+    f = nix.File.open(filename, nix.FileMode.ReadOnly)
+    b = f.blocks[0]
+    embed()
+    exit()
+
     return stims
 
 
@@ -26,22 +21,18 @@ def analyze_sams(filename):
     b = f.blocks[0]
     b.metadata.pprint(max_depth=-1)  # max_depth=-1: print everything
     for g in b.groups:
-        if 'sam' in g.name.lower():  # go through loop, until 'sam' is found
-            break
-    # embed()
-    # exit()
-    rtag_data = g.tags[0]
+        break
+
+    rtag_data = g.tags[0]  # rtag_data = tags within this group
     rtag_data.metadata.pprint(max_depth=-1)
 
     print(40*'*')
 
     stim_tag = g.multi_tags[0]
     stim_type = g.multi_tags[0].name
-    stim_pos = stim_tag.positions[:]  # beginnings of stimulations
-    stim_extent = stim_tag.extents[:]  # duration of stimulations
-    # for r in rtag_data.references:
-    #     print(r.name, r.type)
+    stim_pos = stim_tag.positions[:]  # beginnings of stimuli
+    stim_extent = stim_tag.extents[:]  # duration of stimuli
 
     voltage_trace = rtag_data.references['V-1']
 
@@ -56,15 +47,16 @@ def analyze_sams(filename):
         stim_y = [np.max(voltage_trace)+10, np.max(voltage_trace)+10]
         plt.plot([stim_pos[i], stim_pos[i] + stim_extent[i]], stim_y, 'k')
 
+    os.chdir('/home/lisa/Dropbox/Masterarbeit/')
     for j in range(len(spike_data)):
         for k in range(len(spike_data[j])):
-            plt.plot(spike_data[j][k], np.max(voltage_trace)+5, 'o', color='k')
-    plt.xlim([stim_pos[0]-1, stim_extent[-1]+1])
-    plt.title(filename[-26:-13] + ': ' + stim_type)
-    # plt.show()
+            plt.plot(spike_data[j][k], np.max(voltage_trace)+2.5, 'o', color='k')
+    plt.xlim([stim_pos[0]-.5, stim_pos[0] + stim_extent[-1] + .5])
+    plt.xlabel('time [s]')
+    plt.ylabel('voltage [mV]')
+    plt.show()
 
     print('saving...')
-    os.chdir('/home/lisa/Dropbox/Masterarbeit/')
-    plt.savefig(filename[-26:-13] + ': ' + stim_type + '.png')
+    plt.savefig(filename + stim_type + '.png')
     plt.close()
     # f.close()
     # embed()
@@ -73,13 +65,61 @@
     return
 
 
+def analyze_fis(filename):
+    f = nix.File.open(filename, nix.FileMode.ReadOnly)
+    b = f.blocks[0]
+    b.metadata.pprint(max_depth=-1)  # max_depth=-1: print everything
+    for g in b.groups:
+        # if 'FICurve' in g.name:  # go through loop, until 'FICurve' is found
+        #     break
+
+        rtag_data = g.tags[0]  # rtag_data = tags within this group
+        rtag_data.metadata.pprint(max_depth=-1)
+
+        print(40*'*')
+
+        stim_tag = g.multi_tags[0]
+        stim_type = g.multi_tags[0].name
+        stim_pos = stim_tag.positions[:]  # beginnings of stimuli
+        stim_extent = stim_tag.extents[:]  # duration of stimuli
+
+        voltage_trace = rtag_data.references['V-1']
+        for i in range(len(voltage_trace)):
+            print(voltage_trace[i])
+        spike_data = []
+
+        for idx in range(len(stim_pos)):
+            spike_data.append(stim_tag.retrieve_data(idx, 'Spikes-1')[:])
+
+
+        # embed()
+        # exit()
+        dims = voltage_trace.dimensions[0].axis(len(voltage_trace))
+
+        for i in range(len(stim_pos)):
+            plt.plot(dims[:-1], voltage_trace[:-1], color='#3673A4')
+            stim_y = [np.max(voltage_trace)+1.25, np.max(voltage_trace)+1.25]
+            plt.plot([stim_pos[i], stim_pos[i] + stim_extent[i]], stim_y, 'k')
+            xmin = stim_pos[i] - .1
+            xmax = stim_pos[i] + stim_extent[i] + .1
+            y = np.ones(len(spike_data[i]))*(np.max(voltage_trace)+2.5)
+            plt.plot(spike_data[i], y, 'o', color='#DC143C')
+            plt.xlim(xmin, xmax)
+            plt.xlabel('time [s]')
+            plt.ylabel('voltage [mV]')
+            plt.show()
+
+    return
+
+
 if __name__ == '__main__':
     data_dir = '/home/lisa/data/'
     os.chdir(data_dir)
-    data_set = glob.glob('2019-*')
+    data_set = glob.glob('2019*')  # only look at single cells
+    # data_set = '2019*'  # only look at single cells
     for i in range(len(data_set)):
-        # data_set = '2019-08-22-aa-invivo-1'
-        print(data_dir + data_set[i] + '/' + data_set[i])
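+        # the 2019-08-* data sets are skipped below (presumably not single-cell recordings)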
+        if '08' in data_set[i]:
+            continue
+        # print(data_dir + data_set + '/' + data_set)
         analyze_sams(data_dir + data_set[i] + '/' + data_set[i] + '.nix')
-        # analyze_sams(data_dir + data_set + '/' + data_set + '.nix')
+
diff --git a/frame_analysis.py b/frame_analysis.py
new file mode 100644
index 0000000..d2dda3a
--- /dev/null
+++ b/frame_analysis.py
@@ -0,0 +1,32 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import glob
+import os
+import NixFrame as nf
+from IPython import embed
+
+if __name__ == '__main__':
+    data_dir = '/home/lisa/data/'
+    os.chdir(data_dir)
+    data_sets = glob.glob('2019-*')
+    for data_set in data_sets:
+        print(data_set)
+        df = nf.load_data(data_dir + data_set + '/' + data_set + '_dataframe.pickle')
+        embed()
+        exit()
+        for i in range(len(df)):
+
+            stim_type = df['tag_meta']['RePro'][i]
+            smpl_rt = df['samplingrate'][i]
+            # t = df['time'][i+1]
+            voltage = df['V-1'][i]
+            t = np.arange(0, len(voltage)*1./smpl_rt, 1./smpl_rt)
+            print(len(t))
+            spiketimes = df['Spikes-1'][i]
+            stim_onset = df['onset_times'][i]
+            stim_dur = df['durations'][i]
+
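+            # plot the voltage trace, a horizontal bar spanning the stimulus, and one
+            # short vertical tick per spike ([spiketimes, spiketimes] broadcasts into
+            # a 2xN array, one line segment per spike time)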
+            plt.plot(t, voltage, color='#BA2D22')
+            plt.plot([stim_onset, stim_onset + stim_dur], [np.max(voltage)+0.75, np.max(voltage)+0.75], color='#53379B')
+            plt.plot([spiketimes, spiketimes], [np.max(voltage)+1, np.max(voltage)+1.5], color='black')
+            plt.show()
\ No newline at end of file
diff --git a/frame_maker.py b/frame_maker.py
new file mode 100644
index 0000000..ed05de7
--- /dev/null
+++ b/frame_maker.py
@@ -0,0 +1,8 @@
+from IPython import embed
+import NixFrame as nf
+
+folder = '../../data'
+
+# NixToFrame has no return value; it pickles one dataframe per .nix file next to the data
+nf.NixToFrame(folder, before=0.0, after=0.0, skipold=True, saveto=None, mindepth=0)
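+
+# A later script can reload a saved frame like this (the path is a placeholder):
+#
+#     import NixFrame as nf
+#     df = nf.load_data('../../data/2019-01-01-aa/2019-01-01-aa_dataframe.pickle')
+#     df['V-1'].iloc[0]           # first voltage sweep
+#     df['samplingrate'].iloc[0]  # sampling rate of that sweep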