import nixio as nix
import numpy as np
import os
import pandas as pd
import pickle


def DataFrame(nixfile, savefile=False, saveto='./'):
    '''
    Opens a NIX file, extracts the data, and converts it to a pandas.DataFrame.

    :param nixfile (string): path and name of the .nix file
    :param savefile (bool or string): if not False, the DataFrame is saved as a .pickle file;
                                      if True, the file name is derived from nixfile
    :param saveto (string): path to save the files in (NOT IMPLEMENTED YET)
    :return dataframe (pandas.DataFrame): DataFrame with the available NIX data
    '''
    block = nix.File.open(nixfile, 'r').blocks[0]
    data_arrays = block.data_arrays
    names = [data_arrays[i].name for i in range(len(data_arrays))]
    shapes = [x.shape for x in data_arrays]

    # keep only the arrays that are (nearly) as long as the first one, i.e. the continuously sampled traces
    data_names = np.array([[x, i] for i, x in enumerate(names) if shapes[i][0] >= 0.999*shapes[0][0]])
    data_traces = np.array([data_arrays[name][:] for name, idx in data_names])

    # time axis and sampling interval of the traces
    time = data_arrays[1].dimensions[0].axis(data_arrays[1].shape[0])
    dt = time[1] - time[0]

    block_metadata = {}
    block_metadata[block.id] = getMetadataDict(block.metadata)

    tags = block.tags
    tag_metadata = {}
    tag_id_times = {}
    for i in range(len(tags)):
        meta = tags[i].metadata
        tag_metadata[meta.id] = getMetadataDict(meta)
        tag_id_times[meta.id] = [tags[i].position[0], tags[i].position[0] + tags[i].extent[0]]

    data = []
    stim_num = -1
    protocol_idcs = np.where([' onset times' in name for name in names])[0]

    for i in range(len(protocol_idcs)):
        protocol = names[protocol_idcs[i]].split(' onset times')[0]

        # skip certain protocols ('VC=' entries)
        if 'VC=' in protocol:
            continue

        # number of metadata entries belonging to this protocol
        if i == len(protocol_idcs) - 1:
            meta_len = len(names) - protocol_idcs[i]
        else:
            meta_len = protocol_idcs[i+1] - protocol_idcs[i]

        # create a new row for every sweep; prepare a p/n subtraction if necessary
        if any([protocol + '_pn' == string for string in names[protocol_idcs[i]:protocol_idcs[i]+meta_len]]):
            pn = data_arrays[protocol + '_pn'][0]
            sweeps = np.arange(np.abs(pn), len(data_arrays[int(protocol_idcs[i])][:]), np.abs(pn) + 1, dtype=int)
        else:
            pn = np.nan
            sweeps = np.arange(len(data_arrays[int(protocol_idcs[i])][:]), dtype=int)

        for sweep in sweeps:
            stim_num += 1
            data.append({})

            # save protocol name and number
            split_vec = protocol.split('-')
            if len(split_vec) > 2:
                prot_name = split_vec[0]
                prot_num = int(split_vec[-1])
                for j in range(len(split_vec) - 2):
                    prot_name += '-' + split_vec[j+1]
            else:
                prot_name = split_vec[0]
                prot_num = split_vec[-1]
            data[stim_num]['protocol'] = prot_name
            data[stim_num]['protocol_number'] = prot_num

            # save the id of the protocol's data array
            data[stim_num]['id'] = data_arrays[int(protocol_idcs[i])].id

            # save the rest of the stored data
            for idx in range(meta_len):
                j = int(protocol_idcs[i] + idx)
                if (' durations' in names[j]) or (' onset times' in names[j]):
                    continue
                if len(data_arrays[j][sweep]) == 1:
                    data[stim_num][names[j].split(protocol + '_')[-1]] = data_arrays[j][sweep][0]
                else:
                    data[stim_num][names[j].split(protocol + '_')[-1]] = data_arrays[j][sweep]

            data[stim_num]['samplingrate'] = 1/dt

            # cut the traces around the stimulus window
            onset = data_arrays[protocol + ' onset times'][sweep]
            dur = data_arrays[protocol + ' durations'][sweep]
            t0 = int(onset/dt)
            t1 = int((onset + dur)/dt + 1)
            data[stim_num]['onset time'] = onset
            data[stim_num]['duration'] = dur
            for name, idx in data_names:
                data[stim_num][name] = data_traces[int(idx)][t0:t1]
                # event data (e.g. spike times) that fall into the stimulus window
                for j in np.arange(int(idx)+1, protocol_idcs[0]):
                    bool_vec = (data_arrays[names[j]][:] >= onset) & (data_arrays[names[j]][:] <= onset + dur)
                    data[stim_num][names[j]] = np.array(data_arrays[names[j]])[bool_vec]

            data[stim_num]['time'] = time[t0:t1] - data[stim_num]['onset time']

            # p/n subtraction (if necessary)
            # TODO: read the p/n value from the metadata; it is already stored there and does not need to be a separate option
            if not np.isnan(pn) and np.abs(pn) > 0:
                pn_curr = np.zeros(len(data[stim_num]['Current-1']))
                idx = np.where(data_names[:, 0] == 'Current-1')[0][0]
                for j in range(int(np.abs(pn))):
                    onset = data_arrays[protocol + ' onset times'][sweep - j - 1]
                    t0 = int(onset/dt)
                    t1 = int(onset/dt + len(data[stim_num]['Current-1']))
                    pn_curr += data_traces[int(idx), t0:t1]
                data[stim_num]['Current-2'] = data[stim_num]['Current-1'] - pn/np.abs(pn)*pn_curr

            # find the tag whose time span contains this stimulus onset
            tag_id = None
            for key in tag_id_times.keys():
                if tag_id_times[key][0] <= data[stim_num]['onset time'] <= tag_id_times[key][1]:
                    tag_id = key

            # save metadata
            # TODO: this stores the complete metadata in every row; find a more compact representation
            data[stim_num]['block_meta'] = block_metadata[list(block_metadata.keys())[0]]
            data[stim_num]['tag_meta'] = tag_metadata[tag_id]

            # add block and tag ids
            data[stim_num]['block_id'] = list(block_metadata.keys())[0]
            data[stim_num]['tag_id'] = tag_id

    data = pd.DataFrame(data)

    if savefile:
        if savefile is True:
            savefile = nixfile.split('/')[-1].split('.nix')[0]
        with open(savefile + '_dataframe.pickle', 'wb') as f:
            pickle.dump(data, f, -1)  # protocol -1 selects the highest available pickle protocol

    return data


def NixToFrame(folder):
    '''
    Searches the subfolders of folder, converts each .nix file to a pandas DataFrame,
    and saves the DataFrames as pickle files.

    :param folder: path to a folder that contains subfolders in year-month-day-aa style, which in turn contain .nix files
    '''
    if folder[-1] != '/':
        folder = folder + '/'

    for subdir in os.listdir(folder):
        if os.path.isdir(folder + subdir):
            for fname in os.listdir(folder + subdir):
                if '.nix' in fname:
                    print(fname)
                    DataFrame(folder + subdir + '/' + fname, True, folder)


def load_data(filename):
    '''
    Loads a DataFrame that was pickled by DataFrame().
    '''
    with open(filename, 'rb') as f:
        data = pickle.load(f)
    return data


def getMetadataDict(metadata):
    '''
    Recursively converts a NIX metadata section into a nested dictionary.
    '''
    def unpackMetadata(sec):
        metadata = {prop.name: sec[prop.name] for prop in sec.props}
        if hasattr(sec, 'sections') and len(sec.sections) > 0:
            metadata.update({subsec.name: unpackMetadata(subsec) for subsec in sec.sections})
        return metadata
    return unpackMetadata(metadata)
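

# Usage sketch (not part of the original module): assuming a data directory laid out as
# described in the NixToFrame docstring, a typical session could look like the lines below.
# The folder and file names are hypothetical placeholders, not paths from this project.
#
#     NixToFrame('/path/to/recordings/')                        # writes one *_dataframe.pickle per .nix file
#     df = load_data('2019-05-07-aa_dataframe.pickle')          # reload one of the pickled DataFrames
#     print(df[['protocol', 'onset time', 'duration']].head())  # columns created by DataFrame() above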