new project on serial correlations

This commit is contained in:
Jan Benda 2017-01-22 22:47:50 +01:00
parent 1e5c588c31
commit a932ca022f
12 changed files with 706 additions and 0 deletions

View File

@ -0,0 +1,10 @@
latex:
pdflatex *.tex > /dev/null
pdflatex *.tex > /dev/null
clean:
rm -rf *.log *.aux *.zip *.out auto
rm -f `basename *.tex .tex`.pdf
zip: latex
zip `basename *.tex .tex`.zip *.pdf *.dat *.mat *.m

Binary file not shown.

View File

@ -0,0 +1,365 @@
from os import path
try:
from itertools import izip
except:
izip = zip
import types
from numpy import array, arange, NaN, fromfile, float32, asarray, unique, squeeze, Inf, isnan, fromstring
from numpy.core.records import fromarrays
#import nixio as nix
import re
import warnings
identifiers = {
'stimspikes1.dat': lambda info: ('RePro' in info[-1] and info[-1]['RePro'] == 'FileStimulus'),
'samallspikes1.dat': lambda info: ('RePro' in info[-1] and info[-1]['RePro'] == 'SAM'),
}
def isfloat(value):
try:
float(value)
return True
except ValueError:
return False
def info_filter(iter, filterfunc):
for info, key, dat in iter:
if filterfunc(info):
yield info, key, dat
def iload_io_pairs(basedir, spikefile, traces, filterfunc=None):
"""
Iterator that returns blocks of spike traces and spike times from the base directory basedir (e.g. 2014-06-06-aa)
and the spiketime file (e.g. stimspikes1.dat). A filter function can filter out unwanted blocks. It gets the info
(see iload and iload trace_trials) from all traces and spike times and has to return True is the block is wanted
and False otherwise.
:param basedir: basis directory of the recordings (e.g. 2014-06-06-aa)
:param spikefile: spikefile (e.g. stimspikes1.dat)
:param traces: trace numbers as a list (e.g. [1,2])
:param filterfunc: function that gets the infos from all traces and spike times and indicates whether the block is wanted or not
"""
if filterfunc is None: filterfunc = lambda inp: True
if type(traces) is not types.ListType:
traces = [traces]
assert spikefile in identifiers, """iload_io_pairs does not know how to identify trials in stimuli.dat which
correspond to trials in {0}. Please update pyRELACS.DataLoader.identifiers
accordingly""".format(spikefile)
iterators = [info_filter(iload_trace_trials(basedir, tn), identifiers[spikefile]) for tn in traces] \
+ [iload_spike_blocks(basedir + '/' + spikefile)]
for stuff in izip(*iterators):
info, key, dat = zip(*stuff)
if filterfunc(*info):
yield info, key, dat
def iload_spike_blocks(filename):
"""
Loades spike times from filename and merges trials with incremental trial numbers into one block.
Spike times are assumed to be in seconds and are converted into ms.
"""
current_trial = -1
ret_dat = []
old_info = old_key = None
for info, key, dat in iload(filename):
if 'trial' in info[-1]:
if int(info[-1]['trial']) != current_trial + 1:
yield old_info[:-1], key, ret_dat
ret_dat = []
current_trial = int(info[-1]['trial'])
if not any(isnan(dat)):
ret_dat.append(squeeze(dat)/1000.)
else:
ret_dat.append(array([]))
old_info, old_key = info, key
else:
if len(ret_dat) > 0:
yield old_info[:-1], old_key, ret_dat
ret_dat = []
yield info, key, dat
else:
if len(ret_dat) > 0:
yield old_info[:-1], old_key, ret_dat
def iload_trace_trials(basedir, trace_no=1, before=0.0, after=0.0 ):
"""
returns:
info : metadata from stimuli.dat
key : key from stimuli.dat
data : the data of the specified trace of all trials
"""
x = fromfile('%s/trace-%i.raw' % (basedir, trace_no), float32)
p = re.compile('([-+]?\d*\.\d+|\d+)\s*(\w+)')
for info, key, dat in iload('%s/stimuli.dat' % (basedir,)):
X = []
val, unit = p.match(info[-1]['duration']).groups()
val = float( val )
if unit == 'ms' :
val *= 0.001
duration_index = key[2].index('duration')
# if 'RePro' in info[1] and info[1]['RePro'] == 'FileStimulus':
# embed()
# exit()
sval, sunit = p.match(info[0]['sample interval%i' % trace_no]).groups()
sval = float( sval )
if sunit == 'ms' :
sval *= 0.001
l = int(before / sval)
r = int((val+after) / sval)
if dat.shape == (1,1) and dat[0,0] == 0:
warnings.warn("iload_trace_trials: Encountered incomplete '-0' trial.")
yield info, key, array([])
continue
for col, duration in zip(asarray([e[trace_no - 1] for e in dat], dtype=int), asarray([e[duration_index] for e in dat], dtype=float32)): #dat[:,trace_no-1].astype(int):
tmp = x[col-l:col + r]
if duration < 0.001: # if the duration is less than 1ms
warnings.warn("iload_trace_trials: Skipping one trial because its duration is <1ms and therefore it is probably rubbish")
continue
if len(X) > 0 and len(tmp) != len(X[0]):
warnings.warn("iload_trace_trials: Setting one trial to NaN because it appears to be incomplete!")
X.append(NaN*X[0])
else:
X.append(tmp)
yield info, key, asarray(X)
def iload_traces(basedir, repro='', before=0.0, after=0.0 ):
"""
returns:
info : metadata from stimuli.dat
key : key from stimuli.dat
time : an array for the time axis
data : the data of all traces of a single trial
"""
p = re.compile('([-+]?\d*\.\d+|\d+)\s*(\w+)')
# open traces files:
sf = []
for trace in xrange( 1, 1000000 ) :
if path.isfile( '%s/trace-%i.raw' % (basedir, trace) ) :
sf.append( open( '%s/trace-%i.raw' % (basedir, trace), 'rb' ) )
else :
break
for info, key, dat in iload('%s/stimuli.dat' % (basedir,)):
if len( repro ) > 0 and repro != info[1]['RePro'] :
continue
val, unit = p.match(info[-1]['duration']).groups()
val = float( val )
if unit == 'ms' :
val *= 0.001
duration_index = key[2].index('duration')
sval, sunit = p.match(info[0]['sample interval%i' % 1]).groups()
sval = float( sval )
if sunit == 'ms' :
sval *= 0.001
l = int(before / sval)
r = int((val+after) / sval)
if dat.shape == (1,1) and dat[0,0] == 0:
warnings.warn("iload_traces: Encountered incomplete '-0' trial.")
yield info, key, array([])
continue
deltat, unit = p.match(info[0]['sample interval1']).groups()
deltat = float( deltat )
if unit == 'ms' :
deltat *= 0.001
time = arange( 0.0, l+r )*deltat - before
for d in dat :
duration = d[duration_index]
if duration < 0.001: # if the duration is less than 1ms
warnings.warn("iload_traces: Skipping one trial because its duration is <1ms and therefore it is probably rubbish")
continue
x = []
for trace in xrange( len( sf ) ) :
col = int(d[trace])
sf[trace].seek( (col-l)*4 )
buffer = sf[trace].read( (l+r)*4 )
tmp = fromstring(buffer, float32)
if len(x) > 0 and len(tmp) != len(x[0]):
warnings.warn("iload_traces: Setting one trial to NaN because it appears to be incomplete!")
x.append(NaN*x[0])
else:
x.append(tmp)
yield info, key, time, asarray( x )
def iload(filename):
meta_data = []
new_meta_data = []
key = []
within_key = within_meta_block = within_data_block = False
currkey = None
data = []
with open(filename, 'r') as fid:
for line in fid:
line = line.rstrip().lstrip()
if within_data_block and (line.startswith('#') or not line):
within_data_block = False
yield list(meta_data), tuple(key), array(data)
data = []
# Key parsing
if line.startswith('#Key'):
key = []
within_key = True
continue
if within_key:
if not line.startswith('#'):
within_key = False
else:
key.append(tuple([e.strip() for e in line[1:].split(" ") if len(e.strip()) > 0]))
continue
# fast forward to first data point or meta data
if not line:
within_key = within_meta_block = False
currkey = None
continue
# meta data blocks
elif line.startswith('#'): # cannot be a key anymore
if not within_meta_block:
within_meta_block = True
new_meta_data.append({})
if ':' in line:
tmp = [e.rstrip().lstrip() for e in line[1:].split(':')]
elif '=' in line:
tmp = [e.rstrip().lstrip() for e in line[1:].split('=')]
else:
currkey = line[1:].rstrip().lstrip()
new_meta_data[-1][currkey] = {}
continue
if currkey is None:
new_meta_data[-1][tmp[0]] = tmp[1]
else:
new_meta_data[-1][currkey][tmp[0]] = tmp[1]
else:
if not within_data_block:
within_data_block = True
n = len(new_meta_data)
meta_data[-n:] = new_meta_data
new_meta_data = []
currkey = None
within_key = within_meta_block = False
data.append([float(e) if (e != '-0' and isfloat(e)) else NaN for e in line.split()])
else: # if for loop is finished, return the data we have so far
if within_data_block and len(data) > 0:
yield list(meta_data), tuple(key), array(data)
def recload(filename):
for meta, key, dat in iload(filename):
yield meta, fromarrays(dat.T, names=key[0])
def load(filename):
"""
Loads a data file saved by relacs. Returns a tuple of dictionaries
containing the data and the header information
:param filename: Filename of the data file.
:type filename: string
:returns: a tuple of dictionaries containing the head information and the data.
:rtype: tuple
"""
with open(filename, 'r') as fid:
L = [l.lstrip().rstrip() for l in fid.readlines()]
ret = []
dat = {}
X = []
keyon = False
currkey = None
for l in L:
# if empty line and we have data recorded
if (not l or l.startswith('#')) and len(X) > 0:
keyon = False
currkey = None
dat['data'] = array(X)
ret.append(dat)
X = []
dat = {}
if '---' in l:
continue
if l.startswith('#'):
if ":" in l:
tmp = [e.rstrip().lstrip() for e in l[1:].split(':')]
if currkey is None:
dat[tmp[0]] = tmp[1]
else:
dat[currkey][tmp[0]] = tmp[1]
elif "=" in l:
tmp = [e.rstrip().lstrip() for e in l[1:].split('=')]
if currkey is None:
dat[tmp[0]] = tmp[1]
else:
dat[currkey][tmp[0]] = tmp[1]
elif l[1:].lower().startswith('key'):
dat['key'] = []
keyon = True
elif keyon:
dat['key'].append(tuple([e.lstrip().rstrip() for e in l[1:].split()]))
else:
currkey = l[1:].rstrip().lstrip()
dat[currkey] = {}
elif l: # if l != ''
keyon = False
currkey = None
X.append([float(e) for e in l.split()])
if len(X) > 0:
dat['data'] = array(X)
else:
dat['data'] = []
ret.append(dat)
return tuple(ret)

Binary file not shown.

View File

@ -0,0 +1,30 @@
03-03-28-ab
03-03-28-ac
03-03-28-af
03-03-31-ad
03-03-31-af
03-03-31-ai
03-03-31-ak
03-03-31-al
03-04-07-ab
03-04-07-ac
03-04-07-ad
03-04-07-ae
03-04-07-af
03-04-07-ag
03-04-07-aj
03-04-10-aa
03-04-10-ac
03-04-10-ad
03-04-10-ae
03-04-10-af
03-04-10-ag
03-04-10-ah
03-04-10-ai
03-04-10-aj
03-04-14-ab
03-04-14-ad
03-04-14-ae
03-04-14-af
03-04-14-ah
03-04-16-ab

View File

@ -0,0 +1,45 @@
import numpy as np
from scipy.io import savemat
import matplotlib.pyplot as plt
import DataLoader as dl
cells = []
spikes = []
with open('goodbaselinefiles.dat') as sf:
for line in sf:
cell = line.strip()
datapath = '/data/jan/data/efish/smallchirps/single/data/' + cell
for info, key, data in dl.iload(datapath + '/basespikes.dat'):
cells.append(cell)
spikes.append(data[:, 0])
break
spikesobj = np.zeros((len(spikes), ), dtype=np.object)
cellsobj = np.zeros((len(cells), ), dtype=np.object)
for k in range(len(spikes)):
spikesobj[k] = 0.001*spikes[k]
cellsobj[k] = cells[k]
savemat('baselinespikes.mat', mdict={'cells': cellsobj, 'spikes': spikesobj})
exit()
trial = 0
intensities = []
spikes = []
for info, key, data in dl.iload('03-04-07-ab-fispikes.dat'):
if info[0]['Index'] == '1':
trial = int(info[1]['Trial'])
intensity = float(info[1]['Intensity'].replace('mV/cm', ''))
if trial == 0:
intensities.append(intensity)
spikes.append([])
prevtrial = trial
spikes[-1].append(data[:, 0])
spikesobj = np.zeros((len(spikes), len(spikes[0])), dtype=np.object)
for k in range(len(spikes)):
for j in range(len(spikes[k])):
spikesobj[k, j] = 0.001*spikes[k][j]
savemat('ficurvespikes.mat', mdict={'intensities': intensities, 'spikes': spikesobj})

View File

@ -0,0 +1,81 @@
\documentclass[addpoints,11pt]{exam}
\usepackage{url}
\usepackage{color}
\usepackage{hyperref}
\pagestyle{headandfoot}
\runningheadrule
\firstpageheadrule
\firstpageheader{Scientific Computing}{Project Assignment}{11/05/2014
-- 11/06/2014}
%\runningheader{Homework 01}{Page \thepage\ of \numpages}{23. October 2014}
\firstpagefooter{}{}{}
\runningfooter{}{}{}
\pointsinmargin
\bracketedpoints
%\printanswers
%\shadedsolutions
\begin{document}
%%%%%%%%%%%%%%%%%%%%% Submission instructions %%%%%%%%%%%%%%%%%%%%%%%%%
\sffamily
% \begin{flushright}
% \gradetable[h][questions]
% \end{flushright}
\begin{center}
\input{../disclaimer.tex}
\end{center}
%%%%%%%%%%%%%% Questions %%%%%%%%%%%%%%%%%%%%%%%%%
\begin{questions}
\question P-unit electroreceptor afferents of the gymnotiform weakly
electric fish \textit{Apteronotus leptorhynchus} are spontaneously
active when the fish is not electrically stimulated.
\begin{itemize}
\item How does the firing rates and the serial correlations of the
interspike intervals vary between different cells?
\end{itemize}
In the file \texttt{baselinespikes.mat} you find two variables:
\texttt{cells} is a cell-array with the names of the recorded cells
and \texttt{spikes} is a cell array containing the spike times in
seconds of recorded spontaneous activity for each of these cells.
\begin{parts}
\part Load the data! How many cells are contained in the file?
\part Plot the spike rasters of a few cells.
For the presentation, choose a few cells based on the results of
this project.
By just looking on the spike rasters, what are the differences
betwen the cells?
\part Compute the firing rate of each cell, i.e. number of spikes per time.
Illustrate the results by means of a histogram and/or box whisker plot.
\part Compute and plot the serial correlations between interspike intervals up to lag 10.
What do you observe? In what way are the interspike-interval
correlations similar betwen the cells? How do they differ?
\part Implement a permutation test for computing the significance
at a 1\,\% level of the serial correlations. Illustrate for a few
cells the computed serial correlations and the 1\,\% and 99\,\%
percentile from the permutation test. At which lag are the serial
correlations clearly significant?
\part Are the serial correlations somehow dependent on the firing rate?
Plot the significant correlations against the firing rate. Do you
observe any dependence?
\end{parts}
\end{questions}
\end{document}

View File

@ -0,0 +1,9 @@
function rate = firingrate(spikes, tmin, tmax)
% mean firing rate between tmin and tmax.
rates = zeros(length(spikes), 1);
for i = 1:length(spikes)
times= spikes{i};
rates(i) = length(times((times>=tmin)&(times<=tmax)))/(tmax-tmin);
end
rate = mean(rates);
end

View File

@ -0,0 +1,26 @@
function isicorr = isiserialcorr(spikes, maxlag)
% serial correlation of interspike intervals
%
% isicorr = isiserialcorr(spikes, maxlag)
%
% Arguments:
% spikes: spike times in seconds
% maxlag: the maximum lag
%
% Returns:
% isicorr: vector with the serial correlations for lag 0 to maxlag
isivec = [];
for k = 1:length(spikes)
times = spikes{k};
isivec = [isivec; diff(times(:))];
end
lags = 0:maxlag;
isicorr = zeros(size(lags));
for k = 1:length(lags)
lag = lags(k);
if length(isivec) > lag+10 % ensure "enough" data
isicorr(k) = corr(isivec(1:end-lag), isivec(lag+1:end));
end
end

View File

@ -0,0 +1,46 @@
function [isicorr, lowerbound, upperbound] = isiserialcorrbootstrap(spikes, maxlag)
% serial correlation of interspike intervals
%
% isicorr = isiserialcorrbootstrap(spikes, maxlag)
%
% Arguments:
% spikes: spike times in seconds
% maxlag: the maximum lag
%
% Returns:
% isicorr: vector with the serial correlations for lag 0 to maxlag
isivec = [];
for k = 1:length(spikes)
times = spikes{k};
isivec = [isivec; diff(times(:))];
end
lags = 0:maxlag;
isicorr = zeros(size(lags));
for k = 1:length(lags)
lag = lags(k);
if length(isivec) > lag+10 % ensure "enough" data
isicorr(k) = corr(isivec(1:end-lag), isivec(lag+1:end));
end
end
repeats = 1000;
isicorrshuffled = zeros(repeats, length(lags));
for i = 1:repeats
isishuffled = isivec(randperm(length(isivec)));
for k = 1:length(lags)
lag = lags(k);
if length(isivec) > lag+10 % ensure "enough" data
isicorrshuffled(i, k) = corr(isishuffled(1:end-lag), isishuffled(lag+1:end));
end
end
end
bounds = prctile(isicorrshuffled, [1.0 99], 1);
lowerbound = bounds(1, :);
upperbound = bounds(2, :);
end

View File

@ -0,0 +1,64 @@
%% load data:
data = load('baselinespikes.mat');
spikes = data.spikes;
cells = data.cells;
%% print raster:
maxn = length(spikes);
if maxn > 5
maxn = 5;
end
figure()
for k = 1:maxn
subplot(maxn, 1, k);
spikeraster(spikes(k), 0.0, 1.0)
title(cells{k})
end
%% firing rates:
rates = zeros(length(spikes), 1);
for k = 1:length(spikes)
rates(k) = firingrate(spikes(k), 0.0, 9.0);
end
figure();
subplot(1, 2, 1);
boxplot(rates);
subplot(1, 2, 2);
hist(rates, 20)
%% serial correlations:
maxlag = 10;
lags = 0:maxlag;
corrs = zeros(length(spikes), 1);
figure();
for k = 1:length(spikes)
isicorrs = isiserialcorr(spikes(k), maxlag);
corrs(k) = isicorrs(2);
plot(lags, isicorrs);
hold on;
end
hold off;
figure();
plot(rates, corrs, 'o');
ylim([-0.7 0])
%% bootstrap serial correlations:
maxlag = 10;
lags = 0:maxlag;
figure();
for k = 1:maxn
[isicorr, lowerbound, upperbound] = isiserialcorrbootstrap(spikes(k), maxlag);
subplot(maxn, 1, k);
plot(lags, isicorr, 'b', 'linewidth', 2)
hold on;
plot(lags, lowerbound, 'r', 'linewidth', 1)
plot(lags, upperbound, 'r', 'linewidth', 1)
hold off;
end

View File

@ -0,0 +1,30 @@
function spikeraster(spikes, tmin, tmax)
% Display a spike raster of the spike times given in spikes.
%
% spikeraster(spikes, tmax)
% spikes: a cell array of vectors of spike times in seconds
% tmin: plot spike raster starting at tmin seconds
% tmax: plot spike raster upto tmax seconds
ntrials = length(spikes);
for k = 1:ntrials
times = spikes{k};
times = times((times>=tmin) & (times<=tmax));
if tmax < 1.5
times = 1000.0*times; % conversion to ms
end
for i = 1:length( times )
line([times(i) times(i)],[k-0.4 k+0.4], 'Color', 'k');
end
end
if (tmax-tmin) < 1.5
xlabel('Time [ms]');
xlim([1000.0*tmin 1000.0*tmax]);
else
xlabel('Time [s]');
xlim([tmin tmax]);
end
ylabel('Trials');
ylim([0.3 ntrials+0.7 ]);
end