personality/boldness/code/boldness_estimation.py

213 lines
6.6 KiB
Python

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def list_of_tuple_strings_to_float(strings):
parts = strings[1:-1].strip().split(',')
start_times = []
end_times = []
for i in range(len(parts)):
part = parts[i].strip()
if len(part) == 0:
continue
if i % 2 == 0:
start_times.append(float(part[1:]))
else:
end_times.append(float(part[:-1]))
return start_times, end_times
def get_visits(df, subject, day):
visits = []
#subjects = ['lepto03', 'lepto20', 'lepto40', 'lepto42', 'lepto43', 'lepto46', 'lepto48']
#for subject in subjects:
for feeder_number in range(1, 9):
visits.append(df["feeder_%i_visits" % feeder_number][(df.day == day) & (df.subject == subject)].values[0])
return visits
def get_feeder_risks(df, day):
"""Returns the risks associated with the feeders for a given experimental day.
Args:
df ([type]): [description]
day ([type]): [description]
Returns:
[type]: [description]
"""
risks = []
for feeder_number in range(1, 9):
risks.append(df["feeder_%i_risk" % feeder_number][(df.day == day)].values[0])
return risks
def get_feeder_visit_counts(df, subject, day):
visit_counts = []
for feeder_number in range(1, 9):
visit_counts.append(df["feeder_%i_visits" % feeder_number][(df.day == day) & (df.subject == subject)].values[0])
return visit_counts
def get_feeder_visit_times(df, subject, day):
"""
Get the visit times and visit durations for each feeder for a given subject on a given day.
Args:
df ([type]): [description]
subject ([type]): [description]
day ([type]): [description]
Returns:
visit_times [list]: list of 8 entries containing all times a particular feeder was visited
visit_durations [list]: list of 8 entries containing the durations of each feeder visit
"""
visit_times = []
visit_durations = []
data = df[df.subject == subject]
days = data.day.unique()
if day not in days:
raise ValueError("Subject %s was not recorded for day %s." % (subject, day))
for feeder_number in range(1, 9):
times = df["feeder_%i_visit_times" % feeder_number][(df.day == day) & (df.subject == subject)].values[0]
start_times, end_times = list_of_tuple_strings_to_float(times)
duration = []
for start, end in zip(start_times, end_times):
duration.append(end - start)
visit_times.append(start_times)
visit_durations.append(duration)
return visit_times, visit_durations
def get_first_visits(visit_times, feeder_risks):
"""Returns the times, risks, and feeder numbers of only the first visit to the feeders.
lists are sorted by the visit time.
Args:
visit_times ([type]): [description]
feeder_risks ([type]): [description]
Returns:
[type]: [description]
"""
first_visits = []
risks = []
feeder_numbers = []
for i, vt in enumerate(visit_times):
if len(vt) == 0:
continue
first_visits.append(vt[0])
risks.append(feeder_risks[i])
feeder_numbers.append(i)
sorted_risks = np.asarray(risks)[np.argsort(first_visits)]
sorted_numbers = np.asarray(feeder_numbers)[np.argsort(first_visits)]
sorted_times = np.asarray(first_visits)[np.argsort(first_visits)]
return sorted_times, sorted_risks, sorted_numbers
def estimate_boldness(visit_times, feeder_risks):
"""Estimates the boldness score based on the feeder visit_times and feeder risks
Args:
visit_times ([type]): [description]
feeder_risks ([type]): [description]
Returns:
[type]: [description]
"""
_, sorted_risks, _ = get_first_visits(visit_times, feeder_risks)
boldness_gain = np.zeros_like(sorted_risks)
boldness_gain[sorted_risks > 1] = 1
x = 0
y = 0
score = 0
intersection = np.arange(len(sorted_risks) + 1)
for gain in boldness_gain:
if gain == 0:
x += 1
else:
y += 1
score += y - intersection[x]
boldness = score / len(sorted_risks)
return boldness, len(sorted_risks)
def get_boldness_score(df, subject):
"""Get the boldness scores for one subject.
Args:
df ([type]): [description]
subject ([type]): [description]
Returns:
[type]: [description]
"""
days = df.day[df.subject == subject].unique()
boldness_scores = []
for d in days:
visit_times, _ = get_feeder_visit_times (df, subject, d)
risks = get_feeder_risks(df, d)
boldness, count = estimate_boldness(visit_times, risks)
b = {"subject": subject, "day": d, "boldness": boldness, "total_visits": count}
boldness_scores.append(b)
return boldness_scores
def plot_boldness_analysis(df, subject, day, max_feeder_count=8):
"""Create a plot for one boldness estimate of one subject on one day.
Args:
df ([type]): [description]
subject ([type]): [description]
day ([type]): [description]
"""
times, _ = get_feeder_visit_times(df, subject, day)
feeder_risks = get_feeder_risks(df, day)
first_visits, risks, numbers = get_first_visits(times, feeder_risks)
boldness_gain = np.zeros_like(risks)
boldness_gain[risks > 1] = 1
x = 0
y = 0
fig = plt.figure(figsize=(5,5))
axis = fig.add_subplot(111)
axis.plot(np.arange(max_feeder_count/2 + 1), ls="dashed", color="silver", lw=1.0)
axis.set_xlim([-0.1, 4.2])
axis.set_ylim([-0.1, 4.2])
axis.set_xticks(range(int(max_feeder_count/2) + 1))
axis.set_yticks(range(int(max_feeder_count/2) + 1))
axis.set_xlabel("feeder visits dark")
axis.set_ylabel("feeder visits light")
for t, gain in zip(first_visits, boldness_gain):
if gain == 0:
x += 1
else:
y += 1
axis.scatter(x, y, s=20, marker="*", color="k")
axis.text(x, y + 0.1, "%.2f s" % t, ha="center", va="bottom")
fig.savefig("boldness_analysis_%s_%s.pdf" % (subject, day))
plt.close()
if __name__ == "__main__":
df = pd.read_csv("../data/feeder_visits.csv", sep=';', index_col=0)
subjects = df.subject.unique()
all_boldness_scores = []
for subject in subjects:
all_boldness_scores.extend(get_boldness_score(df, subject))
scores = pd.DataFrame(all_boldness_scores)
scores.to_csv("boldness_scores.csv", sep=";")
for s in subjects:
print(s, np.mean(scores.boldness[scores.subject == s]), np.mean(scores.total_visits[scores.subject == s]))
plot_boldness_analysis(df, s, "day_4")
plot_boldness_analysis(df, s, "day_5")