From 92b3e079a34ea38d3ef10e24bbb6fc860820a02e Mon Sep 17 00:00:00 2001 From: Jan Grewe Date: Wed, 2 Jun 2021 16:17:13 +0200 Subject: [PATCH] [boldness] boldness estimation --- boldness/code/boldness_estimation.py | 212 +++++++++++++++++++++++++++ 1 file changed, 212 insertions(+) create mode 100644 boldness/code/boldness_estimation.py diff --git a/boldness/code/boldness_estimation.py b/boldness/code/boldness_estimation.py new file mode 100644 index 0000000..7ec493a --- /dev/null +++ b/boldness/code/boldness_estimation.py @@ -0,0 +1,212 @@ +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +def list_of_tuple_strings_to_float(strings): + parts = strings[1:-1].strip().split(',') + start_times = [] + end_times = [] + + for i in range(len(parts)): + part = parts[i].strip() + if len(part) == 0: + continue + if i % 2 == 0: + start_times.append(float(part[1:])) + else: + end_times.append(float(part[:-1])) + return start_times, end_times + + +def get_visits(df, subject, day): + visits = [] + #subjects = ['lepto03', 'lepto20', 'lepto40', 'lepto42', 'lepto43', 'lepto46', 'lepto48'] + #for subject in subjects: + for feeder_number in range(1, 9): + visits.append(df["feeder_%i_visits" % feeder_number][(df.day == day) & (df.subject == subject)].values[0]) + return visits + + +def get_feeder_risks(df, day): + """Returns the risks associated with the feeders for a given experimental day. + + Args: + df ([type]): [description] + day ([type]): [description] + + Returns: + [type]: [description] + """ + risks = [] + for feeder_number in range(1, 9): + risks.append(df["feeder_%i_risk" % feeder_number][(df.day == day)].values[0]) + + return risks + + +def get_feeder_visit_counts(df, subject, day): + visit_counts = [] + + for feeder_number in range(1, 9): + visit_counts.append(df["feeder_%i_visits" % feeder_number][(df.day == day) & (df.subject == subject)].values[0]) + + return visit_counts + + +def get_feeder_visit_times(df, subject, day): + """ + Get the visit times and visit durations for each feeder for a given subject on a given day. + + Args: + df ([type]): [description] + subject ([type]): [description] + day ([type]): [description] + + Returns: + visit_times [list]: list of 8 entries containing all times a particular feeder was visited + visit_durations [list]: list of 8 entries containing the durations of each feeder visit + """ + visit_times = [] + visit_durations = [] + data = df[df.subject == subject] + days = data.day.unique() + if day not in days: + raise ValueError("Subject %s was not recorded for day %s." % (subject, day)) + for feeder_number in range(1, 9): + times = df["feeder_%i_visit_times" % feeder_number][(df.day == day) & (df.subject == subject)].values[0] + start_times, end_times = list_of_tuple_strings_to_float(times) + duration = [] + for start, end in zip(start_times, end_times): + duration.append(end - start) + visit_times.append(start_times) + visit_durations.append(duration) + return visit_times, visit_durations + + +def get_first_visits(visit_times, feeder_risks): + """Returns the times, risks, and feeder numbers of only the first visit to the feeders. + lists are sorted by the visit time. + + Args: + visit_times ([type]): [description] + feeder_risks ([type]): [description] + + Returns: + [type]: [description] + """ + first_visits = [] + risks = [] + feeder_numbers = [] + for i, vt in enumerate(visit_times): + if len(vt) == 0: + continue + first_visits.append(vt[0]) + risks.append(feeder_risks[i]) + feeder_numbers.append(i) + sorted_risks = np.asarray(risks)[np.argsort(first_visits)] + sorted_numbers = np.asarray(feeder_numbers)[np.argsort(first_visits)] + sorted_times = np.asarray(first_visits)[np.argsort(first_visits)] + + return sorted_times, sorted_risks, sorted_numbers + + +def estimate_boldness(visit_times, feeder_risks): + """Estimates the boldness score based on the feeder visit_times and feeder risks + + Args: + visit_times ([type]): [description] + feeder_risks ([type]): [description] + + Returns: + [type]: [description] + """ + _, sorted_risks, _ = get_first_visits(visit_times, feeder_risks) + boldness_gain = np.zeros_like(sorted_risks) + boldness_gain[sorted_risks > 1] = 1 + x = 0 + y = 0 + score = 0 + intersection = np.arange(len(sorted_risks) + 1) + for gain in boldness_gain: + if gain == 0: + x += 1 + else: + y += 1 + score += y - intersection[x] + + boldness = score / len(sorted_risks) + return boldness, len(sorted_risks) + + +def get_boldness_score(df, subject): + """Get the boldness scores for one subject. + + Args: + df ([type]): [description] + subject ([type]): [description] + + Returns: + [type]: [description] + """ + days = df.day[df.subject == subject].unique() + boldness_scores = [] + for d in days: + visit_times, _ = get_feeder_visit_times (df, subject, d) + risks = get_feeder_risks(df, d) + boldness, count = estimate_boldness(visit_times, risks) + b = {"subject": subject, "day": d, "boldness": boldness, "total_visits": count} + boldness_scores.append(b) + return boldness_scores + + +def plot_boldness_analysis(df, subject, day, max_feeder_count=8): + """Create a plot for one boldness estimate of one subject on one day. + + Args: + df ([type]): [description] + subject ([type]): [description] + day ([type]): [description] + """ + times, _ = get_feeder_visit_times(df, subject, day) + feeder_risks = get_feeder_risks(df, day) + first_visits, risks, numbers = get_first_visits(times, feeder_risks) + boldness_gain = np.zeros_like(risks) + boldness_gain[risks > 1] = 1 + x = 0 + y = 0 + + fig = plt.figure(figsize=(5,5)) + axis = fig.add_subplot(111) + axis.plot(np.arange(max_feeder_count/2 + 1), ls="dashed", color="silver", lw=1.0) + axis.set_xlim([-0.1, 4.2]) + axis.set_ylim([-0.1, 4.2]) + axis.set_xticks(range(int(max_feeder_count/2) + 1)) + axis.set_yticks(range(int(max_feeder_count/2) + 1)) + axis.set_xlabel("feeder visits dark") + axis.set_ylabel("feeder visits light") + for t, gain in zip(first_visits, boldness_gain): + if gain == 0: + x += 1 + else: + y += 1 + axis.scatter(x, y, s=20, marker="*", color="k") + axis.text(x, y + 0.1, "%.2f s" % t, ha="center", va="bottom") + fig.savefig("boldness_analysis_%s_%s.pdf" % (subject, day)) + plt.close() + + +if __name__ == "__main__": + df = pd.read_csv("../data/feeder_visits.csv", sep=';', index_col=0) + + subjects = df.subject.unique() + all_boldness_scores = [] + for subject in subjects: + all_boldness_scores.extend(get_boldness_score(df, subject)) + scores = pd.DataFrame(all_boldness_scores) + scores.to_csv("boldness_scores.csv", sep=";") + for s in subjects: + print(s, np.mean(scores.boldness[scores.subject == s]), np.mean(scores.total_visits[scores.subject == s])) + + plot_boldness_analysis(df, s, "day_4") + plot_boldness_analysis(df, s, "day_5") +