From 92b3e079a34ea38d3ef10e24bbb6fc860820a02e Mon Sep 17 00:00:00 2001
From: Jan Grewe <jan.grewe@g-node.org>
Date: Wed, 2 Jun 2021 16:17:13 +0200
Subject: [PATCH] [boldness] boldness estimation

---
 boldness/code/boldness_estimation.py | 212 +++++++++++++++++++++++++++
 1 file changed, 212 insertions(+)
 create mode 100644 boldness/code/boldness_estimation.py

diff --git a/boldness/code/boldness_estimation.py b/boldness/code/boldness_estimation.py
new file mode 100644
index 0000000..7ec493a
--- /dev/null
+++ b/boldness/code/boldness_estimation.py
@@ -0,0 +1,212 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+def list_of_tuple_strings_to_float(strings):
+    parts = strings[1:-1].strip().split(',')
+    start_times = []
+    end_times = []
+
+    for i in range(len(parts)):
+        part = parts[i].strip()
+        if len(part) == 0:
+            continue
+        if i % 2 == 0:
+            start_times.append(float(part[1:])) 
+        else: 
+            end_times.append(float(part[:-1]))
+    return start_times, end_times
+
+
+def get_visits(df, subject, day):
+    visits = []
+    #subjects = ['lepto03', 'lepto20', 'lepto40', 'lepto42', 'lepto43', 'lepto46', 'lepto48']
+    #for subject in subjects:
+    for feeder_number in range(1, 9):
+        visits.append(df["feeder_%i_visits" % feeder_number][(df.day == day) & (df.subject == subject)].values[0])
+    return visits
+
+
+def get_feeder_risks(df, day):
+    """Returns the risks associated with the feeders for a given experimental day.
+
+    Args:
+        df ([type]): [description]
+        day ([type]): [description]
+
+    Returns:
+        [type]: [description]
+    """
+    risks = []
+    for feeder_number in range(1, 9):
+        risks.append(df["feeder_%i_risk" % feeder_number][(df.day == day)].values[0])
+
+    return risks
+
+
+def get_feeder_visit_counts(df, subject, day):
+    visit_counts = []
+
+    for feeder_number in range(1, 9):
+        visit_counts.append(df["feeder_%i_visits" % feeder_number][(df.day == day) & (df.subject == subject)].values[0])
+
+    return visit_counts
+
+
+def get_feeder_visit_times(df, subject, day):
+    """
+    Get the visit times and visit durations for each feeder for a given subject on a given day.
+
+    Args:
+        df ([type]): [description]
+        subject ([type]): [description]
+        day ([type]): [description]
+
+    Returns:
+        visit_times [list]: list of 8 entries containing all times a particular feeder was visited
+        visit_durations [list]: list of 8 entries containing the durations of each feeder visit
+    """
+    visit_times = []
+    visit_durations = []
+    data = df[df.subject == subject]
+    days = data.day.unique()
+    if day not in days:
+        raise ValueError("Subject %s was not recorded for day %s." % (subject, day))
+    for feeder_number in range(1, 9):
+        times = df["feeder_%i_visit_times" % feeder_number][(df.day == day) & (df.subject == subject)].values[0]
+        start_times, end_times = list_of_tuple_strings_to_float(times)
+        duration =  []
+        for start, end in zip(start_times, end_times):
+            duration.append(end - start)
+        visit_times.append(start_times)
+        visit_durations.append(duration)
+    return visit_times, visit_durations
+
+
+def get_first_visits(visit_times, feeder_risks):
+    """Returns the times, risks, and feeder numbers of only the first visit to the feeders.
+       lists are sorted by the visit time.
+
+    Args:
+        visit_times ([type]): [description]
+        feeder_risks ([type]): [description]
+
+    Returns:
+        [type]: [description]
+    """
+    first_visits = []
+    risks = []
+    feeder_numbers = []
+    for i, vt in enumerate(visit_times):
+        if len(vt) == 0:
+            continue
+        first_visits.append(vt[0])
+        risks.append(feeder_risks[i])
+        feeder_numbers.append(i)
+    sorted_risks = np.asarray(risks)[np.argsort(first_visits)]
+    sorted_numbers = np.asarray(feeder_numbers)[np.argsort(first_visits)]
+    sorted_times = np.asarray(first_visits)[np.argsort(first_visits)]
+    
+    return sorted_times, sorted_risks, sorted_numbers
+
+
+def estimate_boldness(visit_times, feeder_risks):
+    """Estimates the boldness score based on the feeder visit_times and feeder risks
+
+    Args:
+        visit_times ([type]): [description]
+        feeder_risks ([type]): [description]
+
+    Returns:
+        [type]: [description]
+    """
+    _, sorted_risks, _ = get_first_visits(visit_times, feeder_risks)
+    boldness_gain = np.zeros_like(sorted_risks)
+    boldness_gain[sorted_risks > 1] = 1
+    x = 0
+    y = 0
+    score = 0
+    intersection = np.arange(len(sorted_risks) + 1)
+    for gain in boldness_gain:
+        if gain == 0:
+            x += 1
+        else:
+            y += 1
+        score += y - intersection[x]
+
+    boldness = score / len(sorted_risks)
+    return boldness, len(sorted_risks)
+
+
+def get_boldness_score(df, subject):
+    """Get the boldness scores for one subject. 
+
+    Args:
+        df ([type]): [description]
+        subject ([type]): [description]
+
+    Returns:
+        [type]: [description]
+    """
+    days = df.day[df.subject == subject].unique()
+    boldness_scores = []
+    for d in days:
+        visit_times, _ = get_feeder_visit_times (df, subject, d)
+        risks = get_feeder_risks(df, d)
+        boldness, count = estimate_boldness(visit_times, risks)
+        b = {"subject": subject, "day": d, "boldness": boldness, "total_visits": count}
+        boldness_scores.append(b)
+    return boldness_scores
+
+
+def plot_boldness_analysis(df, subject, day, max_feeder_count=8):
+    """Create a plot for one boldness estimate of one subject on one day.
+
+    Args:
+        df ([type]): [description]
+        subject ([type]): [description]
+        day ([type]): [description]
+    """
+    times, _ = get_feeder_visit_times(df, subject, day)
+    feeder_risks = get_feeder_risks(df, day)
+    first_visits,  risks, numbers = get_first_visits(times, feeder_risks)
+    boldness_gain = np.zeros_like(risks)
+    boldness_gain[risks > 1] = 1
+    x = 0
+    y = 0
+    
+    fig = plt.figure(figsize=(5,5))
+    axis = fig.add_subplot(111)
+    axis.plot(np.arange(max_feeder_count/2 + 1), ls="dashed", color="silver", lw=1.0)
+    axis.set_xlim([-0.1, 4.2])
+    axis.set_ylim([-0.1, 4.2])
+    axis.set_xticks(range(int(max_feeder_count/2) + 1))
+    axis.set_yticks(range(int(max_feeder_count/2) + 1))
+    axis.set_xlabel("feeder visits dark")
+    axis.set_ylabel("feeder visits light")
+    for t, gain in zip(first_visits, boldness_gain):
+        if gain == 0:
+            x += 1
+        else:
+            y += 1
+        axis.scatter(x, y, s=20, marker="*", color="k")
+        axis.text(x, y + 0.1, "%.2f s" % t, ha="center", va="bottom")
+    fig.savefig("boldness_analysis_%s_%s.pdf" % (subject, day))
+    plt.close()
+
+
+if __name__ == "__main__":
+    df = pd.read_csv("../data/feeder_visits.csv", sep=';', index_col=0)
+
+    subjects = df.subject.unique()
+    all_boldness_scores = []
+    for subject in subjects:
+        all_boldness_scores.extend(get_boldness_score(df, subject))
+    scores = pd.DataFrame(all_boldness_scores)
+    scores.to_csv("boldness_scores.csv", sep=";")
+    for s in subjects:
+        print(s, np.mean(scores.boldness[scores.subject == s]), np.mean(scores.total_visits[scores.subject == s]))
+
+    plot_boldness_analysis(df, s, "day_4")
+    plot_boldness_analysis(df, s, "day_5")
+