"""Boldness analysis of feeder visits.

Reads a table of feeder visits (one row per subject and day), derives a
boldness score from the order in which feeders were first visited, writes the
scores to ``boldness_scores.csv`` and saves one diagnostic plot per subject
and day.
"""
import numpy as np
import pandas as pd

try:
    import matplotlib.pyplot as plt
except ImportError:  # only plot_boldness_analysis needs matplotlib
    plt = None

# Feeders are numbered 1..8 in the column names ("feeder_1_visits", ...).
_FEEDER_NUMBERS = range(1, 9)
_SCORE_COLUMNS = ["subject", "day", "boldness", "total_visits"]


def list_of_tuple_strings_to_float(strings):
    """Parse the text form of a list of (start, end) tuples into two lists.

    The input is expected to look like ``"[(1.0, 2.5), (3.0, 4.0)]"``; an
    empty list ``"[]"`` yields two empty lists.

    Args:
        strings (str): textual list of 2-tuples of numbers.

    Returns:
        tuple: ``(start_times, end_times)``, two lists of floats.
    """
    # Drop the enclosing brackets, then split on every comma: pieces at even
    # positions are "(start", pieces at odd positions are "end)".
    parts = strings[1:-1].strip().split(',')
    start_times = []
    end_times = []
    for position, raw in enumerate(parts):
        part = raw.strip()
        if not part:
            continue
        if position % 2 == 0:
            start_times.append(float(part[1:]))   # strip leading "("
        else:
            end_times.append(float(part[:-1]))    # strip trailing ")"
    return start_times, end_times


def get_feeder_visit_counts(df, subject, day):
    """Return the ``feeder_<n>_visits`` values of one subject on one day.

    Args:
        df (pandas.DataFrame): table with the columns ``subject``, ``day``
            and ``feeder_<n>_visits`` for n = 1..8.
        subject: value to match in the ``subject`` column.
        day: value to match in the ``day`` column.

    Returns:
        list: 8 entries, one per feeder, taken from the first matching row.

    Raises:
        IndexError: if no row matches ``subject`` and ``day``.
    """
    row_mask = (df.day == day) & (df.subject == subject)
    return [df["feeder_%i_visits" % feeder_number][row_mask].values[0]
            for feeder_number in _FEEDER_NUMBERS]


def get_visits(df, subject, day):
    """Return the per-feeder visit values of one subject on one day.

    Same result as :func:`get_feeder_visit_counts`; kept as a separate name
    for existing callers.
    """
    return get_feeder_visit_counts(df, subject, day)


def get_feeder_risks(df, day):
    """Returns the risks associated with the feeders for a given experimental day.

    The values are read from the first row recorded for ``day``, whichever
    subject it belongs to (presumably the risks are identical for all
    subjects on the same day -- confirm against the data).

    Args:
        df (pandas.DataFrame): table with the columns ``day`` and
            ``feeder_<n>_risk`` for n = 1..8.
        day: value to match in the ``day`` column.

    Returns:
        list: 8 entries, the risk value of each feeder.

    Raises:
        IndexError: if no row matches ``day``.
    """
    row_mask = df.day == day
    return [df["feeder_%i_risk" % feeder_number][row_mask].values[0]
            for feeder_number in _FEEDER_NUMBERS]


def get_feeder_visit_times(df, subject, day):
    """Get the visit times and visit durations for each feeder for a given
    subject on a given day.

    Args:
        df (pandas.DataFrame): table with the columns ``subject``, ``day``
            and ``feeder_<n>_visit_times`` for n = 1..8; each visit-times
            cell holds the text form of a list of (start, end) tuples.
        subject: value to match in the ``subject`` column.
        day: value to match in the ``day`` column.

    Returns:
        visit_times [list]: list of 8 entries containing all times a particular feeder was visited
        visit_durations [list]: list of 8 entries containing the durations of each feeder visit

    Raises:
        ValueError: if the subject has no row for the requested day.
    """
    recorded_days = df[df.subject == subject].day.unique()
    if day not in recorded_days:
        raise ValueError("Subject %s was not recorded for day %s."
                         % (subject, day))

    row_mask = (df.day == day) & (df.subject == subject)
    visit_times = []
    visit_durations = []
    for feeder_number in _FEEDER_NUMBERS:
        times = df["feeder_%i_visit_times" % feeder_number][row_mask].values[0]
        start_times, end_times = list_of_tuple_strings_to_float(times)
        visit_times.append(start_times)
        visit_durations.append(
            [end - start for start, end in zip(start_times, end_times)])
    return visit_times, visit_durations


def get_first_visits(visit_times, feeder_risks):
    """Returns the times, risks, and feeder numbers of only the first visit
    to the feeders. Lists are sorted by the visit time.

    Feeders that were never visited are left out.

    Args:
        visit_times (list): one list of visit start times per feeder.
        feeder_risks (list): risk value per feeder, same order as
            ``visit_times``.

    Returns:
        tuple: ``(sorted_times, sorted_risks, sorted_numbers)`` as numpy
        arrays; ``sorted_numbers`` holds the zero-based position of each
        feeder in ``visit_times``.
    """
    first_visits = []
    risks = []
    feeder_numbers = []
    for index, times in enumerate(visit_times):
        if len(times) == 0:
            continue
        first_visits.append(times[0])
        risks.append(feeder_risks[index])
        feeder_numbers.append(index)

    # One stable sort shared by all three arrays; ties keep feeder order.
    order = np.argsort(first_visits, kind="stable")
    sorted_times = np.asarray(first_visits)[order]
    sorted_risks = np.asarray(risks)[order]
    sorted_numbers = np.asarray(feeder_numbers)[order]
    return sorted_times, sorted_risks, sorted_numbers


def estimate_boldness(visit_times, feeder_risks):
    """Estimates the boldness score based on the feeder visit_times and feeder risks.

    The first visits are walked through in temporal order while counting
    visits to feeders with risk <= 1 (``x``) and with risk > 1 (``y``).
    Every visit to a feeder with risk > 1 adds ``y - x`` to the score, i.e.
    the distance from the diagonal ``y == x``. The sum is divided by the
    number of visited feeders.

    Args:
        visit_times (list): one list of visit start times per feeder.
        feeder_risks (list): risk value per feeder, same order as
            ``visit_times``.

    Returns:
        tuple: ``(boldness, visited_feeder_count)``, or ``(None, None)`` if
        no feeder was visited at all.
    """
    _, sorted_risks, _ = get_first_visits(visit_times, feeder_risks)
    visited = len(sorted_risks)
    if visited == 0:
        return None, None

    x = 0
    y = 0
    score = 0
    for is_high_risk in sorted_risks > 1:
        if is_high_risk:
            y += 1
            score += y - x
        else:
            x += 1
    return score / visited, visited


def get_boldness_score(df, subject):
    """Get the boldness scores for one subject.

    Args:
        df (pandas.DataFrame): feeder visit table, see
            :func:`get_feeder_visit_times` and :func:`get_feeder_risks` for
            the columns that are read.
        subject: value to match in the ``subject`` column.

    Returns:
        list: one dict per day with the keys ``subject``, ``day``,
        ``boldness`` and ``total_visits``; days without any feeder visit
        are skipped.
    """
    boldness_scores = []
    for d in df.day[df.subject == subject].unique():
        visit_times, _ = get_feeder_visit_times(df, subject, d)
        risks = get_feeder_risks(df, d)
        boldness, count = estimate_boldness(visit_times, risks)
        if boldness is None:
            continue
        boldness_scores.append({"subject": subject, "day": d,
                                "boldness": boldness, "total_visits": count})
    return boldness_scores


def plot_boldness_analysis(df, subject, day, max_feeder_count=8):
    """Create a plot for one boldness estimate of one subject on one day.

    Each first visit moves one step right (feeder risk <= 1, x axis labelled
    "dark") or one step up (feeder risk > 1, y axis labelled "light"); every
    step is marked and annotated with the visit time. The figure is saved as
    ``boldness_analysis_<subject>_<day>.pdf`` in the working directory.

    Args:
        df (pandas.DataFrame): feeder visit table.
        subject: value to match in the ``subject`` column.
        day: value to match in the ``day`` column.
        max_feeder_count (int, optional): total number of feeders; half of it
            sets the extent of both axes. Defaults to 8.

    Raises:
        ImportError: if matplotlib is not installed.
        ValueError: if the subject has no row for the requested day.
    """
    if plt is None:
        raise ImportError("matplotlib is required for plot_boldness_analysis")

    times, _ = get_feeder_visit_times(df, subject, day)
    feeder_risks = get_feeder_risks(df, day)
    first_visits, risks, _ = get_first_visits(times, feeder_risks)

    half = max_feeder_count / 2
    fig = plt.figure(figsize=(5, 5))
    axis = fig.add_subplot(111)
    # Diagonal y == x as the reference line.
    axis.plot(np.arange(half + 1), ls="dashed", color="silver", lw=1.0)
    # Limits follow max_feeder_count (4.2 for the default of 8 feeders).
    axis.set_xlim([-0.1, half + 0.2])
    axis.set_ylim([-0.1, half + 0.2])
    axis.set_xticks(range(int(half) + 1))
    axis.set_yticks(range(int(half) + 1))
    axis.set_xlabel("feeder visits dark")
    axis.set_ylabel("feeder visits light")

    x = 0
    y = 0
    for t, is_high_risk in zip(first_visits, risks > 1):
        if is_high_risk:
            y += 1
        else:
            x += 1
        axis.scatter(x, y, s=20, marker="*", color="k")
        axis.text(x, y + 0.1, "%.2f s" % t, ha="center", va="bottom")

    fig.savefig("boldness_analysis_%s_%s.pdf" % (subject, day))
    plt.close(fig)


def main():
    """Compute and store all boldness scores, print per-subject means and
    save one plot per subject and day."""
    df = pd.read_csv("../data/feeder_visits.csv", sep=';', index_col=0)
    subjects = df.subject.unique()

    all_boldness_scores = []
    for subject in subjects:
        all_boldness_scores.extend(get_boldness_score(df, subject))
    # Explicit columns keep the attribute access below valid even when no
    # score could be computed at all.
    scores = pd.DataFrame(all_boldness_scores, columns=_SCORE_COLUMNS)
    scores.to_csv("boldness_scores.csv", sep=";")

    for s in subjects:
        days = df.day[df.subject == s].unique()
        print(s,
              np.mean(scores.boldness[scores.subject == s]),
              np.mean(scores.total_visits[scores.subject == s]))
        for day in days:
            plot_boldness_analysis(df, s, day)


if __name__ == "__main__":
    main()