import numpy as np import matplotlib.pyplot as plt import pandas as pd import scipy.stats as st from IPython import embed def visit_times_to_float(fish_behavior, feeder_number): start_times = [] # ich baue Start- und Endzeiten in separate Listen um das nacher einfacher voneinander abziehen zu können end_times = [] durations = [] visit_times_column = 'feeder_%i_visit_times' % feeder_number times = fish_behavior[visit_times_column].values # Alle Zeiten für diesen einen feeder if len(times) > 0 and len(times[0].strip()) > 0: tuple_strings = times[0][1:-1].split('), ') # ich muss das Ganze splitten, weil mir die daten ganz komisch ausgegeben werden tuple_strings = list(map(lambda x: x[1:].strip(')'), tuple_strings)) for ts in tuple_strings: temp = ts.split(',') # tuple an der Kommastelle splitten if len(temp) == 0 or temp[0] == '': # wenn es keine Einträge gibt oder die Startzeit fehlt --> continue continue start_times.append(float(temp[0])) # Die Startzeiten an die Startliste anhängen if temp[1] == '': # Wenn die Endzeit fehlt: Null eintragen print("Endzeit fehlt!") end_times.append(start_times[-1] + 1/30) # nehmen wir mal an, der Fisch waere 1 Frame lang da gewesen else: end_times.append((float(temp[1]))) # Ansonsten an die Endliste anhängen durations.append(end_times[-1] - start_times[-1]) return start_times, end_times, durations def get_visit_durations(fish_behavior, day, subject, light_cond): # print(day, subject) subject_behavior_today = fish_behavior[(fish_behavior.day == day) & (fish_behavior.subject == subject)] # bastelt mir auch Kombis die es gar nicht gibt--> die müssten dann ja leer sein visit_durations = np.zeros(8) all_light = [] # Visittimes die im Hellen stattgefunden haben all_dark = [] # Visittimes die im Dunkeln stattgefunden haben dark_counts = 0 light_counts = 0 for feeder_number in range(1, 9): # Holt den ganzen kladderadatsch aus der anderen csv Datei feeder_light_condition = "light" if (light_cond == "top" and feeder_number < 5) or (light_cond == "bottom" and feeder_number >= 5) else "dark" _, _, durations = visit_times_to_float(subject_behavior_today, feeder_number) if len(durations) == 0: # Wenn die Durationliste für diesen Feeder leer sein sollte : setze nan ein visit_durations[feeder_number - 1] = np.nan else: visit_durations[feeder_number - 1] = np.median(durations) # Wenn die Liste länger als 0 ist --> mach mir nen Median if feeder_light_condition == 'light': all_light.append(visit_durations[feeder_number-1]) light_counts += len(durations) else: all_dark.append(visit_durations[feeder_number-1]) dark_counts += len(durations) return visit_durations, np.nanmedian(all_light), np.nanmedian(all_dark), light_counts, dark_counts def plot_trial_visit_times (fish_behavior, setup_config, days, subjects): x_val = [1, 2, 3, 4, 5, 6, 7, 8] dark_light_duration_relation = [] dark_light_count_relation = [] for day in days: fig = plt.figure(figsize=(7, 4)) feeder_visists_axis = fig.add_subplot(1, 3, 1) boxplot_axis = fig.add_subplot(1,3,2) count_axis = fig.add_subplot(1,3,3) light_duration = [] dark_duration = [] light_counts = [] dark_counts = [] for i, subject in enumerate(subjects): if subject not in setup_config.subject[setup_config['day'] == day].unique(): print("subject %s was not recorded on day %s" % (subject, day)) continue print(subject, day) light_cond = setup_config.dark[(setup_config['day'] == day) & (setup_config['subject'] == subject)].values[0] feeder_visit_durations, med_light, med_dark, count_light, count_dark = get_visit_durations(fish_behavior, day, subject, light_cond) plot_visit_durations = feeder_visit_durations plot_visit_durations[np.isnan(feeder_visit_durations)] = 0.0 feeder_visists_axis.scatter(x_val, plot_visit_durations) light_duration.append(med_light) dark_duration.append(med_dark) light_counts.append(count_light) dark_counts.append(count_dark) light_duration = np.array(light_duration) dark_duration = np.array(dark_duration) title = '%s' % (day) # zwei Platzhalter für jeweils einen String feeder_visists_axis.set_xlabel('feeder_number') feeder_visists_axis.set_ylabel('median_visit_time') feeder_visists_axis.set_title(title) boxplot_axis.boxplot([light_duration[~np.isnan(light_duration)], dark_duration[~np.isnan(dark_duration)]]) boxplot_axis.set_xticklabels(["light", "dark"]) count_axis.boxplot([light_counts, dark_counts]) count_axis.set_xticklabels(["light", "dark"]) count_axis.set_ylabel("number of visits") plt.close() dark_light_duration_relation.append(np.median(dark_duration)/np.median(light_duration)) dark_light_count_relation.append(np.median(dark_counts)/np.median(light_counts)) dark_light_duration_relation = np.array(dark_light_duration_relation) dark_light_count_relation = np.array(dark_light_count_relation) days = np.arange(len(dark_light_duration_relation), dtype=int) fig = plt.figure() ax1 = fig.add_subplot(2,1,1) ax1.plot(days, dark_light_duration_relation, marker=".", lw=0.1, label="visit durations") m, n, r, p, _ = st.linregress(days[~ np.isnan(dark_light_duration_relation)], dark_light_duration_relation[~ np.isnan(dark_light_duration_relation)]) ax1.plot(days, m * days + n, label="r: %.2f, p:%.3f" % (r, p)) ax1.legend() ax1.set_ylabel("duration relation dark/light", fontsize=9) ax1.set_ylim([0., 1.2* max(dark_light_duration_relation)]) ax2 = fig.add_subplot(2,1,2) ax2.plot(days, dark_light_count_relation, marker=".", lw=0.1, label="visit counts") m, n, r, p, _ = st.linregress(days[~ np.isnan(dark_light_count_relation)], dark_light_count_relation[~ np.isnan(dark_light_count_relation)]) ax2.plot(days, m * days + n, label="r: %.2f, p:%.3f" % (r, p)) ax2.legend() ax2.set_ylabel("count relation dark/light", fontsize=9) ax2.set_xlabel("experimental days", fontsize=9) ax2.set_ylim([0., 1.2* max(dark_light_count_relation)]) plt.show() def get_feeder_risks(data, feeder_count=8): risks = np.zeros(8) for f in range(1, 9): r_name = "feeder_%i_risk" % f risks[f-1] = data[r_name].values[0] return risks def analyze_feeder_risk(behavior_data, subjects, days, limit=400): all_results = [] for subject in subjects: for day in days: boldness = {} data = behavior_data[(behavior_data.subject == subject) & (behavior_data.day == day)] if len(data) == 0: continue feeder_risks = get_feeder_risks(data) avg_risk = np.mean(feeder_risks) risks = [] times = [] for feeder in range(1, 9): st, _, _ = visit_times_to_float(data, feeder) risks.extend([feeder_risks[feeder-1]] * len(st)) times.extend(st) risks = np.array(risks) times = np.array(times) risks = risks[times < limit] times = times[times < limit] sorted_risks = risks[np.argsort(times)] sorted_times = times[np.argsort(times)] boldness["subject"] = subject boldness["day"] = day boldness["day_number"] = int(day.split("_")[-1]) boldness["avg_risk"] = avg_risk boldness["avg_risk_dark"] = np.mean(feeder_risks[feeder_risks < 1.0]) boldness["avg_risk_light"] = np.mean(feeder_risks[feeder_risks >= 1.0]) boldness["feeder_risks"] = feeder_risks boldness["sorted_risks"] = sorted_risks boldness["sorted_times"] = sorted_times boldness["total_risk"] = np.sum(sorted_risks) all_results.append(boldness) return pd.DataFrame(all_results) def plot_risk_per_day(data, subjects): for subject in subjects: fig = plt.figure(figsize=(5, 5)) ax1 = fig.add_subplot(3,1,1) ax2 = fig.add_subplot(3,1,2) ax3 = fig.add_subplot(3,1,3) subject_data = data[data.subject == subject] days = subject_data.day_number.values trs = subject_data.total_risk.values x = subject_data.sorted_risks.values num_visits = np.zeros(x.shape[0]) for i in range(x.shape[0]): num_visits[i] = len(x[i]) avg_feeder_risks = subject_data.avg_risk.values avg_feeder_risks_dark = subject_data.avg_risk_dark.values trs = trs[np.argsort(days)] num_visits = num_visits[np.argsort(days)] avg_feeder_risks = avg_feeder_risks[np.argsort(days)] avg_feeder_risks_dark = avg_feeder_risks_dark[np.argsort(days)] days = days[np.argsort(days)] m, n, r, p, _ = st.linregress(days[~np.isnan(trs)], trs[~np.isnan(trs)]) ax1.plot(days, trs, marker=".", label=subject) ax1.plot(days, m * days + n, label="r: %.2f, p:%.3f" % (r, p)) ax1.legend(fontsize=7) ax1.set_ylim([0, 1.2*max(trs)]) ax1.set_ylabel("total risk", fontsize=9) ax2.plot(days, trs/num_visits, marker=".", label=subject) m, n, r, p, _ = st.linregress(days[~np.isnan(trs/num_visits)], (trs/num_visits)[~np.isnan(trs/num_visits)]) ax2.plot(days, m * days + n, label="r: %.2f, p:%.3f" % (r, p)) ax2.set_ylim([0, 2]) ax2.set_ylabel("avg. risk per visit", fontsize=9) ax2.plot(days, avg_feeder_risks, ls="dashed", lw=0.5, color="darkgreen", label="avg feeder risk") ax2.legend(fontsize=7) ax3.plot(days, trs/num_visits/avg_feeder_risks, marker=".", label="") m, n, r, p, _ = st.linregress(days[~np.isnan(trs/num_visits/avg_feeder_risks)], (trs/num_visits/avg_feeder_risks)[~np.isnan(trs/num_visits/avg_feeder_risks)]) t, p = st.ttest_1samp((trs/num_visits/avg_feeder_risks - avg_feeder_risks_dark/avg_feeder_risks)[~np.isnan(trs/num_visits/avg_feeder_risks - avg_feeder_risks_dark/avg_feeder_risks)], 0) print(subject, "ttest: p: %.3f," % p) ax3.plot(days, m * days + n, label="r: %.2f, p:%.3f" % (r, p)) ax3.plot(days, avg_feeder_risks_dark/avg_feeder_risks, color="dodgerblue", ls="dashed", lw=0.5, label="avg. feeder risk dark") ax3.hlines(1.0, min(days), max(days), ls="dashed", color="darkgreen", lw=0.5, zorder=0, label="avg. feeder risk") ax3.set_ylim([0.0, 1.5]) ax3.set_xlim([min(days), max(days)]) ax3.legend(fontsize=7) ax3.set_xlabel("experimental days", fontsize=9) ax3.set_ylabel(r"$\frac{avg.\;risk\; per\;visit}{avg.\; feeder\; risk}$", fontsize=9) ax3.text(0.05, 0.05, "one sampled t-test, p: %.3f, t: %.3f" % (p, t), ha="left", fontsize=9, transform=ax3.transAxes) fig.subplots_adjust(left=0.15, bottom=0.09, top=0.975, right=0.975, hspace=0.2) fig.savefig("risk_per_day_%s.pdf" % subject) plt.close() def compare_risks_per_subject(data, subjects): total_risks = [] total_risks_rel = [] for subject in subjects: subject_data = data[data.subject == subject] days = subject_data.day_number.values trs = subject_data.total_risk.values x = subject_data.sorted_risks.values num_visits = np.zeros(x.shape[0]) for i in range(x.shape[0]): num_visits[i] = len(x[i]) trs = trs[np.argsort(days)] num_visits = num_visits[np.argsort(days)] days = days[np.argsort(days)] total_risks.append(trs[~np.isnan(trs)]) total_risks_rel.append((trs[num_visits > 0]/num_visits[num_visits > 0])) fig = plt.figure(figsize=(5, 5)) ax1 = fig.add_subplot(2,1,1) ax2 = fig.add_subplot(2,1,2) ax1.boxplot(total_risks, showfliers=False) ax1.set_xticklabels("") ax1.set_ylabel("total risk", fontsize=9) ax2.boxplot(total_risks_rel, showfliers=False) ax2.set_xticklabels(subjects, fontsize=9, rotation=45) ax2.set_ylabel("risk per visit", fontsize=9) ax2.set_ylim([0, 2.0]) fig.subplots_adjust(left=0.1, right=0.975, top=0.975, bottom=0.15, hspace=0.2) fig.savefig("total_risk_comparison.pdf") plt.close() def plot_risk_over_time(data, subjects): all_slopes = [] for subject in subjects: slopes = [] subject_data = data[data.subject == subject] fig = plt.figure(figsize=(5, 5)) ax1 = fig.add_subplot(2,1,1) ax2 = fig.add_subplot(2,1,2) days = subject_data.day.values day_numbers = subject_data.day_number.values risks = subject_data.sorted_risks.values times = subject_data.sorted_times.values sort_indices = np.argsort(day_numbers) days = days[np.argsort(days)] for index in sort_indices: day = days[index] day_times = times[index] day_risks = risks[index] print(subject, len(day_times), len(day_risks)) if len(day_risks) > 1: m, n, _, _, _ = st.linregress(day_times, np.cumsum(day_risks)) slopes.append(m) else: slopes.append(np.nan) l = ax1.plot(day_times, np.cumsum(day_risks), marker=".", lw=0.1, label=day) ax1.plot(day_times, m * day_times + n, lw=1, color=l[0].get_color()) slopes = np.array(slopes) all_slopes.append(slopes[~np.isnan(slopes)]) ax1.set_xlabel("time [s]") ax1.set_ylabel("risk taken") ax1.text(0.0, 1.05, subject, transform=ax1.transAxes, fontsize=9, fontweight="bold") ax2.scatter(day_numbers, slopes, marker="o", label="slopes") m, n, r, p, _ = st.linregress(day_numbers[~np.isnan(slopes)], slopes[~np.isnan(slopes)]) ax2.plot(day_numbers, m * day_numbers + n, label="r: %.2f, p: %.3f" % (r, p)) ax2.set_xlabel("experimental day", fontsize=9) ax2.set_ylabel("slope [risk/s]", fontsize=9) ax2.set_ylim([0, 0.15]) ax2.legend(fontsize=7) fig.subplots_adjust(left =0.15, top=0.95, right=0.975, bottom=0.09, hspace=0.25) fig.savefig("risk_over_time_%s.pdf" % subject) plt.close() fig = plt.figure(figsize=(3.5, 3.5)) ax = fig.add_subplot(111) ax.boxplot(all_slopes, showfliers=False) ax.set_ylabel("slope [risk/s]") ax.set_xticklabels(subjects, fontsize=9, rotation=45) fig.subplots_adjust(left=0.2, right=0.975, top=0.975, bottom=0.2) fig.savefig("eagerness.pdf") plt.close() if __name__ == '__main__': #df = pd.read_csv(r'C:\Users\marie\Documents\UNI\7.Semester\Betschler\Skriptschnickschnack\csv_dateien\feeder_visits.csv', index_col=0, sep=';') #df2 = pd.read_csv(r'C:\Users\marie\Documents\UNI\7.Semester\Betschler\Skriptschnickschnack\csv_dateien\setup_configurations.csv', index_col=0, sep=';') df = pd.read_csv("../data/feeder_visits.csv", index_col=0, sep=';') df2 = pd.read_csv('../data/setup_configuration.csv', index_col=0, sep=';') days = df.day.unique() subjects = df.subject.unique() plot_trial_visit_times(df, df2, days, subjects) boldness = analyze_feeder_risk(df, subjects, days) plot_risk_per_day(boldness, subjects) compare_risks_per_subject(boldness, subjects) plot_risk_over_time(boldness, subjects)