labels in generate dataset should be int now

2023-11-08 14:04:27 +01:00 · 2023-11-08 14:04:27 +01:00 · 5f270c41f2
commit 5f270c41f2
parent 92ab342a65
2 changed files with 53 additions and 112 deletions
--- a/data/train_test_split.py
+++ b/data/train_test_split.py
@ -1,44 +0,0 @@
 import pandas as pd
 from pathlib import Path
 import numpy as np
 import os
 from IPython import embed
 def define_train_test_img_names(bbox, test_size = 0.2, seed = 42):
    """Split the unique image names found in ``bbox['image']`` into test and train.

    The unique names are shuffled with a seeded random generator, so the split
    is reproducible for a given input and seed. The first
    ``int(n_unique * test_size)`` shuffled names form the test set and the
    remaining names form the train set.

    Args:
        bbox: table with an ``'image'`` column (indexed as ``bbox['image']``).
        test_size: fraction of the unique image names assigned to the test set.
        seed: seed of the shuffle; the default reproduces the historic split.

    Returns:
        ``(test_img, train_img)``, each a sorted list of image names.

    Raises:
        ValueError: if ``test_size`` lies outside ``[0, 1]``.
    """
    if not 0 <= test_size <= 1:
        # Negative or >1 fractions would silently turn into odd slice bounds.
        raise ValueError(f'test_size must be within [0, 1], got {test_size}')
    # np.array copies, so the in-place shuffle never touches data owned by bbox.
    unique_imgs = np.array(pd.unique(bbox['image']))
    # A private RandomState yields the same permutation as seeding the global
    # NumPy RNG with this seed, but leaves the global RNG state untouched.
    np.random.RandomState(seed).shuffle(unique_imgs)
    n_test = int(len(unique_imgs) * test_size)
    test_img = sorted(unique_imgs[:n_test])
    train_img = sorted(unique_imgs[n_test:])
    return test_img, train_img
 def split_data_df_in_test_train_df(bbox, test_img, train_img):
    """Build the train and test tables from ``bbox`` for the given image names.

    For each list of image names, the rows of ``bbox`` whose ``'image'`` value
    equals a name are gathered in the order of the names (rows of one image
    keep their original relative order) and the result gets a fresh 0..n-1
    index.

    Args:
        bbox: table with an ``'image'`` column.
        test_img: image names whose rows go into the test table.
        train_img: image names whose rows go into the train table.

    Returns:
        ``(train_bbox, test_bbox, cols)`` where ``cols`` is the list of column
        names of ``bbox``. Note the order: train first, then test.
    """
    cols = list(bbox.keys())

    def rows_for(img_names):
        # Gather all per-image slices and concatenate once: growing a frame
        # with concat inside a loop copies the accumulated rows every time,
        # and starting from an empty frame can upcast the column dtypes.
        frames = [bbox[bbox['image'] == img_name] for img_name in img_names]
        if not frames:
            # pd.concat rejects an empty list; keep the empty-table result.
            return pd.DataFrame(columns=cols)
        return pd.concat(frames, ignore_index=True)

    test_bbox = rows_for(test_img)
    train_bbox = rows_for(train_img)
    return train_bbox, test_bbox, cols
 def main(path):
    """Read ``bbox_dataset.csv`` under *path* and write the train/test CSV files.

    The table is split by image name and the two parts are written next to
    the input as ``bbox_train.csv`` and ``bbox_test.csv``.

    Args:
        path: directory holding ``bbox_dataset.csv``; must support the ``/``
            operator (the script entry point passes a ``pathlib.Path``).
    """
    source_csv = path/'bbox_dataset.csv'
    all_boxes = pd.read_csv(source_csv, sep=',', index_col=0)

    held_out_names, training_names = define_train_test_img_names(all_boxes)
    training_boxes, held_out_boxes, column_names = split_data_df_in_test_train_df(
        all_boxes, held_out_names, training_names)

    # Train table is written first, then the test table.
    outputs = ((training_boxes, 'bbox_train.csv'), (held_out_boxes, 'bbox_test.csv'))
    for table, file_name in outputs:
        table.to_csv(path/file_name, columns=column_names, sep=',')
 # Script entry point: run the train/test split on the ./dataset directory.
 if __name__ == '__main__':
    main(Path('./dataset'))
--- a/generate_dataset.py
+++ b/generate_dataset.py
@ -78,7 +78,7 @@ def load_trial_data(folder):
    return fish_freq, rise_idx, rise_size, fish_baseline_freq, fish_baseline_freq_time
-def save_spec_pic(folder, s_trans, times, freq, t_idx0, t_idx1, f_idx0, f_idx1, dataset_folder):
+def save_spec_pic(folder, s_trans, times, freq, t_idx0, t_idx1, f_idx0, f_idx1, pic_save_folder):
    f_res, t_res = freq[1] - freq[0], times[1] - times[0]
    fig_title = (f'{Path(folder).name}__{times[t_idx0]:5.0f}s-{times[t_idx1]:5.0f}s__{freq[f_idx0]:4.0f}-{freq[f_idx1]:4.0f}Hz.png').replace(' ', '0')
@ -90,14 +90,14 @@ def save_spec_pic(folder, s_trans, times, freq, t_idx0, t_idx1, f_idx0, f_idx1,
    ax.axis(False)
    # plt.savefig(os.path.join(dataset_folder, fig_title), dpi=IMG_DPI)
-    plt.savefig(Path(DATA_DIR)/fig_title, dpi=IMG_DPI)
+
    plt.savefig(Path(pic_save_folder)/fig_title, dpi=IMG_DPI)
    plt.close()
-    return fig_title, (IMG_SIZE[0]*IMG_DPI, IMG_SIZE[1]*IMG_DPI)
+    return fig_title
-def bboxes_from_file(times_v, fish_freq, rise_idx, rise_size, fish_baseline_freq_time, fish_baseline_freq, pic_save_str,
+def bboxes_from_file(times_v, fish_freq, rise_idx, rise_size, fish_baseline_freq_time, fish_baseline_freq, pic_save_str,t0, t1, f0, f1):
                     bbox_df, cols, width, height, t0, t1, f0, f1):
    times_v_idx0, times_v_idx1 = np.argmin(np.abs(times_v - t0)), np.argmin(np.abs(times_v - t1))
@ -161,13 +161,6 @@ def bboxes_from_file(times_v, fish_freq, rise_idx, rise_size, fish_baseline_freq
        if len(left_time_bound) == 0:
            continue
        # x0 = np.array((left_time_bound - t0) / (t1 - t0) * width, dtype=int)
        # x1 = np.array((right_time_bound - t0) / (t1 - t0) * width, dtype=int)
        #
        # y0 = np.array((1 - (upper_freq_bound - f0) / (f1 - f0)) * height, dtype=int)
        # y1 = np.array((1 - (lower_freq_bound - f0) / (f1 - f0)) * height, dtype=int)
        rel_x0 = np.array((left_time_bound - t0) / (t1 - t0), dtype=float)
        rel_x1 = np.array((right_time_bound - t0) / (t1 - t0), dtype=float)
@ -184,15 +177,8 @@ def bboxes_from_file(times_v, fish_freq, rise_idx, rise_size, fish_baseline_freq
        all_width.extend(rel_width)
        all_height.extend(rel_height)
        # bbox = np.array([[pic_save_str for i in range(len(left_time_bound))],
        #                  left_time_bound,
        #                  right_time_bound,
        #                  lower_freq_bound,
        #                  upper_freq_bound,
        #                  x0, y0, x1, y1])
    bbox_yolo_style = np.array([
-        np.ones(len(all_x_center)),
+        np.ones(len(all_x_center), dtype=int),
        all_x_center,
        all_y_center,
        all_width,
@ -205,16 +191,19 @@ def bboxes_from_file(times_v, fish_freq, rise_idx, rise_size, fish_baseline_freq
 def main(args):
    folders = list(f.parent for f in Path(args.folder).rglob('fill_times.npy'))
    pic_save_folder = DATA_DIR if not args.inference else (Path('data') / Path(args.folder).name)
    if len(folders) == 0:
        print('no datasets containing fill_times.npy found')
    if not args.inference:
        print('generate training dataset only for files with detected rises')
        folders = [folder for folder in folders if (folder / 'analysis' / 'rise_idx.npy').exists()]
        cols = ['image', 't0', 't1', 'f0', 'f1', 'x0', 'y0', 'x1', 'y1']
        bbox_df = pd.DataFrame(columns=cols)
    else:
        print('generate inference dataset ... only image output')
-        bbox_df = {}
+        if not (Path('data') / Path(args.folder).name).exists():
            (Path('data') / Path(args.folder).name).mkdir(parents=True, exist_ok=True)
    for enu, folder in enumerate(folders):
        print(f'DataSet generation from {folder} | {enu+1}/{len(folders)}')
@ -259,52 +248,52 @@ def main(args):
            transformed = T.Normalize(mean=torch.mean(log_s), std=torch.std(log_s))
            s_trans = transformed(log_s.unsqueeze(0))
-            pic_save_str, (width, height) = save_spec_pic(folder, s_trans, times, freq, t_idx0, t_idx1, f_idx0, f_idx1, args.dataset_folder)
+            pic_save_str = save_spec_pic(folder, s_trans, times, freq, t_idx0, t_idx1, f_idx0, f_idx1, pic_save_folder)
            if not args.inference:
                bbox_yolo_style = bboxes_from_file(times_v, fish_freq, rise_idx, rise_size,
                                                   fish_baseline_freq_time, fish_baseline_freq,
-                                                   pic_save_str, bbox_df, cols, width, height, t0, t1, f0, f1)
+                                                   pic_save_str,t0, t1, f0, f1)
            #######################################################################
-            if False:
+            # if False:
-                if bbox_yolo_style.shape[0] >= 1:
+            #     if bbox_yolo_style.shape[0] >= 1:
-                    f_res, t_res = freq[1] - freq[0], times[1] - times[0]
+            #         f_res, t_res = freq[1] - freq[0], times[1] - times[0]
-
+            #
-                    fig_title = (
+            #         fig_title = (
-                        f'{Path(folder).name}__{times[t_idx0]:5.0f}s-{times[t_idx1]:5.0f}s__{freq[f_idx0]:4.0f}-{freq[f_idx1]:4.0f}Hz.png').replace(
+            #             f'{Path(folder).name}__{times[t_idx0]:5.0f}s-{times[t_idx1]:5.0f}s__{freq[f_idx0]:4.0f}-{freq[f_idx1]:4.0f}Hz.png').replace(
-                        ' ', '0')
+            #             ' ', '0')
-                    fig = plt.figure(figsize=IMG_SIZE, num=fig_title)
+            #         fig = plt.figure(figsize=IMG_SIZE, num=fig_title)
-                    gs = gridspec.GridSpec(1, 1, bottom=0.1, left=0.1, right=0.95, top=0.95)  #
+            #         gs = gridspec.GridSpec(1, 1, bottom=0.1, left=0.1, right=0.95, top=0.95)  #
-                    ax = fig.add_subplot(gs[0, 0])
+            #         ax = fig.add_subplot(gs[0, 0])
-                    ax.imshow(s_trans.squeeze(), cmap='gray', aspect='auto', origin='lower',
+            #         ax.imshow(s_trans.squeeze(), cmap='gray', aspect='auto', origin='lower',
-                              extent=(times[t_idx0] / 3600, (times[t_idx1] + t_res) / 3600, freq[f_idx0], freq[f_idx1] + f_res))
+            #                   extent=(times[t_idx0] / 3600, (times[t_idx1] + t_res) / 3600, freq[f_idx0], freq[f_idx1] + f_res))
-                    # ax.invert_yaxis()
+            #         # ax.invert_yaxis()
-                    # ax.axis(False)
+            #         # ax.axis(False)
-
+            #
-                    for i in range(len(bbox_df)):
+            #         for i in range(len(bbox_df)):
-                        # Cbbox = np.array(bbox_df.loc[i, ['x0', 'y0', 'x1', 'y1']].values, dtype=np.float32)
+            #             # Cbbox = np.array(bbox_df.loc[i, ['x0', 'y0', 'x1', 'y1']].values, dtype=np.float32)
-                        Cbbox = bbox_df.loc[i, ['t0', 'f0', 't1', 'f1']]
+            #             Cbbox = bbox_df.loc[i, ['t0', 'f0', 't1', 'f1']]
-                        ax.add_patch(
+            #             ax.add_patch(
-                            Rectangle((float(Cbbox['t0']) / 3600, float(Cbbox['f0'])),
+            #                 Rectangle((float(Cbbox['t0']) / 3600, float(Cbbox['f0'])),
-                                      float(Cbbox['t1']) / 3600 - float(Cbbox['t0']) / 3600,
+            #                           float(Cbbox['t1']) / 3600 - float(Cbbox['t0']) / 3600,
-                                      float(Cbbox['f1']) - float(Cbbox['f0']),
+            #                           float(Cbbox['f1']) - float(Cbbox['f0']),
-                                      fill=False, color="white", linestyle='-', linewidth=2, zorder=10)
+            #                           fill=False, color="white", linestyle='-', linewidth=2, zorder=10)
-                        )
+            #             )
-
+            #
-                    # print(bbox_yolo_style.T)
+            #         # print(bbox_yolo_style.T)
-
+            #
-                    for bbox in bbox_yolo_style:
+            #         for bbox in bbox_yolo_style:
-                        x0 = bbox[1] - bbox[3]/2 # x_center - width/2
+            #             x0 = bbox[1] - bbox[3]/2 # x_center - width/2
-                        y0 = 1 - (bbox[2] + bbox[4]/2) # x_center - width/2
+            #             y0 = 1 - (bbox[2] + bbox[4]/2) # x_center - width/2
-                        w = bbox[3]
+            #             w = bbox[3]
-                        h = bbox[4]
+            #             h = bbox[4]
-                        ax.add_patch(
+            #             ax.add_patch(
-                            Rectangle((x0, y0), w, h,
+            #                 Rectangle((x0, y0), w, h,
-                                      fill=False, color="k", linestyle='--', linewidth=2, zorder=10,
+            #                           fill=False, color="k", linestyle='--', linewidth=2, zorder=10,
-                                      transform=ax.transAxes)
+            #                           transform=ax.transAxes)
-                        )
+            #             )
-                    plt.show()
+            #         plt.show()
            #######################################################################
        # if not args.inference:
@ -314,11 +303,7 @@ def main(args):
 if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Evaluated electrode array recordings with multiple fish.')
    parser.add_argument('folder', type=str, help='single recording analysis', default='')
-    parser.add_argument('-d', "--dataset_folder", type=str, help='designated datasef folder', default=DATA_DIR)
+    parser.add_argument('-i', "--inference", action="store_true")
    parser.add_argument('-i', "--inference", action="store_true", help="generate inference dataset. Img only")
    args = parser.parse_args()
    if not Path(args.dataset_folder).exists():
        Path(args.dataset_folder).mkdir(parents=True, exist_ok=True)
    main(args)