Working on the dataset. Next, the dataloader will be reworked to load test and train data based on the corresponding CSV files.
This commit is contained in:
parent ad74322f94
commit ecf110e051
@@ -112,23 +112,14 @@ def bboxes_from_file(times_v, fish_freq, rise_idx, rise_size, fish_baseline_freq
     lower_freq_bound = lower_freq_bound[mask]
     upper_freq_bound = upper_freq_bound[mask]

-    dt_bbox = right_time_bound - left_time_bound
-    df_bbox = upper_freq_bound - lower_freq_bound
+    # dt_bbox = right_time_bound - left_time_bound
+    # df_bbox = upper_freq_bound - lower_freq_bound

-    # embed()
-    # quit()
-    # left_time_bound -= dt_bbox + 0.01 * (t1 - t0)
-    # right_time_bound += dt_bbox + 0.01 * (t1 - t0)
-    # lower_freq_bound -= df_bbox + 0.01 * (f1 - f0)
-    # upper_freq_bound += df_bbox + 0.01 * (f1 - f0)
-
     left_time_bound -= 0.01 * (t1 - t0)
     right_time_bound += 0.05 * (t1 - t0)
     lower_freq_bound -= 0.01 * (f1 - f0)
     upper_freq_bound += 0.05 * (f1 - f0)

-    # embed()
-    # quit()
     mask2 = ((left_time_bound >= t0) &
              (right_time_bound <= t1) &
              (lower_freq_bound >= f0) &
@@ -150,15 +141,11 @@ def bboxes_from_file(times_v, fish_freq, rise_idx, rise_size, fish_baseline_freq
                      lower_freq_bound,
                      upper_freq_bound,
                      x0, y0, x1, y1])
-    # test_s = ['a', 'a', 'a', 'a']
     tmp_df = pd.DataFrame(
-        # index= [pic_save_str for i in range(len(left_time_bound))],
-        # index= test_s,
         data=bbox.T,
         columns=cols
     )
     bbox_df = pd.concat([bbox_df, tmp_df], ignore_index=True)
-    # bbox_df.append(tmp_df)
     return bbox_df

 def main(args):
@@ -200,6 +187,7 @@ def main(args):
     # )
     plt.show()

+    # Hyperparameter
     min_freq = 200
     max_freq = 1500
     d_freq = 200
@@ -207,28 +195,34 @@ def main(args):
     d_time = 60*10
     time_overlap = 60*1

+    # init dataframe if not existent so far
+    eval_files = []
     if not os.path.exists(os.path.join('train', 'bbox_dataset.csv')):
         cols = ['image', 't0', 't1', 'f0', 'f1', 'x0', 'y0', 'x1', 'y1']
         bbox_df = pd.DataFrame(columns=cols)

+    # else load datafile ... and check for already regarded files (eval_files)
     else:
         bbox_df = pd.read_csv(os.path.join('train', 'bbox_dataset.csv'), sep=',', index_col=0)
         cols = list(bbox_df.keys())
-        eval_files = []
         # ToDo: make sure not same file twice
         for f in pd.unique(bbox_df['image']):
             eval_files.append(f.split('__')[0])

+    # find folders that have fine_specs...
     folders = list(f.parent for f in Path(args.folder).rglob('fill_times.npy'))

-    # embed()
-    # quit()
+
     for enu, folder in enumerate(folders):
         print(f'DataSet generation from {folder} | {enu+1}/{len(folders)}')
+        # check for those folders where rises are detected
         if not (folder/'analysis'/'rise_idx.npy').exists():
             continue

+        # embed()
+        # quit()
+        # ToDo: check if folder in eval_files ... is so: continue

         freq, times, spec, EODf_v, ident_v, idx_v, times_v, fish_freq, rise_idx, rise_size, fish_baseline_freq, fish_baseline_freq_time = (
             load_data(folder))
         f_res, t_res = freq[1] - freq[0], times[1] - times[0]

44  data/train_test_split.py  Normal file
@@ -0,0 +1,44 @@
+import pandas as pd
+from pathlib import Path
+import numpy as np
+import os
+
+from IPython import embed
+
+def define_train_test_img_names(bbox, test_size = 0.2):
+    np.random.seed(42)
+    unique_imgs = np.asarray(pd.unique(bbox['image']))
+    np.random.shuffle(unique_imgs)
+
+    test_img = sorted(unique_imgs[:int(len(unique_imgs) * test_size)])
+    train_img = sorted(unique_imgs[int(len(unique_imgs) * test_size):])
+
+    return test_img, train_img
+
+def split_data_df_in_test_train_df(bbox, test_img, train_img):
+    cols = list(bbox.keys())
+
+    test_bbox = pd.DataFrame(columns=cols)
+    train_bbox = pd.DataFrame(columns=cols)
+
+    for img_name in test_img:
+        tmp_df = bbox[bbox['image'] == img_name]
+        test_bbox = pd.concat([test_bbox, tmp_df], ignore_index=True)
+
+    for img_name in train_img:
+        tmp_df = bbox[bbox['image'] == img_name]
+        train_bbox = pd.concat([train_bbox, tmp_df], ignore_index=True)
+
+    return train_bbox, test_bbox, cols
+def main(path):
+    bbox = pd.read_csv(path/'bbox_dataset.csv', sep=',', index_col=0)
+
+    test_img, train_img = define_train_test_img_names(bbox)
+
+    train_bbox, test_bbox, cols = split_data_df_in_test_train_df(bbox, test_img, train_img)
+
+    train_bbox.to_csv(path/'bbox_train.csv', columns=cols, sep=',')
+    test_bbox.to_csv(path/'bbox_test.csv', columns=cols, sep=',')
+
+if __name__ == '__main__':
+    main(Path('./train'))
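
Note: the commit message announces that the dataloader will next be reworked to load train and test data from these CSV files. The dataloader itself is not part of this commit; the following is only a minimal sketch of how bbox_train.csv / bbox_test.csv written by data/train_test_split.py could be read back into per-image bounding boxes. The helper name load_bbox_split and the grouping logic are assumptions for illustration, not code from this repository.

# Hypothetical sketch (not part of this commit): read one split CSV and
# group the bounding boxes per image, using the columns written above.
from pathlib import Path

import pandas as pd


def load_bbox_split(path=Path('./train'), split='train'):
    """Return {image_name: (N, 4) array of [x0, y0, x1, y1]} for one split."""
    df = pd.read_csv(path / f'bbox_{split}.csv', sep=',', index_col=0)
    boxes = {}
    for img_name, grp in df.groupby('image'):
        # x0, y0, x1, y1 are the pixel coordinates stored in bbox_dataset.csv
        boxes[img_name] = grp[['x0', 'y0', 'x1', 'y1']].to_numpy(dtype=float)
    return boxes


if __name__ == '__main__':
    train_boxes = load_bbox_split(split='train')
    test_boxes = load_bbox_split(split='test')
    print(f'{len(train_boxes)} train images, {len(test_boxes)} test images')

Because the split in define_train_test_img_names is done over unique image names (with a fixed random seed), all boxes of one image end up in exactly one of the two CSVs, so a loader like this never sees the same image in both splits.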