something works !!!

Till Raab 2023-10-24 08:56:35 +02:00
parent 30a9e71e76
commit 85e675fb48
4 changed files with 152 additions and 20 deletions

View File

@@ -1,8 +1,9 @@
 import torch
+import pathlib

 BATCH_SIZE = 4
 RESIZE_TO = 416
-NUM_EPOCHS = 10
+NUM_EPOCHS = 20
 NUM_WORKERS = 4

 DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
@@ -14,3 +15,7 @@ CLASSES = ['__backgroud__', '1']
 NUM_CLASSES = len(CLASSES)

 OUTDIR = 'model_outputs'
+
+if not pathlib.Path(OUTDIR).exists():
+    pathlib.Path(OUTDIR).mkdir(parents=True, exist_ok=True)

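Side note on the config change above: pathlib.Path.mkdir with exist_ok=True already tolerates an existing directory, so the exists() guard is not strictly needed. A minimal sketch of the equivalent one-liner, reusing the OUTDIR name from the diff:

import pathlib

OUTDIR = 'model_outputs'

# parents=True creates missing parent directories;
# exist_ok=True makes a prior existence check unnecessary.
pathlib.Path(OUTDIR).mkdir(parents=True, exist_ok=True)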
View File

@@ -1,6 +1,84 @@
+import torch
+import matplotlib.pyplot as plt
+
+from confic import OUTDIR
+
+
+class Averager:
+    def __init__(self):
+        self.current_total = 0.0
+        self.iterations = 0.0
+
+    def send(self, value):
+        self.current_total += value
+        self.iterations += 1
+
+    @property
+    def value(self):
+        if self.iterations == 0:
+            return 0
+        else:
+            return 1.0 * self.current_total / self.iterations
+
+    def reset(self):
+        self.current_total = 0.0
+        self.iterations = 0.0
+
+
+class SaveBestModel:
+    """
+    Class to save the best model while training. If the current epoch's
+    validation loss is less than the previous least loss, then save the
+    model state.
+    """
+    def __init__(
+        self, best_valid_loss=float('inf')
+    ):
+        self.best_valid_loss = best_valid_loss
+
+    def __call__(
+        self, current_valid_loss,
+        epoch, model, optimizer
+    ):
+        if current_valid_loss < self.best_valid_loss:
+            self.best_valid_loss = current_valid_loss
+            print(f"\nBest validation loss: {self.best_valid_loss}")
+            print(f"\nSaving best model for epoch: {epoch + 1}\n")
+            torch.save({
+                'epoch': epoch + 1,
+                'model_state_dict': model.state_dict(),
+                'optimizer_state_dict': optimizer.state_dict(),
+            }, f'./{OUTDIR}/best_model.pth')
+
+
 def collate_fn(batch):
     """
     To handle the data loading as different images may have different number
     of objects and to handle varying size tensors as well.
     """
     return tuple(zip(*batch))
+
+
+def save_model(epoch, model, optimizer):
+    """
+    Function to save the trained model till the current epoch, or whenever called.
+    """
+    torch.save({
+        'epoch': epoch + 1,
+        'model_state_dict': model.state_dict(),
+        'optimizer_state_dict': optimizer.state_dict(),
+    }, f'./{OUTDIR}/last_model.pth')
+
+
+def save_loss_plot(OUT_DIR, train_loss, val_loss):
+    figure_1, train_ax = plt.subplots()
+    figure_2, valid_ax = plt.subplots()
+
+    train_ax.plot(train_loss, color='tab:blue')
+    train_ax.set_xlabel('iterations')
+    train_ax.set_ylabel('train loss')
+
+    valid_ax.plot(val_loss, color='tab:red')
+    valid_ax.set_xlabel('iterations')
+    valid_ax.set_ylabel('validation loss')
+
+    figure_1.savefig(f"{OUT_DIR}/train_loss.png")
+    figure_2.savefig(f"{OUT_DIR}/valid_loss.png")
+    print('SAVING PLOTS COMPLETE...')
+
+    plt.close('all')

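The new helpers above are meant to be driven once per epoch: Averager accumulates per-batch loss values and exposes their running mean via value, and SaveBestModel writes a checkpoint to {OUTDIR}/best_model.pth only when that mean improves on the best seen so far. A short usage sketch under those assumptions; the toy model, optimizer, and loss values are illustrative and not part of the commit:

import torch

from custom_utils import Averager, SaveBestModel

model = torch.nn.Linear(2, 2)                        # stand-in model
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

loss_hist = Averager()
save_best = SaveBestModel()

for epoch in range(3):
    loss_hist.reset()
    for batch_loss in (0.9, 0.7, 0.8):               # stand-in per-batch losses
        loss_hist.send(batch_loss)
    # loss_hist.value is the epoch mean; a checkpoint is only written
    # when it is lower than every previous epoch's mean
    save_best(loss_hist.value, epoch, model, optimizer)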
View File

@@ -23,6 +23,6 @@ def create_model(num_classes: int) -> torch.nn.Module:
     in_features = model.roi_heads.box_predictor.cls_score.in_features
-    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes+1)
+    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
     return model

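Only the tail of create_model is shown here. For context, a typical torchvision head swap looks like the sketch below; the specific backbone (fasterrcnn_resnet50_fpn with pretrained weights) is an assumption, since the diff does not show how model is built. FastRCNNPredictor expects the class count including background, and NUM_CLASSES = len(CLASSES) in the config already counts the background entry, which is what makes dropping the extra +1 consistent with passing NUM_CLASSES from the training script.

import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor


def create_model(num_classes: int) -> torch.nn.Module:
    # Assumed backbone: COCO-pretrained Faster R-CNN with a ResNet-50 FPN.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights='DEFAULT')

    # Replace the classification head so it predicts num_classes outputs,
    # background included, matching NUM_CLASSES = len(CLASSES) in the config.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model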
View File

@@ -2,7 +2,9 @@ from confic import (DEVICE, NUM_CLASSES, NUM_EPOCHS, OUTDIR, NUM_WORKERS, TRAIN_
 from model import create_model
 from tqdm.auto import tqdm
 from datasets import create_train_test_dataset, create_train_loader, create_valid_loader
+from custom_utils import Averager, SaveBestModel, save_model, save_loss_plot

 import torch
 import matplotlib.pyplot as plt
@@ -10,34 +12,81 @@ import time
 from IPython import embed

-if __name__ == '__main__':
-    train_data, test_data = create_train_test_dataset(TRAIN_DIR)
-    train_loader = create_train_loader(train_data)
-    test_loader = create_train_loader(test_data)
-
-    model = create_model(num_classes=1)
-    model = model.to(DEVICE)
-
-    params = [p for p in model.parameters() if p.requires_grad]
-    optimizer = torch.optim.SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0005)
-
-    for epoch in range(NUM_EPOCHS):
-        prog_bar = tqdm(train_loader, total=len(train_loader))
-        for samples, targets in prog_bar:
-            images = list(image.to(DEVICE) for image in samples)
-            targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]
-
-            loss_dict = model(images, targets)
-            losses = sum(loss for loss in loss_dict.values())
-            loss_value = losses.item()
-
-            optimizer.zero_grad()
-            losses.backward()
-            optimizer.step()
-
-            prog_bar.set_description(desc=f"Loss: {loss_value:.4f}")
+def train(train_loader, model, optimizer):
+    print('Training')
+    global train_loss_list
+
+    prog_bar = tqdm(train_loader, total=len(train_loader))
+    for samples, targets in prog_bar:
+        images = list(image.to(DEVICE) for image in samples)
+        targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]
+
+        try:
+            loss_dict = model(images, targets)
+        except:
+            embed()
+            quit()
+
+        losses = sum(loss for loss in loss_dict.values())
+        loss_value = losses.item()
+
+        train_loss_hist.send(loss_value)  # this is a global instance !!!
+        train_loss_list.append(loss_value)  # check what exactly this does !!!
+
+        optimizer.zero_grad()
+        losses.backward()
+        optimizer.step()
+
+        prog_bar.set_description(desc=f"Loss: {loss_value:.4f}")
+
+    return train_loss_list
+
+
+if __name__ == '__main__':
+    train_data, test_data = create_train_test_dataset(TRAIN_DIR)
+    train_loader = create_train_loader(train_data)
+    test_loader = create_train_loader(test_data)
+
+    model = create_model(num_classes=NUM_CLASSES)
+    model = model.to(DEVICE)
+
+    params = [p for p in model.parameters() if p.requires_grad]
+    optimizer = torch.optim.SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0005)
+
+    train_loss_hist = Averager()
+    val_loss_hist = Averager()
+
+    # train_itr = 1
+    # val_itr = 1
+
+    train_loss_list = []
+    val_loss_list = []
+
+    save_best_model = SaveBestModel()
+
+    for epoch in range(NUM_EPOCHS):
+        train_loss_hist.reset()
+        val_loss_hist.reset()
+
+        train_loss = train(train_loader, model, optimizer)
+        # val_loss = validate(train_loader, model, optimizer)
+
+        save_best_model(
+            val_loss_hist.value, epoch, model, optimizer
+        )
+        save_model(epoch, model, optimizer)
+        save_loss_plot(OUTDIR, train_loss, val_loss)
+
+        # prog_bar = tqdm(train_loader, total=len(train_loader))
+        # for samples, targets in prog_bar:
+        #     images = list(image.to(DEVICE) for image in samples)
+        #
+        #     targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]
+        #
+        #     loss_dict = model(images, targets)
+        #
+        #     losses = sum(loss for loss in loss_dict.values())
+        #     loss_value = losses.item()
+        #
+        #     optimizer.zero_grad()
+        #     losses.backward()
+        #     optimizer.step()
+        #
+        #     prog_bar.set_description(desc=f"Loss: {loss_value:.4f}")
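As committed, the main loop still passes val_loss to save_loss_plot and val_loss_hist.value to save_best_model even though the validate call is commented out, so the plotting call will fail with a NameError unless val_loss is defined somewhere outside the shown hunks, and the "best" checkpoint is chosen on a constant 0. A possible validate pass that would fill this gap, mirroring train() but without gradient updates, is sketched below; the function is not part of this commit, and its name and signature are taken from the commented-out call:

def validate(valid_loader, model, optimizer):
    print('Validating')
    global val_loss_list

    prog_bar = tqdm(valid_loader, total=len(valid_loader))
    for samples, targets in prog_bar:
        images = list(image.to(DEVICE) for image in samples)
        targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]

        # torchvision detection models return the loss dict only in train
        # mode, so the model stays in train mode and gradients are skipped.
        with torch.no_grad():
            loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()

        val_loss_hist.send(loss_value)
        val_loss_list.append(loss_value)

        prog_bar.set_description(desc=f"Loss: {loss_value:.4f}")

    return val_loss_list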