something works !!!
This commit is contained in:
parent
30a9e71e76
commit
85e675fb48
@ -1,8 +1,9 @@
|
|||||||
import pathlib

import torch

# --- Training hyperparameters -------------------------------------------
BATCH_SIZE = 4    # images per training batch
RESIZE_TO = 416   # images are resized to RESIZE_TO x RESIZE_TO pixels
NUM_EPOCHS = 20   # total number of training epochs
NUM_WORKERS = 4   # DataLoader worker processes

# Train on the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
@ -14,3 +15,7 @@ CLASSES = ['__backgroud__', '1']
|
|||||||
# Number of classes the detector predicts (CLASSES already includes the
# background entry).
NUM_CLASSES = len(CLASSES)

# Directory that model checkpoints and loss plots are written to.
OUTDIR = 'model_outputs'

# mkdir(exist_ok=True) already tolerates a pre-existing directory, so the
# separate exists() check the original had was redundant.
pathlib.Path(OUTDIR).mkdir(parents=True, exist_ok=True)
@ -1,6 +1,84 @@
|
|||||||
|
import torch
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from confic import OUTDIR
|
||||||
|
class Averager:
    """Maintain a running average of the values fed in through send()."""

    def __init__(self):
        self.current_total = 0.0
        self.iterations = 0.0

    def send(self, value):
        """Fold one value into the running average."""
        self.current_total += value
        self.iterations += 1

    @property
    def value(self):
        """Current average, or 0 before any value has been sent."""
        if not self.iterations:
            return 0
        return self.current_total / self.iterations

    def reset(self):
        """Discard everything accumulated so far."""
        self.current_total = 0.0
        self.iterations = 0.0
|
class SaveBestModel:
    """
    Save a checkpoint whenever the validation loss improves on the best
    value seen so far during training.
    """

    def __init__(self, best_valid_loss=float('inf')):
        # Best (lowest) validation loss observed so far.
        self.best_valid_loss = best_valid_loss

    def __call__(self, current_valid_loss, epoch, model, optimizer):
        # Guard clause: nothing to do unless the loss actually improved.
        if current_valid_loss >= self.best_valid_loss:
            return
        self.best_valid_loss = current_valid_loss
        print(f"\nBest validation loss: {self.best_valid_loss}")
        print(f"\nSaving best model for epoch: {epoch + 1}\n")
        checkpoint = {
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }
        torch.save(checkpoint, f'./{OUTDIR}/best_model.pth')
def collate_fn(batch):
    """
    DataLoader collate function: images in a batch can contain different
    numbers of objects (variable-size target tensors), so samples are
    transposed into per-field tuples instead of being stacked.
    """
    transposed = zip(*batch)
    return tuple(transposed)
def save_model(epoch, model, optimizer):
    """
    Write the current training state (epoch counter, model weights and
    optimizer state) to OUTDIR/last_model.pth; safe to call at any point
    during training.
    """
    state = {
        'epoch': epoch + 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }
    torch.save(state, f'./{OUTDIR}/last_model.pth')
def save_loss_plot(OUT_DIR, train_loss, val_loss):
    """
    Render the training and validation loss curves into two separate
    figures and save them as PNG files under OUT_DIR.
    """
    # Both plots share the same shape; only the data, color, label and
    # output filename differ.
    for losses, color, label, fname in (
        (train_loss, 'tab:blue', 'train loss', 'train_loss.png'),
        (val_loss, 'tab:red', 'validation loss', 'valid_loss.png'),
    ):
        figure, axis = plt.subplots()
        axis.plot(losses, color=color)
        axis.set_xlabel('iterations')
        axis.set_ylabel(label)
        figure.savefig(f"{OUT_DIR}/{fname}")
    print('SAVING PLOTS COMPLETE...')
    plt.close('all')
2
model.py
2
model.py
@ -23,6 +23,6 @@ def create_model(num_classes: int) -> torch.nn.Module:
|
|||||||
|
|
||||||
in_features = model.roi_heads.box_predictor.cls_score.in_features
|
in_features = model.roi_heads.box_predictor.cls_score.in_features
|
||||||
|
|
||||||
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes+1)
|
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
|
||||||
|
|
||||||
return model
|
return model
|
79
train.py
79
train.py
@ -2,7 +2,9 @@ from confic import (DEVICE, NUM_CLASSES, NUM_EPOCHS, OUTDIR, NUM_WORKERS, TRAIN_
|
|||||||
from model import create_model
|
from model import create_model
|
||||||
|
|
||||||
from tqdm.auto import tqdm
|
from tqdm.auto import tqdm
|
||||||
|
|
||||||
from datasets import create_train_test_dataset, create_train_loader, create_valid_loader
|
from datasets import create_train_test_dataset, create_train_loader, create_valid_loader
|
||||||
|
from custom_utils import Averager, SaveBestModel, save_model, save_loss_plot
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
@ -10,34 +12,81 @@ import time
|
|||||||
|
|
||||||
from IPython import embed
|
from IPython import embed
|
||||||
|
|
||||||
def train(train_loader, model, optimizer):
    """
    Run one epoch of training over train_loader.

    Every per-batch loss is fed into the module-level train_loss_hist
    averager and appended to the module-level train_loss_list, which is
    also returned.
    """
    print('Training')
    global train_loss_list

    progress = tqdm(train_loader, total=len(train_loader))
    for samples, targets in progress:
        # Move the batch onto the training device.
        images = [img.to(DEVICE) for img in samples]
        targets = [{key: val.to(DEVICE) for key, val in t.items()} for t in targets]

        # In training mode the detection model returns a dict of losses.
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())
        loss_value = losses.item()

        train_loss_hist.send(loss_value)   # module-level Averager instance
        train_loss_list.append(loss_value)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        progress.set_description(desc=f"Loss: {loss_value:.4f}")

    return train_loss_list
if __name__ == '__main__':
    # Data: the test split is loaded with the train loader factory as well.
    train_data, test_data = create_train_test_dataset(TRAIN_DIR)
    train_loader = create_train_loader(train_data)
    test_loader = create_train_loader(test_data)

    model = create_model(num_classes=NUM_CLASSES)
    model = model.to(DEVICE)

    # Optimize only the parameters that require gradients.
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0005)

    # Module-level state read by train() (and a future validate()).
    train_loss_hist = Averager()
    val_loss_hist = Averager()
    train_loss_list = []
    val_loss_list = []

    save_best_model = SaveBestModel()

    for epoch in range(NUM_EPOCHS):
        train_loss_hist.reset()
        val_loss_hist.reset()

        train_loss = train(train_loader, model, optimizer)
        # TODO: validation is not implemented yet — val_loss_hist stays at 0,
        # so save_best_model only ever fires on the first epoch.
        # val_loss = validate(test_loader, model, optimizer)

        save_best_model(
            val_loss_hist.value, epoch, model, optimizer
        )
        save_model(epoch, model, optimizer)

        # BUG FIX: this previously passed `val_loss`, which is never assigned
        # (validation is commented out) and raised NameError on epoch 0.
        # Pass the (currently empty) val_loss_list instead.
        save_loss_plot(OUTDIR, train_loss, val_loss_list)
Loading…
Reference in New Issue
Block a user