
Refactor training loop

more shuffle

nagadomi  2015-11-30 17:18:52 +09:00
parent c72ec3112b
commit aaac6ed6e5
4 changed files with 89 additions and 63 deletions
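In short: instead of generating patches with the transformer inside the optimizer loop, the training loop now pre-samples a flat pool of patches once per outer epoch (resampling), then makes several shuffled passes over that pool (inner_epoch passes of minibatch_adam). A condensed sketch of the new control flow, assembled from the diffs below (not runnable on its own; x and y are the CPU patch pools allocated in train.lua):

    for epoch = 1, settings.epoch do
       model:training()
       -- rebuild the patch pool with fresh crops/augmentations
       resampling(x, y, train_x, pairwise_func)
       for i = 1, settings.inner_epoch do
          -- one shuffled pass over the pool
          print(minibatch_adam(model, criterion, x, y, adam_config))
          -- then validate, checkpoint the best model, decay the learning rate on plateau
       end
    end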

lib/data_augmentation.lua

@@ -54,8 +54,8 @@ end
 function data_augmentation.unsharp_mask(src, p)
    if torch.uniform() < p then
       local radius = 0 -- auto
-      local sigma = torch.uniform(0.7, 1.4)
-      local amount = torch.uniform(0.5, 1.0)
+      local sigma = torch.uniform(0.7, 3.0)
+      local amount = torch.uniform(0.25, 0.75)
       local threshold = torch.uniform(0.0, 0.05)
       local unsharp = gm.Image(src, "RGB", "DHW"):
          unsharpMask(radius, sigma, amount, threshold):
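The new ranges widen the blur (sigma 0.7-3.0, was 0.7-1.4) while weakening the sharpening (amount 0.25-0.75, was 0.5-1.0). For reference, a minimal standalone sketch of the call these parameters drive, assuming the torch 'graphicsmagick' binding and a DHW float tensor src:

    local gm = require 'graphicsmagick'
    local sigma  = torch.uniform(0.7, 3.0)    -- new, wider blur range
    local amount = torch.uniform(0.25, 0.75)  -- new, gentler sharpening
    local dest = gm.Image(src, "RGB", "DHW"):
       unsharpMask(0, sigma, amount, torch.uniform(0.0, 0.05)):
       toTensor("float", "RGB", "DHW")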

lib/minibatch_adam.lua

@@ -3,30 +3,32 @@ require 'cutorch'
 require 'xlua'
 local function minibatch_adam(model, criterion,
-                              train_x,
-                              config, transformer,
-                              input_size, target_size)
+                              train_x, train_y,
+                              config)
    local parameters, gradParameters = model:getParameters()
    config = config or {}
    local sum_loss = 0
    local count_loss = 0
    local batch_size = config.xBatchSize or 32
-   local shuffle = torch.randperm(#train_x)
+   local shuffle = torch.randperm(train_x:size(1))
    local c = 1
-   local inputs = torch.Tensor(batch_size,
-                               input_size[1], input_size[2], input_size[3]):cuda()
-   local targets = torch.Tensor(batch_size,
-                                target_size[1] * target_size[2] * target_size[3]):cuda()
    local inputs_tmp = torch.Tensor(batch_size,
-                                   input_size[1], input_size[2], input_size[3])
+                                   train_x:size(2), train_x:size(3), train_x:size(4)):zero()
    local targets_tmp = torch.Tensor(batch_size,
-                                    target_size[1] * target_size[2] * target_size[3])
-   for t = 1, #train_x do
-      xlua.progress(t, #train_x)
-      local xy = transformer(train_x[shuffle[t]], false, batch_size)
-      for i = 1, #xy do
-         inputs_tmp[i]:copy(xy[i][1])
-         targets_tmp[i]:copy(xy[i][2])
+                                    train_y:size(2)):zero()
+   local inputs = inputs_tmp:clone():cuda()
+   local targets = targets_tmp:clone():cuda()
+   print("## update")
+   for t = 1, train_x:size(1), batch_size do
+      if t + batch_size - 1 > train_x:size(1) then
+         break
+      end
+      xlua.progress(t, train_x:size(1))
+      for i = 1, batch_size do
+         inputs_tmp[i]:copy(train_x[shuffle[t + i - 1]])
+         targets_tmp[i]:copy(train_y[shuffle[t + i - 1]])
       end
       inputs:copy(inputs_tmp)
       targets:copy(targets_tmp)
@@ -43,13 +45,12 @@ local function minibatch_adam(model, criterion,
          return f, gradParameters
       end
       optim.adam(feval, parameters, config)
       c = c + 1
-      if c % 20 == 0 then
+      if c % 50 == 0 then
          collectgarbage()
       end
    end
-   xlua.progress(#train_x, #train_x)
+   xlua.progress(train_x:size(1), train_x:size(1))
    return { loss = sum_loss / count_loss}
 end
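After the refactor, callers hand minibatch_adam pre-sampled tensors instead of a transformer plus size descriptors. A hypothetical call site (shapes follow the hunks above: train_x is N x C x H x W, train_y is N x D, both CPU tensors):

    -- Hypothetical usage of the new signature; x and y are pre-sampled patch pools.
    local result = minibatch_adam(model, criterion, x, y, {
       learningRate = 0.001, -- new default from lib/settings.lua
       xBatchSize = 32       -- falls back to 32 when omitted
    })
    print(result.loss)       -- average loss over the processed minibatches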

lib/settings.lua

@@ -32,11 +32,13 @@ cmd:option("-random_overlay_rate", 0.0, 'data augmentation using flipped image overlay (0.0-1.0)')
 cmd:option("-random_half_rate", 0.0, 'data augmentation using half resolution image (0.0-1.0)')
 cmd:option("-random_unsharp_mask_rate", 0.0, 'data augmentation using unsharp mask (0.0-1.0)')
 cmd:option("-scale", 2.0, 'scale factor (2)')
-cmd:option("-learning_rate", 0.00025, 'learning rate for adam')
+cmd:option("-learning_rate", 0.001, 'learning rate for adam')
 cmd:option("-crop_size", 46, 'crop size')
 cmd:option("-max_size", 256, 'if image is larger than max_size, image will be crop to max_size randomly')
-cmd:option("-batch_size", 8, 'mini batch size')
-cmd:option("-epoch", 200, 'number of total epochs to run')
+cmd:option("-batch_size", 32, 'mini batch size')
+cmd:option("-patches", 16, 'number of patch samples')
+cmd:option("-inner_epoch", 4, 'number of inner epochs')
+cmd:option("-epoch", 30, 'number of epochs to run')
 cmd:option("-thread", -1, 'number of CPU threads')
 cmd:option("-jpeg_chroma_subsampling_rate", 0.0, 'the rate of YUV 4:2:0/YUV 4:4:4 in denoising training (0.0-1.0)')
 cmd:option("-validation_rate", 0.05, 'validation-set rate (number_of_training_images * validation_rate > 1)')

train.lua

@@ -35,14 +35,14 @@ local function split_data(x, test_size)
    end
    return train_x, valid_x
 end
-local function make_validation_set(x, transformer, n, batch_size)
+local function make_validation_set(x, transformer, n, patches)
    n = n or 4
    local data = {}
    for i = 1, #x do
-      for k = 1, math.max(n / batch_size, 1) do
-         local xy = transformer(x[i], true, batch_size)
-         local tx = torch.Tensor(batch_size, xy[1][1]:size(1), xy[1][1]:size(2), xy[1][1]:size(3))
-         local ty = torch.Tensor(batch_size, xy[1][2]:size(1), xy[1][2]:size(2), xy[1][2]:size(3))
+      for k = 1, math.max(n / patches, 1) do
+         local xy = transformer(x[i], true, patches)
+         local tx = torch.Tensor(patches, xy[1][1]:size(1), xy[1][1]:size(2), xy[1][1]:size(3))
+         local ty = torch.Tensor(patches, xy[1][2]:size(1), xy[1][2]:size(2), xy[1][2]:size(3))
          for j = 1, #xy do
             tx[j]:copy(xy[j][1])
             ty[j]:copy(xy[j][2])
@@ -83,14 +83,15 @@ local function create_criterion(model)
 end
 local function transformer(x, is_validation, n, offset)
    x = compression.decompress(x)
-   n = n or settings.batch_size;
+   n = n or settings.patches
    if is_validation == nil then is_validation = false end
    local random_color_noise_rate = nil
    local random_overlay_rate = nil
    local active_cropping_rate = nil
    local active_cropping_tries = nil
    if is_validation then
-      active_cropping_rate = 0.0
+      active_cropping_rate = 0
       active_cropping_tries = 0
       random_color_noise_rate = 0.0
       random_overlay_rate = 0.0
@@ -137,7 +138,24 @@ local function transformer(x, is_validation, n, offset)
    end
 end
+local function resampling(x, y, train_x, transformer, input_size, target_size)
+   print("## resampling")
+   for t = 1, #train_x do
+      xlua.progress(t, #train_x)
+      local xy = transformer(train_x[t], false, settings.patches)
+      for i = 1, #xy do
+         local index = (t - 1) * settings.patches + i
+         x[index]:copy(xy[i][1])
+         y[index]:copy(xy[i][2])
+      end
+      if t % 50 == 0 then
+         collectgarbage()
+      end
+   end
+end
 local function train()
+   local LR_MIN = 1.0e-5
    local model = srcnn.create(settings.method, settings.backend, settings.color)
    local offset = reconstruct.offset_size(model)
    local pairwise_func = function(x, is_validation, n)
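The resampling function added above packs patch i of image t at flat row (t - 1) * settings.patches + i, matching the x and y pools allocated with settings.patches * #train_x rows in the last hunk below (its trailing input_size/target_size parameters are unused leftovers; the call site passes only four arguments). A quick worked check with the new default patches = 16:

    local patches = 16
    assert((1 - 1) * patches + 1  == 1)   -- image 1 fills rows 1..16
    assert((2 - 1) * patches + 1  == 17)  -- image 2 starts at row 17
    assert((2 - 1) * patches + 16 == 32)  -- ...and ends at row 32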
@@ -145,12 +163,12 @@ local function train()
    end
    local criterion = create_criterion(model)
    local x = torch.load(settings.images)
-   local lrd_count = 0
    local train_x, valid_x = split_data(x, math.floor(settings.validation_rate * #x))
    local adam_config = {
       learningRate = settings.learning_rate,
       xBatchSize = settings.batch_size,
    }
+   local lrd_count = 0
    local ch = nil
    if settings.color == "y" then
       ch = 1
@@ -161,48 +179,53 @@ local function train()
    print("# make validation-set")
    local valid_xy = make_validation_set(valid_x, pairwise_func,
                                         settings.validation_crops,
-                                        settings.batch_size)
+                                        settings.patches)
    valid_x = nil
    collectgarbage()
    model:cuda()
    print("load .. " .. #train_x)
+   local x = torch.Tensor(settings.patches * #train_x,
+                          ch, settings.crop_size, settings.crop_size)
+   local y = torch.Tensor(settings.patches * #train_x,
+                          ch * (settings.crop_size - offset * 2) * (settings.crop_size - offset * 2)):zero()
    for epoch = 1, settings.epoch do
       model:training()
       print("# " .. epoch)
-      print(minibatch_adam(model, criterion, train_x, adam_config,
-                           pairwise_func,
-                           {ch, settings.crop_size, settings.crop_size},
-                           {ch, settings.crop_size - offset * 2, settings.crop_size - offset * 2}
-                          ))
-      model:evaluate()
-      print("# validation")
-      local score = validate(model, criterion, valid_xy)
-      if score < best_score then
-         local test_image = image_loader.load_float(settings.test) -- reload
-         lrd_count = 0
-         best_score = score
-         print("* update best model")
-         torch.save(settings.model_file, model)
-         if settings.method == "noise" then
-            local log = path.join(settings.model_dir,
-                                  ("noise%d_best.png"):format(settings.noise_level))
-            save_test_jpeg(model, test_image, log)
-         elseif settings.method == "scale" then
-            local log = path.join(settings.model_dir,
-                                  ("scale%.1f_best.png"):format(settings.scale))
-            save_test_scale(model, test_image, log)
-         end
-      else
-         lrd_count = lrd_count + 1
-         if lrd_count > 5 then
-            lrd_count = 0
-            adam_config.learningRate = adam_config.learningRate * 0.9
-            print("* learning rate decay: " .. adam_config.learningRate)
-         end
-      end
-      print("current: " .. score .. ", best: " .. best_score)
-      collectgarbage()
+      resampling(x, y, train_x, pairwise_func)
+      for i = 1, settings.inner_epoch do
+         print(minibatch_adam(model, criterion, x, y, adam_config))
+         model:evaluate()
+         print("# validation")
+         local score = validate(model, criterion, valid_xy)
+         if score < best_score then
+            local test_image = image_loader.load_float(settings.test) -- reload
+            lrd_count = 0
+            best_score = score
+            print("* update best model")
+            torch.save(settings.model_file, model)
+            if settings.method == "noise" then
+               local log = path.join(settings.model_dir,
+                                     ("noise%d_best.png"):format(settings.noise_level))
+               save_test_jpeg(model, test_image, log)
+            elseif settings.method == "scale" then
+               local log = path.join(settings.model_dir,
+                                     ("scale%.1f_best.png"):format(settings.scale))
+               save_test_scale(model, test_image, log)
+            end
+         else
+            lrd_count = lrd_count + 1
+            if lrd_count > 2 and adam_config.learningRate > LR_MIN then
+               adam_config.learningRate = adam_config.learningRate * 0.8
+               print("* learning rate decay: " .. adam_config.learningRate)
+               lrd_count = 0
+            end
+         end
+         print("current: " .. score .. ", best: " .. best_score)
+         collectgarbage()
+      end
    end
 end
 if settings.gpu > 0 then
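A note on the revised decay schedule: the learning rate now decays by 0.8 on every third consecutive non-improving validation (lrd_count > 2) and only while it stays above the new LR_MIN floor of 1.0e-5, where the old loop decayed by 0.9 on every sixth miss with no floor. Starting from the new default of 0.001, the floor is reached after 21 decay steps; a standalone check:

    local lr, LR_MIN, steps = 0.001, 1.0e-5, 0
    while lr > LR_MIN do  -- mirrors the guard adam_config.learningRate > LR_MIN
       lr = lr * 0.8
       steps = steps + 1
    end
    print(steps)          -- 21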