Add a "user" method: training on user-supplied (x, y) image pairs listed in a
CSV file with JSON metadata, plus conversion with a named user model.

Review notes for this revision of the patch:
  * convert_data.lua: crop_if_large_pair crops rect_x from x (it cropped y
    twice); max_size == 0 disables cropping again, as the old caller-side
    "> 0" check did; load_images keeps `filters` local to each row and
    accepts rows that have no metadata column.
  * lib/pairwise_transform_user.lua: crop_if_large is called with the five
    arguments it declares, and toTensor gets an explicit "byte" type instead
    of a global `t` that is not defined at that point.
  * train.lua: remove_small_image inspects the element `xe`, not the list `x`.
  * Hunk line counts are updated for the added lines. Indentation of context
    lines is reconstructed, so apply with `git apply --ignore-whitespace`.

diff --git a/README.md b/README.md
index 37dacfa..1d4d9b6 100644
--- a/README.md
+++ b/README.md
@@ -91,6 +91,7 @@ luarocks install graphicsmagick # upgrade
 luarocks install lua-csnappy
 luarocks install md5
 luarocks install uuid
+luarocks install csvigo
 PREFIX=$HOME/torch/install luarocks install turbo # if you need to use web application
 ```
 
diff --git a/convert_data.lua b/convert_data.lua
index d8e9868..29c7cee 100644
--- a/convert_data.lua
+++ b/convert_data.lua
@@ -3,6 +3,8 @@ local __FILE__ = (function() return string.gsub(debug.getinfo(2, 'S').source, "^
 package.path = path.join(path.dirname(__FILE__), "lib", "?.lua;") .. package.path
 
 require 'image'
+local cjson = require 'cjson'
+local csvigo = require 'csvigo'
 local compression = require 'compression'
 local settings = require 'settings'
 local image_loader = require 'image_loader'
@@ -10,6 +12,9 @@ local iproc = require 'iproc'
 local alpha_util = require 'alpha_util'
 
 local function crop_if_large(src, max_size)
+   if max_size <= 0 then
+      return src
+   end
    local tries = 4
    if src:size(2) >= max_size and src:size(3) >= max_size then
       local rect
@@ -27,25 +32,68 @@ local function crop_if_large(src, max_size)
       return src
    end
 end
+-- Crop one random max_size x max_size window (in y's coordinates) out of the
+-- pair (x, y); x is cropped at the same coordinates divided by scale_y.
+-- Returns x, y unchanged when max_size <= 0 or when y is not larger than
+-- max_size along both size(2) and size(3).
+local function crop_if_large_pair(x, y, max_size)
+   if max_size <= 0 then
+      return x, y
+   end
+   local scale_y = y:size(2) / x:size(2)
+   local mod = 4
+   assert(x:size(3) == (y:size(3) / scale_y))
+
+   local tries = 4
+   if y:size(2) > max_size and y:size(3) > max_size then
+      assert(max_size % 4 == 0)
+      local rect_x, rect_y
+      for i = 1, tries do
+         local yi = torch.random(0, y:size(2) - max_size)
+         local xi = torch.random(0, y:size(3) - max_size)
+         if mod then
+            yi = yi - (yi % mod)
+            xi = xi - (xi % mod)
+         end
+         rect_y = iproc.crop(y, xi, yi, xi + max_size, yi + max_size)
+         rect_x = iproc.crop(x, xi / scale_y, yi / scale_y, xi / scale_y + max_size / scale_y, yi / scale_y + max_size / scale_y)
+         -- ignore simple background
+         if rect_y:float():std() >= 0 then
+            break
+         end
+      end
+      return rect_x, rect_y
+   else
+      return x, y
+   end
+end
 
 local function load_images(list)
    local MARGIN = 32
-   local lines = utils.split(file.read(list), "\n")
+   local csv = csvigo.load({path = list, verbose = false, mode = "raw"})
    local x = {}
    local skip_notice = false
-   for i = 1, #lines do
-      local line = lines[i]
-      local v = utils.split(line, ",")
-      local filename = v[1]
-      local filters = v[2]
-      if filters then
-         filters = utils.split(filters, ":")
+   for i = 1, #csv do
+      local filename = csv[i][1]
+      local csv_meta = csv[i][2]
+      -- per-row local, so one row's filters never leak into the next row
+      local filters = nil
+      if csv_meta and csv_meta:len() > 0 then
+         csv_meta = cjson.decode(csv_meta)
+      else
+         -- no metadata column: use an empty table so field lookups are safe
+         csv_meta = {}
+      end
+      if csv_meta.filters then
+         filters = csv_meta.filters
       end
       local im, meta = image_loader.load_byte(filename)
       local skip = false
+      local alpha_color = torch.random(0, 1)
+
       if meta and meta.alpha then
          if settings.use_transparent_png then
-            im = alpha_util.fill(im, meta.alpha, torch.random(0, 1))
+            im = alpha_util.fill(im, meta.alpha, alpha_color)
          else
             skip = true
          end
@@ -56,25 +104,35 @@ local function load_images(list)
             skip_notice = true
          end
       else
-         if settings.max_training_image_size > 0 then
-            im = crop_if_large(im, settings.max_training_image_size)
-         end
-         im = iproc.crop_mod4(im)
-         local scale = 1.0
-         if settings.random_half_rate > 0.0 then
-            scale = 2.0
-         end
-         if im then
-            if im:size(2) > (settings.crop_size * scale + MARGIN) and im:size(3) > (settings.crop_size * scale + MARGIN) then
-               table.insert(x, {compression.compress(im), {data = {filters = filters}}})
-            else
-               io.stderr:write(string.format("\n%s: skip: image is too small (%d > size).\n", filename, settings.crop_size * scale + MARGIN))
+         if csv_meta.x then
+            -- method == user
+            local yy = im
+            local xx, meta2 = image_loader.load_byte(csv_meta.x)
+            if meta2 and meta2.alpha then
+               xx = alpha_util.fill(xx, meta2.alpha, alpha_color)
             end
+            xx, yy = crop_if_large_pair(xx, yy, settings.max_training_image_size)
+            table.insert(x, {{y = compression.compress(yy), x = compression.compress(xx)},
+                            {data = {filters = filters, has_x = true}}})
          else
-            io.stderr:write(string.format("\n%s: skip: load error.\n", filename))
+            im = crop_if_large(im, settings.max_training_image_size)
+            im = iproc.crop_mod4(im)
+            local scale = 1.0
+            if settings.random_half_rate > 0.0 then
+               scale = 2.0
+            end
+            if im then
+               if im:size(2) > (settings.crop_size * scale + MARGIN) and im:size(3) > (settings.crop_size * scale + MARGIN) then
+                  table.insert(x, {compression.compress(im), {data = {filters = filters}}})
+               else
+                  io.stderr:write(string.format("\n%s: skip: image is too small (%d > size).\n", filename, settings.crop_size * scale + MARGIN))
+               end
+            else
+               io.stderr:write(string.format("\n%s: skip: load error.\n", filename))
+            end
          end
       end
-      xlua.progress(i, #lines)
+      xlua.progress(i, #csv)
       if i % 10 == 0 then
          collectgarbage()
       end
diff --git a/lib/pairwise_transform.lua b/lib/pairwise_transform.lua
index 8884717..e54418e 100644
--- a/lib/pairwise_transform.lua
+++ b/lib/pairwise_transform.lua
@@ -4,5 +4,6 @@ local pairwise_transform = {}
 pairwise_transform = tablex.update(pairwise_transform, require('pairwise_transform_scale'))
 pairwise_transform = tablex.update(pairwise_transform, require('pairwise_transform_jpeg'))
 pairwise_transform = tablex.update(pairwise_transform, require('pairwise_transform_jpeg_scale'))
+pairwise_transform = tablex.update(pairwise_transform, require('pairwise_transform_user'))
 
 return pairwise_transform
diff --git a/lib/pairwise_transform_user.lua b/lib/pairwise_transform_user.lua
new file mode 100644
index 0000000..9a84878
--- /dev/null
+++ b/lib/pairwise_transform_user.lua
@@ -0,0 +1,78 @@
+local pairwise_utils = require 'pairwise_transform_utils'
+local iproc = require 'iproc'
+local gm = require 'graphicsmagick'
+local pairwise_transform = {}
+
+-- Crop one random max_size x max_size window (in y's coordinates) out of the
+-- pair (x, y); x is cropped at the same coordinates divided by scale_y.
+-- When mod is given, the window offset is rounded down to a multiple of mod.
+-- Returns x, y unchanged when y is not larger than max_size in both dimensions.
+local function crop_if_large(x, y, scale_y, max_size, mod)
+   local tries = 4
+   if y:size(2) > max_size and y:size(3) > max_size then
+      assert(max_size % 4 == 0)
+      local rect_x, rect_y
+      for i = 1, tries do
+         local yi = torch.random(0, y:size(2) - max_size)
+         local xi = torch.random(0, y:size(3) - max_size)
+         if mod then
+            yi = yi - (yi % mod)
+            xi = xi - (xi % mod)
+         end
+         rect_y = iproc.crop(y, xi, yi, xi + max_size, yi + max_size)
+         rect_x = iproc.crop(x, xi / scale_y, yi / scale_y, xi / scale_y + max_size / scale_y, yi / scale_y + max_size / scale_y)
+         -- ignore simple background
+         -- NOTE(review): std() >= 0 holds for any finite input, so the first
+         -- candidate is always accepted; confirm whether "> 0" was intended.
+         if rect_y:float():std() >= 0 then
+            break
+         end
+      end
+      return rect_x, rect_y
+   else
+      return x, y
+   end
+end
+
+-- Build n training pairs from a user-supplied (x, y) image pair.
+-- x, y: images with the same size(1); y is scale_y times the size of x,
+--       where scale_y = y:size(2) / x:size(2).
+-- size, offset: size is passed to active_cropping; each target is yc cropped
+--       to (offset, offset)-(size - offset, size - offset).
+-- options: reads max_size, active_cropping_rate, active_cropping_tries, rgb.
+-- Returns a list of {xc, cropped yc} pairs.
+function pairwise_transform.user(x, y, size, offset, n, options)
+   assert(x:size(1) == y:size(1))
+
+   local scale_y = y:size(2) / x:size(2)
+   assert(x:size(3) == y:size(3) / scale_y)
+
+   -- Offsets are aligned to 4, as in convert_data.lua, so that they stay
+   -- integral after division by a scale_y of 1, 2 or 4.
+   x, y = crop_if_large(x, y, scale_y, options.max_size, 4)
+   assert(x:size(3) == y:size(3) / scale_y and x:size(2) == y:size(2) / scale_y)
+   local batch = {}
+   -- y resized to half size and back with the "Box" filter
+   local lowres_y = gm.Image(y, "RGB", "DHW"):
+      size(y:size(3) * 0.5, y:size(2) * 0.5, "Box"):
+      size(y:size(3), y:size(2), "Box"):
+      toTensor("byte", "RGB", "DHW")
+   local xs, ys, ls = pairwise_utils.flip_augmentation(x, y, lowres_y)
+   for i = 1, n do
+      local t = (i % #xs) + 1
+      local xc, yc = pairwise_utils.active_cropping(xs[t], ys[t], ls[t], size, scale_y,
+                                                    options.active_cropping_rate,
+                                                    options.active_cropping_tries)
+      xc = iproc.byte2float(xc)
+      yc = iproc.byte2float(yc)
+      if options.rgb then
+      else
+         -- not rgb: keep only the first channel of the YUV conversion
+         yc = image.rgb2yuv(yc)[1]:reshape(1, yc:size(2), yc:size(3))
+         xc = image.rgb2yuv(xc)[1]:reshape(1, xc:size(2), xc:size(3))
+      end
+      table.insert(batch, {xc, iproc.crop(yc, offset, offset, size - offset, size - offset)})
+   end
+   return batch
+end
+return pairwise_transform
diff --git a/train.lua b/train.lua
index b54d4a6..bd89d58 100644
--- a/train.lua
+++ b/train.lua
@@ -22,6 +22,13 @@ local function save_test_jpeg(model, rgb, file)
    local im, count = reconstruct.image(model, rgb)
    image.save(file, im)
 end
+local function save_test_user(model, rgb, file)
+   if settings.scale == 1 then
+      save_test_jpeg(model, rgb, file)
+   else
+      save_test_scale(model, rgb, file)
+   end
+end
 local function split_data(x, test_size)
    local index = torch.randperm(#x)
    local train_size = #x - test_size
@@ -117,9 +124,15 @@ local function create_criterion(model, loss)
 end
 local function transformer(model, x, is_validation, n, offset)
    local meta = {data = {}}
+   local y = nil
    if type(x) == "table" and type(x[2]) == "table" then
       meta = x[2]
-      x = compression.decompress(x[1])
+      if x[1].x and x[1].y then
+         y = compression.decompress(x[1].y)
+         x = compression.decompress(x[1].x)
+      else
+         x = compression.decompress(x[1])
+      end
    else
       x = compression.decompress(x)
    end
@@ -197,6 +210,15 @@ local function transformer(model, x, is_validation, n, offset)
                                            settings.noise_level,
                                            settings.crop_size, offset,
                                            n, conf)
+   elseif settings.method == "user" then
+      local conf = tablex.update({
+            max_size = settings.max_size,
+            active_cropping_rate = active_cropping_rate,
+            active_cropping_tries = active_cropping_tries,
+            rgb = (settings.color == "rgb")}, meta)
+      return pairwise_transform.user(x, y,
+                                     settings.crop_size, offset,
+                                     n, conf)
    end
 end
 
@@ -248,8 +270,12 @@ local function remove_small_image(x)
    for i = 1, #x do
       local xe, meta, x_s
       xe = x[i]
       if type(xe) == "table" and type(xe[2]) == "table" then
-         x_s = compression.size(xe[1])
+         if xe[1].x and xe[1].y then
+            x_s = compression.size(xe[1].y) -- y size
+         else
+            x_s = compression.size(xe[1])
+         end
       else
          x_s = compression.size(xe)
       end
@@ -394,6 +420,11 @@ local function train()
                                                                           settings.scale,
                                                                           epoch, i))
                   save_test_scale(model, test_image, log)
+               elseif settings.method == "user" then
+                  local log = path.join(settings.model_dir,
+                                        ("%s_best.%d-%d.png"):format(settings.name,
+                                                                     epoch, i))
+                  save_test_user(model, test_image, log)
                end
             else
                torch.save(settings.model_file, model:clearState(), "ascii")
@@ -410,6 +441,10 @@ local function train()
                                        ("noise%d_scale%.1f_best.png"):format(settings.noise_level,
                                                                              settings.scale))
                   save_test_scale(model, test_image, log)
+               elseif settings.method == "user" then
+                  local log = path.join(settings.model_dir,
+                                        ("%s_best.png"):format(settings.name))
+                  save_test_user(model, test_image, log)
                end
             end
          end
diff --git a/waifu2x.lua b/waifu2x.lua
index a0726b0..8aa43d9 100644
--- a/waifu2x.lua
+++ b/waifu2x.lua
@@ -112,6 +112,24 @@ local function convert_image(opt)
             print(opt.o .. ": " .. (sys.clock() - t) .. " sec")
          end
       end
+   elseif opt.m == "user" then
+      local model_path = opt.model_path
+      local model = w2nn.load_model(model_path, opt.force_cudnn)
+      if not model then
+         error("Load Error: " .. model_path)
+      end
+      local t = sys.clock()
+
+      x = alpha_util.make_border(x, alpha, reconstruct.offset_size(model))
+      if opt.scale == 1 then
+         new_x = image_f(model, x, opt.crop_size, opt.batch_size)
+      else
+         new_x = scale_f(model, opt.scale, x, opt.crop_size, opt.batch_size)
+      end
+      new_x = alpha_util.composite(new_x, alpha) -- TODO: should it use model?
+      if not opt.q then
+         print(opt.o .. ": " .. (sys.clock() - t) .. " sec")
+      end
    else
       error("undefined method:" .. opt.method)
    end
@@ -121,6 +139,7 @@ local function convert_frames(opt)
    local model_path, scale_model, t
    local noise_scale_model = {}
    local noise_model = {}
+   local user_model = nil
    local scale_f, image_f
    if opt.tta == 1 then
       scale_f = function(model, scale, x, block_size, batch_size)
@@ -156,6 +175,8 @@ local function convert_frames(opt)
          model_path = path.join(opt.model_dir, string.format("noise%d_model.t7", opt.noise_level))
          noise_model[opt.noise_level] = w2nn.load_model(model_path, opt.force_cudnn)
       end
+   elseif opt.m == "user" then
+      user_model = w2nn.load_model(opt.model_path, opt.force_cudnn)
    end
    local fp = io.open(opt.l)
    if not fp then
@@ -189,6 +210,14 @@ local function convert_frames(opt)
                new_x = scale_f(scale_model, opt.scale, x, opt.crop_size, opt.batch_size)
             end
             new_x = alpha_util.composite(new_x, alpha, scale_model)
+         elseif opt.m == "user" then
+            x = alpha_util.make_border(x, alpha, reconstruct.offset_size(user_model))
+            if opt.scale == 1 then
+               new_x = image_f(user_model, x, opt.crop_size, opt.batch_size)
+            else
+               new_x = scale_f(user_model, opt.scale, x, opt.crop_size, opt.batch_size)
+            end
+            new_x = alpha_util.composite(new_x, alpha)
          else
             error("undefined method:" .. opt.method)
          end
@@ -218,7 +247,8 @@ local function waifu2x()
    cmd:option("-o", "(auto)", 'path to output file')
    cmd:option("-depth", 8, 'bit-depth of the output image (8|16)')
    cmd:option("-model_dir", "./models/upconv_7/art", 'path to model directory')
-   cmd:option("-m", "noise_scale", 'method (noise|scale|noise_scale)')
+   cmd:option("-name", "user", 'model name for user method')
+   cmd:option("-m", "noise_scale", 'method (noise|scale|noise_scale|user)')
    cmd:option("-method", "", 'same as -m')
    cmd:option("-noise_level", 1, '(1|2|3)')
    cmd:option("-crop_size", 128, 'patch size per process')
@@ -247,6 +277,7 @@ local function waifu2x()
    end
    opt.force_cudnn = opt.force_cudnn == 1
    opt.q = opt.q == 1
+   opt.model_path = path.join(opt.model_dir, string.format("%s_model.t7", opt.name))
 
    if string.len(opt.l) == 0 then
       convert_image(opt)