Add a -force_cudnn option and load every model through w2nn.load_model.

What this patch does:
  * lib/w2nn.lua: adds w2nn.load_model(model_path, force_cudnn), which reads
    the model with torch.load(path, "ascii"), optionally runs it through
    cudnn.convert, then calls :cuda():evaluate() on it.
  * waifu2x.lua: replaces each direct torch.load call in convert_image and
    convert_frames with w2nn.load_model, adds the -force_cudnn (0|1) option
    and turns it into a boolean, and sets cudnn.benchmark to true only when
    opt.l is non-empty.
  * web.lua: same option and loader; cudnn.benchmark is switched on and
    collectgarbage() runs once after the models are loaded.

Review notes:
  * Both pcall() fallbacks for the optional scale model pass the qualified
    w2nn.load_model. Nothing in this patch defines a bare load_model; handing
    pcall an undefined name makes it return false every time, so the scale
    model would silently be replaced by the noise_scale model.
  * w2nn.load_model indexes cudnn whenever force_cudnn is set; cudnn is loaded
    via pcall(load_cudnn), so -force_cudnn 1 presumably requires that load to
    have succeeded (to be confirmed).
  * Indentation and whitespace-only lines were reconstructed (3-space indent
    assumed) and the index hashes are as originally recorded; if context does
    not match, try `git apply --ignore-whitespace`.

diff --git a/lib/w2nn.lua b/lib/w2nn.lua
index 16b9996..0643a2f 100644
--- a/lib/w2nn.lua
+++ b/lib/w2nn.lua
@@ -16,6 +16,19 @@ else
    pcall(load_cunn)
    pcall(load_cudnn)
    w2nn = {}
+
+   -- Load a serialized model, move it to CUDA and switch it to evaluate mode.
+   -- model_path:  file read with torch.load(model_path, "ascii")
+   -- force_cudnn: when truthy, the model is passed through cudnn.convert first
+   -- Returns the model after :cuda() and :evaluate() have been called on it.
+   function w2nn.load_model(model_path, force_cudnn)
+      local model = torch.load(model_path, "ascii")
+      if force_cudnn then
+         model = cudnn.convert(model, cudnn)
+      end
+      model:cuda():evaluate()
+      return model
+   end
    require 'LeakyReLU'
    require 'LeakyReLU_deprecated'
    require 'DepthExpand2x'
diff --git a/waifu2x.lua b/waifu2x.lua
index 24b7ca8..c3ea874 100644
--- a/waifu2x.lua
+++ b/waifu2x.lua
@@ -59,7 +59,7 @@ local function convert_image(opt)
    opt.o = format_output(opt, opt.i)
    if opt.m == "noise" then
       local model_path = path.join(opt.model_dir, ("noise%d_model.t7"):format(opt.noise_level))
-      local model = torch.load(model_path, "ascii")
+      local model = w2nn.load_model(model_path, opt.force_cudnn)
       if not model then
          error("Load Error: " .. model_path)
       end
@@ -69,7 +69,7 @@ local function convert_image(opt)
       print(opt.o .. ": " .. (sys.clock() - t) .. " sec")
    elseif opt.m == "scale" then
       local model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
-      local model = torch.load(model_path, "ascii")
+      local model = w2nn.load_model(model_path, opt.force_cudnn)
       if not model then
          error("Load Error: " .. model_path)
       end
@@ -82,8 +82,8 @@ local function convert_image(opt)
       local model_path = path.join(opt.model_dir, ("noise%d_scale%.1fx_model.t7"):format(opt.noise_level, opt.scale))
       if path.exists(model_path) then
          local scale_model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
-         local t, scale_model = pcall(torch.load, scale_model_path, "ascii")
-         local model = torch.load(model_path, "ascii")
+         local t, scale_model = pcall(w2nn.load_model, scale_model_path, opt.force_cudnn)
+         local model = w2nn.load_model(model_path, opt.force_cudnn)
          if not t then
             scale_model = model
          end
@@ -94,9 +94,9 @@ local function convert_image(opt)
          print(opt.o .. ": " .. (sys.clock() - t) .. " sec")
       else
          local noise_model_path = path.join(opt.model_dir, ("noise%d_model.t7"):format(opt.noise_level))
-         local noise_model = torch.load(noise_model_path, "ascii")
+         local noise_model = w2nn.load_model(noise_model_path, opt.force_cudnn)
          local scale_model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
-         local scale_model = torch.load(scale_model_path, "ascii")
+         local scale_model = w2nn.load_model(scale_model_path, opt.force_cudnn)
          local t = sys.clock()
          x = alpha_util.make_border(x, alpha, reconstruct.offset_size(scale_model))
          x = image_f(noise_model, x, opt.crop_size, opt.batch_size)
@@ -129,24 +129,24 @@ local function convert_frames(opt)
    end
    if opt.m == "scale" then
       model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
-      scale_model = torch.load(model_path, "ascii")
+      scale_model = w2nn.load_model(model_path, opt.force_cudnn)
    elseif opt.m == "noise" then
       model_path = path.join(opt.model_dir, string.format("noise%d_model.t7", opt.noise_level))
-      noise_model[opt.noise_level] = torch.load(model_path, "ascii")
+      noise_model[opt.noise_level] = w2nn.load_model(model_path, opt.force_cudnn)
    elseif opt.m == "noise_scale" then
       local model_path = path.join(opt.model_dir, ("noise%d_scale%.1fx_model.t7"):format(opt.noise_level, opt.scale))
       if path.exists(model_path) then
-         noise_scale_model[opt.noise_level] = torch.load(model_path, "ascii")
+         noise_scale_model[opt.noise_level] = w2nn.load_model(model_path, opt.force_cudnn)
          model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
-         t, scale_model = pcall(torch.load, model_path, "ascii")
+         t, scale_model = pcall(w2nn.load_model, model_path, opt.force_cudnn)
          if not t then
             scale_model = noise_scale_model[opt.noise_level]
          end
       else
          model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
-         scale_model = torch.load(model_path, "ascii")
+         scale_model = w2nn.load_model(model_path, opt.force_cudnn)
          model_path = path.join(opt.model_dir, string.format("noise%d_model.t7", opt.noise_level))
-         noise_model[opt.noise_level] = torch.load(model_path, "ascii")
+         noise_model[opt.noise_level] = w2nn.load_model(model_path, opt.force_cudnn)
       end
    end
    local fp = io.open(opt.l)
@@ -214,16 +214,25 @@ local function waifu2x()
    cmd:option("-thread", -1, "number of CPU threads")
    cmd:option("-tta", 0, '8x slower and slightly high quality (0|1)')
    cmd:option("-tta_level", 8, 'TTA level (2|4|8)')
-
+   cmd:option("-force_cudnn", 0, 'use cuDNN backend (0|1)')
+
    local opt = cmd:parse(arg)
    if opt.thread > 0 then
       torch.setnumthreads(opt.thread)
    end
    if cudnn then
       cudnn.fastest = true
-      cudnn.benchmark = false
+      if opt.l:len() > 0 then
+         cudnn.benchmark = true -- find fastest algo
+      else
+         cudnn.benchmark = false
+      end
+   end
+   if opt.force_cudnn == 1 then
+      opt.force_cudnn = true
+   else
+      opt.force_cudnn = false
    end
-
    if string.len(opt.l) == 0 then
       convert_image(opt)
    else
diff --git a/web.lua b/web.lua
index 53424a5..a518055 100644
--- a/web.lua
+++ b/web.lua
@@ -28,6 +28,8 @@ cmd:option("-gpu", 1, 'Device ID')
 cmd:option("-crop_size", 128, 'patch size per process')
 cmd:option("-batch_size", 1, 'batch size')
 cmd:option("-thread", -1, 'number of CPU threads')
+cmd:option("-force_cudnn", 0, 'use cuDNN backend (0|1)')
+
 local opt = cmd:parse(arg)
 cutorch.setDevice(opt.gpu)
 torch.setdefaulttensortype('torch.FloatTensor')
@@ -36,18 +38,20 @@ if opt.thread > 0 then
 end
 if cudnn then
    cudnn.fastest = true
-   cudnn.benchmark = false
+   cudnn.benchmark = true
 end
+opt.force_cudnn = opt.force_cudnn == 1
 local ART_MODEL_DIR = path.join(ROOT, "models", "upconv_7", "art")
 local PHOTO_MODEL_DIR = path.join(ROOT, "models", "photo")
-local art_scale2_model = torch.load(path.join(ART_MODEL_DIR, "scale2.0x_model.t7"), "ascii")
-local art_noise1_model = torch.load(path.join(ART_MODEL_DIR, "noise1_model.t7"), "ascii")
-local art_noise2_model = torch.load(path.join(ART_MODEL_DIR, "noise2_model.t7"), "ascii")
-local art_noise3_model = torch.load(path.join(ART_MODEL_DIR, "noise3_model.t7"), "ascii")
-local photo_scale2_model = torch.load(path.join(PHOTO_MODEL_DIR, "scale2.0x_model.t7"), "ascii")
-local photo_noise1_model = torch.load(path.join(PHOTO_MODEL_DIR, "noise1_model.t7"), "ascii")
-local photo_noise2_model = torch.load(path.join(PHOTO_MODEL_DIR, "noise2_model.t7"), "ascii")
-local photo_noise3_model = torch.load(path.join(PHOTO_MODEL_DIR, "noise3_model.t7"), "ascii")
+local art_scale2_model = w2nn.load_model(path.join(ART_MODEL_DIR, "scale2.0x_model.t7"), opt.force_cudnn)
+local art_noise1_model = w2nn.load_model(path.join(ART_MODEL_DIR, "noise1_model.t7"), opt.force_cudnn)
+local art_noise2_model = w2nn.load_model(path.join(ART_MODEL_DIR, "noise2_model.t7"), opt.force_cudnn)
+local art_noise3_model = w2nn.load_model(path.join(ART_MODEL_DIR, "noise3_model.t7"), opt.force_cudnn)
+local photo_scale2_model = w2nn.load_model(path.join(PHOTO_MODEL_DIR, "scale2.0x_model.t7"), opt.force_cudnn)
+local photo_noise1_model = w2nn.load_model(path.join(PHOTO_MODEL_DIR, "noise1_model.t7"), opt.force_cudnn)
+local photo_noise2_model = w2nn.load_model(path.join(PHOTO_MODEL_DIR, "noise2_model.t7"), opt.force_cudnn)
+local photo_noise3_model = w2nn.load_model(path.join(PHOTO_MODEL_DIR, "noise3_model.t7"), opt.force_cudnn)
+collectgarbage()
 local CLEANUP_MODEL = false -- if you are using the low memory GPU, you could use this flag.
 local CACHE_DIR = path.join(ROOT, "cache")
 local MAX_NOISE_IMAGE = 2560 * 2560