Add -force_cudnn option; support for cuDNN in waifu2x.lua/web.lua
This commit is contained in:
parent
599da6a665
commit
af74a67bd1
|
@ -16,6 +16,15 @@ else
|
||||||
pcall(load_cunn)
|
pcall(load_cunn)
|
||||||
pcall(load_cudnn)
|
pcall(load_cudnn)
|
||||||
w2nn = {}
|
w2nn = {}
|
||||||
|
|
||||||
|
--- Load a serialized model and prepare it for GPU inference.
-- Reads the model with torch.load in "ascii" format, optionally converts it
-- with cudnn.convert, then calls :cuda() and :evaluate() on it.
-- @param model_path path of the model file to load
-- @param force_cudnn when truthy, convert the model via cudnn.convert
-- @return the loaded (and possibly converted) model
function w2nn.load_model(model_path, force_cudnn)
   local model = torch.load(model_path, "ascii")
   if force_cudnn then
      -- load_cudnn is invoked under pcall, so the cudnn global is presumably
      -- unset when that load failed; report it explicitly instead of raising
      -- an opaque "attempt to index a nil value".
      if not cudnn then
         error("force_cudnn was requested but cudnn is not available")
      end
      model = cudnn.convert(model, cudnn)
   end
   model:cuda():evaluate()
   return model
end
|
||||||
require 'LeakyReLU'
|
require 'LeakyReLU'
|
||||||
require 'LeakyReLU_deprecated'
|
require 'LeakyReLU_deprecated'
|
||||||
require 'DepthExpand2x'
|
require 'DepthExpand2x'
|
||||||
|
|
35
waifu2x.lua
35
waifu2x.lua
|
@ -59,7 +59,7 @@ local function convert_image(opt)
|
||||||
opt.o = format_output(opt, opt.i)
|
opt.o = format_output(opt, opt.i)
|
||||||
if opt.m == "noise" then
|
if opt.m == "noise" then
|
||||||
local model_path = path.join(opt.model_dir, ("noise%d_model.t7"):format(opt.noise_level))
|
local model_path = path.join(opt.model_dir, ("noise%d_model.t7"):format(opt.noise_level))
|
||||||
local model = torch.load(model_path, "ascii")
|
local model = w2nn.load_model(model_path, opt.force_cudnn)
|
||||||
if not model then
|
if not model then
|
||||||
error("Load Error: " .. model_path)
|
error("Load Error: " .. model_path)
|
||||||
end
|
end
|
||||||
|
@ -69,7 +69,7 @@ local function convert_image(opt)
|
||||||
print(opt.o .. ": " .. (sys.clock() - t) .. " sec")
|
print(opt.o .. ": " .. (sys.clock() - t) .. " sec")
|
||||||
elseif opt.m == "scale" then
|
elseif opt.m == "scale" then
|
||||||
local model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
|
local model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
|
||||||
local model = torch.load(model_path, "ascii")
|
local model = w2nn.load_model(model_path, opt.force_cudnn)
|
||||||
if not model then
|
if not model then
|
||||||
error("Load Error: " .. model_path)
|
error("Load Error: " .. model_path)
|
||||||
end
|
end
|
||||||
|
@ -82,8 +82,8 @@ local function convert_image(opt)
|
||||||
local model_path = path.join(opt.model_dir, ("noise%d_scale%.1fx_model.t7"):format(opt.noise_level, opt.scale))
|
local model_path = path.join(opt.model_dir, ("noise%d_scale%.1fx_model.t7"):format(opt.noise_level, opt.scale))
|
||||||
if path.exists(model_path) then
|
if path.exists(model_path) then
|
||||||
local scale_model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
|
local scale_model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
|
||||||
local t, scale_model = pcall(torch.load, scale_model_path, "ascii")
|
-- Best-effort load of the plain scale model; `t` is false when loading fails.
-- The loader lives in the w2nn table (a bare `load_model` is an undefined
-- global, which would make pcall fail unconditionally).
local t, scale_model = pcall(w2nn.load_model, scale_model_path, opt.force_cudnn)
|
||||||
local model = torch.load(model_path, "ascii")
|
local model = w2nn.load_model(model_path, opt.force_cudnn)
|
||||||
if not t then
|
if not t then
|
||||||
scale_model = model
|
scale_model = model
|
||||||
end
|
end
|
||||||
|
@ -94,9 +94,9 @@ local function convert_image(opt)
|
||||||
print(opt.o .. ": " .. (sys.clock() - t) .. " sec")
|
print(opt.o .. ": " .. (sys.clock() - t) .. " sec")
|
||||||
else
|
else
|
||||||
local noise_model_path = path.join(opt.model_dir, ("noise%d_model.t7"):format(opt.noise_level))
|
local noise_model_path = path.join(opt.model_dir, ("noise%d_model.t7"):format(opt.noise_level))
|
||||||
local noise_model = torch.load(noise_model_path, "ascii")
|
local noise_model = w2nn.load_model(noise_model_path, opt.force_cudnn)
|
||||||
local scale_model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
|
local scale_model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
|
||||||
local scale_model = torch.load(scale_model_path, "ascii")
|
local scale_model = w2nn.load_model(scale_model_path, opt.force_cudnn)
|
||||||
local t = sys.clock()
|
local t = sys.clock()
|
||||||
x = alpha_util.make_border(x, alpha, reconstruct.offset_size(scale_model))
|
x = alpha_util.make_border(x, alpha, reconstruct.offset_size(scale_model))
|
||||||
x = image_f(noise_model, x, opt.crop_size, opt.batch_size)
|
x = image_f(noise_model, x, opt.crop_size, opt.batch_size)
|
||||||
|
@ -129,24 +129,24 @@ local function convert_frames(opt)
|
||||||
end
|
end
|
||||||
if opt.m == "scale" then
|
if opt.m == "scale" then
|
||||||
model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
|
model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
|
||||||
scale_model = torch.load(model_path, "ascii")
|
scale_model = w2nn.load_model(model_path, opt.force_cudnn)
|
||||||
elseif opt.m == "noise" then
|
elseif opt.m == "noise" then
|
||||||
model_path = path.join(opt.model_dir, string.format("noise%d_model.t7", opt.noise_level))
|
model_path = path.join(opt.model_dir, string.format("noise%d_model.t7", opt.noise_level))
|
||||||
noise_model[opt.noise_level] = torch.load(model_path, "ascii")
|
noise_model[opt.noise_level] = w2nn.load_model(model_path, opt.force_cudnn)
|
||||||
elseif opt.m == "noise_scale" then
|
elseif opt.m == "noise_scale" then
|
||||||
local model_path = path.join(opt.model_dir, ("noise%d_scale%.1fx_model.t7"):format(opt.noise_level, opt.scale))
|
local model_path = path.join(opt.model_dir, ("noise%d_scale%.1fx_model.t7"):format(opt.noise_level, opt.scale))
|
||||||
if path.exists(model_path) then
|
if path.exists(model_path) then
|
||||||
noise_scale_model[opt.noise_level] = torch.load(model_path, "ascii")
|
noise_scale_model[opt.noise_level] = w2nn.load_model(model_path, opt.force_cudnn)
|
||||||
model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
|
model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
|
||||||
t, scale_model = pcall(torch.load, model_path, "ascii")
|
-- Best-effort load of the plain scale model; `t` is false when loading fails.
-- The loader lives in the w2nn table (a bare `load_model` is an undefined
-- global, which would make pcall fail unconditionally).
t, scale_model = pcall(w2nn.load_model, model_path, opt.force_cudnn)
|
||||||
if not t then
|
if not t then
|
||||||
scale_model = noise_scale_model[opt.noise_level]
|
scale_model = noise_scale_model[opt.noise_level]
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
|
model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
|
||||||
scale_model = torch.load(model_path, "ascii")
|
scale_model = w2nn.load_model(model_path, opt.force_cudnn)
|
||||||
model_path = path.join(opt.model_dir, string.format("noise%d_model.t7", opt.noise_level))
|
model_path = path.join(opt.model_dir, string.format("noise%d_model.t7", opt.noise_level))
|
||||||
noise_model[opt.noise_level] = torch.load(model_path, "ascii")
|
noise_model[opt.noise_level] = w2nn.load_model(model_path, opt.force_cudnn)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
local fp = io.open(opt.l)
|
local fp = io.open(opt.l)
|
||||||
|
@ -214,6 +214,7 @@ local function waifu2x()
|
||||||
cmd:option("-thread", -1, "number of CPU threads")
|
cmd:option("-thread", -1, "number of CPU threads")
|
||||||
cmd:option("-tta", 0, '8x slower and slightly high quality (0|1)')
|
cmd:option("-tta", 0, '8x slower and slightly high quality (0|1)')
|
||||||
cmd:option("-tta_level", 8, 'TTA level (2|4|8)')
|
cmd:option("-tta_level", 8, 'TTA level (2|4|8)')
|
||||||
|
cmd:option("-force_cudnn", 0, 'use cuDNN backend (0|1)')
|
||||||
|
|
||||||
local opt = cmd:parse(arg)
|
local opt = cmd:parse(arg)
|
||||||
if opt.thread > 0 then
|
if opt.thread > 0 then
|
||||||
|
@ -221,9 +222,17 @@ local function waifu2x()
|
||||||
end
|
end
|
||||||
if cudnn then
|
if cudnn then
|
||||||
cudnn.fastest = true
|
cudnn.fastest = true
|
||||||
|
if opt.l:len() > 0 then
|
||||||
|
cudnn.benchmark = true -- find fastest algo
|
||||||
|
else
|
||||||
cudnn.benchmark = false
|
cudnn.benchmark = false
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
if opt.force_cudnn == 1 then
|
||||||
|
opt.force_cudnn = true
|
||||||
|
else
|
||||||
|
opt.force_cudnn = false
|
||||||
|
end
|
||||||
if string.len(opt.l) == 0 then
|
if string.len(opt.l) == 0 then
|
||||||
convert_image(opt)
|
convert_image(opt)
|
||||||
else
|
else
|
||||||
|
|
22
web.lua
22
web.lua
|
@ -28,6 +28,8 @@ cmd:option("-gpu", 1, 'Device ID')
|
||||||
cmd:option("-crop_size", 128, 'patch size per process')
|
cmd:option("-crop_size", 128, 'patch size per process')
|
||||||
cmd:option("-batch_size", 1, 'batch size')
|
cmd:option("-batch_size", 1, 'batch size')
|
||||||
cmd:option("-thread", -1, 'number of CPU threads')
|
cmd:option("-thread", -1, 'number of CPU threads')
|
||||||
|
cmd:option("-force_cudnn", 0, 'use cuDNN backend (0|1)')
|
||||||
|
|
||||||
local opt = cmd:parse(arg)
|
local opt = cmd:parse(arg)
|
||||||
cutorch.setDevice(opt.gpu)
|
cutorch.setDevice(opt.gpu)
|
||||||
torch.setdefaulttensortype('torch.FloatTensor')
|
torch.setdefaulttensortype('torch.FloatTensor')
|
||||||
|
@ -36,18 +38,20 @@ if opt.thread > 0 then
|
||||||
end
|
end
|
||||||
if cudnn then
|
if cudnn then
|
||||||
cudnn.fastest = true
|
cudnn.fastest = true
|
||||||
cudnn.benchmark = false
|
cudnn.benchmark = true
|
||||||
end
|
end
|
||||||
|
opt.force_cudnn = opt.force_cudnn == 1
|
||||||
local ART_MODEL_DIR = path.join(ROOT, "models", "upconv_7", "art")
|
local ART_MODEL_DIR = path.join(ROOT, "models", "upconv_7", "art")
|
||||||
local PHOTO_MODEL_DIR = path.join(ROOT, "models", "photo")
|
local PHOTO_MODEL_DIR = path.join(ROOT, "models", "photo")
|
||||||
local art_scale2_model = torch.load(path.join(ART_MODEL_DIR, "scale2.0x_model.t7"), "ascii")
|
local art_scale2_model = w2nn.load_model(path.join(ART_MODEL_DIR, "scale2.0x_model.t7"), opt.force_cudnn)
|
||||||
local art_noise1_model = torch.load(path.join(ART_MODEL_DIR, "noise1_model.t7"), "ascii")
|
local art_noise1_model = w2nn.load_model(path.join(ART_MODEL_DIR, "noise1_model.t7"), opt.force_cudnn)
|
||||||
local art_noise2_model = torch.load(path.join(ART_MODEL_DIR, "noise2_model.t7"), "ascii")
|
local art_noise2_model = w2nn.load_model(path.join(ART_MODEL_DIR, "noise2_model.t7"), opt.force_cudnn)
|
||||||
local art_noise3_model = torch.load(path.join(ART_MODEL_DIR, "noise3_model.t7"), "ascii")
|
local art_noise3_model = w2nn.load_model(path.join(ART_MODEL_DIR, "noise3_model.t7"), opt.force_cudnn)
|
||||||
local photo_scale2_model = torch.load(path.join(PHOTO_MODEL_DIR, "scale2.0x_model.t7"), "ascii")
|
local photo_scale2_model = w2nn.load_model(path.join(PHOTO_MODEL_DIR, "scale2.0x_model.t7"), opt.force_cudnn)
|
||||||
local photo_noise1_model = torch.load(path.join(PHOTO_MODEL_DIR, "noise1_model.t7"), "ascii")
|
local photo_noise1_model = w2nn.load_model(path.join(PHOTO_MODEL_DIR, "noise1_model.t7"), opt.force_cudnn)
|
||||||
local photo_noise2_model = torch.load(path.join(PHOTO_MODEL_DIR, "noise2_model.t7"), "ascii")
|
local photo_noise2_model = w2nn.load_model(path.join(PHOTO_MODEL_DIR, "noise2_model.t7"), opt.force_cudnn)
|
||||||
local photo_noise3_model = torch.load(path.join(PHOTO_MODEL_DIR, "noise3_model.t7"), "ascii")
|
local photo_noise3_model = w2nn.load_model(path.join(PHOTO_MODEL_DIR, "noise3_model.t7"), opt.force_cudnn)
|
||||||
|
collectgarbage()
|
||||||
local CLEANUP_MODEL = false -- if you are using the low memory GPU, you could use this flag.
|
local CLEANUP_MODEL = false -- if you are using the low memory GPU, you could use this flag.
|
||||||
local CACHE_DIR = path.join(ROOT, "cache")
|
local CACHE_DIR = path.join(ROOT, "cache")
|
||||||
local MAX_NOISE_IMAGE = 2560 * 2560
|
local MAX_NOISE_IMAGE = 2560 * 2560
|
||||||
|
|
Loading…
Reference in a new issue