Make the performance benchmark practical
This commit is contained in:
parent
4c14b2faf0
commit
17b8de2d36
|
@ -890,18 +890,31 @@ function srcnn.upcunet_v2(backend, ch)
|
||||||
end
|
end
|
||||||
local function bench()
|
local function bench()
|
||||||
local sys = require 'sys'
|
local sys = require 'sys'
|
||||||
cudnn.benchmark = false
|
cudnn.benchmark = true
|
||||||
local model = nil
|
local model = nil
|
||||||
local arch = {"upconv_7", "upcunet", "upcunet_v2"}
|
local arch = {"upconv_7", "upcunet", "upcunet_v2"}
|
||||||
local backend = "cunn"
|
local backend = "cudnn"
|
||||||
for k = 1, #arch do
|
for k = 1, #arch do
|
||||||
model = srcnn[arch[k]](backend, 3):cuda()
|
model = srcnn[arch[k]](backend, 3):cuda()
|
||||||
model:training()
|
model:evaluate()
|
||||||
|
local dummy = nil
|
||||||
|
-- warm up
|
||||||
|
for i = 1, 20 do
|
||||||
|
local x = torch.Tensor(4, 3, 172, 172):uniform():cuda()
|
||||||
|
model:forward(x)
|
||||||
|
end
|
||||||
t = sys.clock()
|
t = sys.clock()
|
||||||
for i = 1, 10 do
|
for i = 1, 20 do
|
||||||
model:forward(torch.Tensor(1, 3, 172, 172):zero():cuda())
|
local x = torch.Tensor(4, 3, 172, 172):uniform():cuda()
|
||||||
|
local z = model:forward(x)
|
||||||
|
if dummy == nil then
|
||||||
|
dummy = z:clone()
|
||||||
|
else
|
||||||
|
dummy:add(z)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
print(arch[k], sys.clock() - t)
|
print(arch[k], sys.clock() - t)
|
||||||
|
model:clearState()
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
function srcnn.create(model_name, backend, color)
|
function srcnn.create(model_name, backend, color)
|
||||||
|
@ -935,4 +948,5 @@ model:training()
|
||||||
print(model:forward(torch.Tensor(1, 3, 76, 76):zero():cuda()))
|
print(model:forward(torch.Tensor(1, 3, 76, 76):zero():cuda()))
|
||||||
os.exit()
|
os.exit()
|
||||||
--]]
|
--]]
|
||||||
|
|
||||||
return srcnn
|
return srcnn
|
||||||
|
|
Loading…
Reference in a new issue