From 8d16fbaa9a29f56c0e2cb7db5954f6cf5a412194 Mon Sep 17 00:00:00 2001
From: nagadomi <nagadomi@nurs.or.jp>
Date: Sat, 8 Oct 2016 19:36:26 +0900
Subject: [PATCH] Update benchmark

---
 appendix/benchmark.md | 95 +++++++++++++++++++++++++++++--------------
 appendix/benchmark.sh | 34 ++++++++++++++++
 2 files changed, 98 insertions(+), 31 deletions(-)
 create mode 100755 appendix/benchmark.sh

diff --git a/appendix/benchmark.md b/appendix/benchmark.md
index 05b2592..47dc196 100644
--- a/appendix/benchmark.md
+++ b/appendix/benchmark.md
@@ -1,45 +1,78 @@
-# Benchmark results
+# Benchmarks
 
-Warning: This benchmark results is outdated. I will update soon.
+## Photo
 
-## Usage
+Note: waifu2x's photo models were trained on [kou's photo collection](http://photosku.com/photo/category/%E6%92%AE%E5%BD%B1%E8%80%85/kou/).
+Note: PSNR in this benchmark uses a MATLAB rgb2ycbcr-compatible function to convert images to grayscale. I don't think this is the correct PSNR, but many papers use this metric.
 
-```
-th tools/benchmark.lua -dir path/to/dataset_dir -method scale -color y -model1_dir path/to/model_dir
-```
+command: 
+`th tools/benchmark.lua -dir <dataset_dir> -model1_dir <model_dir> -method scale -filter Catrom -color y -range_bug 1 -tta <0|1> -force_cudnn 1`
 
-## Dataset
+### Datasets
 
-    photo_test: 300 various photos.
-    art_test  : 90 artworks (PNG only).
+BSD100: https://www2.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/segbench/ (100 test images in BSD300)
+Urban100: https://github.com/jbhuang0604/SelfExSR
 
-## 2x upscaling model
+### 2x - PSNR 
 
-| Dataset/Model | anime\_style\_art(Y) | anime\_style\_art\_rgb | photo   | ukbench|
-|---------------|----------------------|------------------------|---------|--------|
-| photo\_test   |                29.83 |                  29.81 |**29.89**|  29.86 |
-| art\_test     |                36.02 |               **36.24**|  34.92  |  34.85 |
+| Dataset/Model | Bicubic       | vgg\_7/photo  | upconv\_7/photo  | upconv\_7l/photo | 
+|---------------|---------------|---------------|------------------|------------------|
+| BSD100        | 29.558        | 31.427        | 31.640           | 31.749           |
+| Urban100      | 26.852        | 30.057        | 30.477           | 30.759           |
 
-The evaluation metric is PSNR(Y only), higher is better.
+### 2x with TTA - PSNR 
 
-## Denosing level 1 model
+Note: TTA is an ensemble technique that is supported by waifu2x. This method is 8x slower than the non-TTA method, but it improves PSNR (~+0.1 on photo, ~+0.4 on art).
 
-| Dataset/Model            | anime\_style\_art | anime\_style\_art\_rgb | photo   |
-|--------------------------|-------------------|------------------------|---------|
-| photo\_test Quality 80   |             36.07 |               **36.20**|   36.01 |
-| photo\_test Quality 50,45|             31.72 |                 32.01  |**32.31**|
-| art\_test Quality 80     |             40.39 |               **42.48**|   40.35 |
-| art\_test Quality 50,45  |             35.45 |               **36.70**|   36.27 |
+| Dataset/Model | Bicubic       | vgg\_7/photo  | upconv\_7/photo  | upconv\_7l/photo | 
+|---------------|---------------|---------------|------------------|------------------|
+| BSD100        | 29.558        | 31.474        | 31.705           | 31.812           |
+| Urban100      | 26.852        | 30.140        | 30.599           | 30.868           |
 
-The evaluation metric is PSNR(RGB), higher is better.
+### 2x - benchmark elapsed time (sec)
 
-## Denosing level 2 model
+| Dataset/Model | vgg\_7/photo  | upconv\_7/photo  | upconv\_7l/photo | 
+|---------------|---------------|------------------|------------------|
+| BSD100        | 4.057         | 2.509            | 4.947            |
+| Urban100      | 16.349        | 7.083            | 14.178           |
 
-| Dataset/Model            | anime\_style\_art | anime\_style\_art\_rgb | photo   |
-|--------------------------|-------------------|------------------------|---------|
-| photo\_test Quality 80   |             34.03 |                  34.42 |**36.06**|
-| photo\_test Quality 50,45|             31.95 |                  32.31 |**32.42**|
-| art\_test Quality 80     |             39.20 |               **41.12**|   40.48 |
-| art\_test Quality 50,45  |             36.14 |               **37.78**|   36.55 |
+### 2x with TTA - benchmark elapsed time (sec)
+
+| Dataset/Model | vgg\_7/photo  | upconv\_7/photo  | upconv\_7l/photo | 
+|---------------|---------------|------------------|------------------|
+| BSD100        | 36.611        | 20.219           | 42.486           |
+| Urban100      | 132.416       | 65.125           | 129.916          |
+
+## Art
+
+command: 
+`th tools/benchmark.lua -dir <dataset_dir> -model1_dir <model_dir> -method scale -filter Lanczos -color y -range_bug 1 -tta <0|1> -force_cudnn 1`
+
+### Dataset
+
+art_test: This dataset contains 85 various fan-art images. Sorry, this dataset is private.
+
+### 2x - PSNR 
+
+| Dataset/Model | Bicubic       | vgg\_7/art  | upconv\_7/art  | upconv\_7l/art | 
+|---------------|---------------|-------------|----------------|----------------|
+| art_test      | 31.022        | 37.495      | 38.330         | 39.140         |
+
+### 2x with TTA - PSNR 
+
+| Dataset/Model | Bicubic       | vgg\_7/art  | upconv\_7/art  | upconv\_7l/art | 
+|---------------|---------------|-------------|----------------|----------------|
+| art_test      | 31.022        | 37.777      | 38.677         | 39.510         |
+
+### 2x - benchmark elapsed time (sec)
+
+| Dataset/Model | vgg\_7/art  | upconv\_7/art  | upconv\_7l/art | 
+|---------------|-------------|----------------|----------------|
+| art_test      | 20.681      | 7.683          | 17.667         |
+
+### 2x with TTA - benchmark elapsed time (sec)
+
+| Dataset/Model | vgg\_7/art  | upconv\_7/art  | upconv\_7l/art | 
+|---------------|-------------|----------------|----------------|
+| art_test      | 174.674     | 77.716         | 163.932        |
 
-The evaluation metric is PSNR(RGB), higher is better.
diff --git a/appendix/benchmark.sh b/appendix/benchmark.sh
new file mode 100755
index 0000000..8fdbad5
--- /dev/null
+++ b/appendix/benchmark.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+set -x # trace mode: the shell prints each command to stderr before running it
+
+benchmark_photo() { # args: $1 dataset (directory under data/), $2 model (directory under models/), $3 value passed to -tta
+    dir="./benchmarks/${1}/${2}/${3}" # output directory for this dataset/model/TTA combination
+    mkdir -p "${dir}" || return 1 # do not start the benchmark when the output directory cannot be created
+    th tools/benchmark.lua -dir "data/${1}" -model1_dir "models/${2}/photo" -method scale -filter Catrom -color y -range_bug 1 -tta "${3}" -force_cudnn 1 -output_dir "${dir}" -save_info 1 -show_progress 0
+}
+# Photo sweep: with TTA off (0) and then on (1), run benchmark_photo for each
+# dataset (bsd100, urban100) with each model (vgg_7, upconv_7, upconv_7l).
+run_benchmark_photo() {
+    for tta in 0 1; do
+        for dataset in bsd100 urban100; do
+            for model in vgg_7 upconv_7 upconv_7l; do
+                benchmark_photo "${dataset}" "${model}" "${tta}"
+            done
+        done
+    done
+}
+benchmark_art() { # args: $1 dataset (directory under data/), $2 model (directory under models/), $3 value passed to -tta
+    dir="./benchmarks/${1}/${2}/${3}" # output directory for this dataset/model/TTA combination
+    mkdir -p "${dir}" || return 1 # do not start the benchmark when the output directory cannot be created
+    th tools/benchmark.lua -dir "data/${1}" -model1_dir "models/${2}/art" -method scale -filter Lanczos -color y -range_bug 1 -tta "${3}" -force_cudnn 1 -output_dir "${dir}" -save_info 1 -show_progress 0
+}
+# Art sweep: with TTA off (0) and then on (1), run benchmark_art on art_test with each model.
+run_benchmark_art() {
+    for tta in 0 1; do
+        for model in vgg_7 upconv_7 upconv_7l; do
+            benchmark_art art_test "${model}" "${tta}"
+        done
+    done
+}
+#run_benchmark_photo  # photo sweep is disabled here; uncomment to run it as well
+run_benchmark_art