#include "tmachine.h" #include "tpixelgr.h" #include "quickputP.h" //#include "tspecialstyleid.h" #include "tsystem.h" #include "tcolorstyles.h" #include "tpixelutils.h" //#include "tstopwatch.h" #ifndef TNZCORE_LIGHT #include "tpalette.h" #include "trastercm.h" #include "tropcm.h" #endif using namespace TConsts; #if defined(_WIN32) && defined(x64) #define USE_SSE2 #endif #ifdef USE_SSE2 #include // per SSE2 #endif #include //=========================================================================== /* Versione con estensione dell'ultimo pixel e con default_value per pixel "fuori" dall'immagine di ingresso. Sistemi di coordinate a meno di una traslazione: UV: coordinate dell'immagine di partenza ST: coordinate di filtro (il raggio del filtro e' intero in ST) FG: coordinate del filtro discretizzato XY: coordinate dell'immagine di arrivo Tra UV e ST c'e' una traslazione intera finche' non c'e' ingrandimento e non c'e' blur. Il blur aggiunge uno scale, e altrettanto fa l'ingrandimento. Tra ST e FG c'e' uno scale per la risoluzione del filtro. Oggetti: out : pixel di output (centro di ST e FG) ref : pixel di riferimento dell'immagine di input pix : pixel contribuente Notazione per le coordinate: obj_x : coordinate intere di obj obj_x_ : coordinate float di obj Notazione per le coppie di coordinate: obj1_obj2_x : coordinate intere di obj1 rispetto a obj2 obj1_obj2_x_ : coordinate float di obj1 rispetto a obj2 Matrici affini: aff_xy2uv : matrice di trasformazione delle coordinate da XY a UV aff0_xy2uv : stessa matrice con la parte di shift messa a 0 Una tantum: aff_uv2xy = aff aff_xy2uv = aff_inv (aff_uv2xy) aff0_uv2xy = aff_place (00, 00, aff_uv2xy) vedi il codice, comunque ottimizzo una rotazione seguita da una scalatura anisotropa. Cerco i fattori di scala facendo radici di somme di quadrati. Mi regolo sui fattori di scala come se dovessi considerare la scalatura da sola. In questo modo tutto si riporta alla vecchia maniera se i fattori di scala sono uguali. 
Se sono diversi il risultato e' comunque esatto per rotazioni di multipli di 90 gradi, e non ha discontinuita'. aff0_uv2st = aff_mult (aff0_xy2st, aff0_uv2xy) aff0_st2fg = aff_scale (filter_resolution, Aff_I) aff0_uv2fg = aff_mult (aff0_st2fg, aff0_uv2st) pix_ref_uv[] = tutti quelli che servono (vedi sotto) pix_ref_fg_ = AFF_M_V (aff0_uv2fg, pix_ref_uv) pix_ref_fg[] = ROUND (pix_ref_fg_) Ciclo su out_xy: out_uv_ = AFF_M_V (aff_xy2uv, out_xy) ref_uv = INT_LE (out_uv_) ref_out_uv_ = ref_uv - out_uv_ ref_out_fg_ = AFF_M_V (aff0_uv2fg, ref_out_uv_) ref_out_fg = ROUND (ref_out_fg_) Ciclo sui pix: pix_out_fg = pix_ref_fg + ref_out_fg weight = filter[pix_out_f] * filter[pix_out_g] Per sapere quali sono i pix che servono: -filter_fg_radius < pix_out_fg < filter_fg_radius min_pix_out_uv_ < pix_out_uv_ < max_pix_out_uv_ min_pix_out_uv_ < pix_ref_uv_ + ref_out_uv_ < max_pix_out_uv_ min_pix_out_uv_ + out_ref_uv_ < pix_ref_uv_ < max_pix_out_uv_ + out_ref_uv_ min_pix_out_uv_ < pix_ref_uv_ < max_pix_out_uv_ + 1 Ciclo su tutti quelli che soddisfano questa condizione 0 <= out_ref_uv_ < 1 -1 < ref_out_uv_ <= 0 min_ref_out_fg_ <= ref_out_fg_ <= max_ref_out_fg_ min_ref_out_fg <= ref_out_fg <= max_ref_out_fg -filter_fg_radius < pix_out_fg < filter_fg_radius -filter_fg_radius < pix_ref_fg + ref_out_fg < filter_fg_radius -filter_fg_radius - ref_out_fg < pix_ref_fg < filter_fg_radius - ref_out_fg -filter_fg_radius - max_ref_out_fg < pix_ref_fg < filter_fg_radius - min_ref_out_fg Scarto quelli che non soddisfano questa condizione Come e' fatto il filtro: TOP filter_array[filter_array_size-1] | filter[max_filter_fg] | filter[max_pix_out_fg] | filter[0] | filter[min_pix_out_fg] BOT filter[min_filter_fg] == filter_array[0] */ //------------------------------------------------------------------------------ //--------------------------------------------------------------------------- #if !defined(TNZ_LITTLE_ENDIAN) TNZ_LITTLE_ENDIAN undefined !! 
#endif // 2^36 * 1.5, (52-_shiftamt=36) uses limited precision to floor const double _double2fixmagic = 68719476736.0 * 1.5; // 16.16 fixed point representation const TINT32 _shiftamt = 16; #if TNZ_LITTLE_ENDIAN #define iexp_ 1 #define iman_ 0 #else #define iexp_ 0 #define iman_ 1 #endif inline TINT32 Double2Int(double val) { val = val + _double2fixmagic; return ((TINT32 *)&val)[iman_] >> _shiftamt; } #define DOUBLE_TO_INT32(D) \ (d2iaux = D, d2iaux += _double2fixmagic, \ (((TINT32 *)&(d2iaux))[iman_] >> _shiftamt)) //#define USE_DOUBLE_TO_INT //=========================================================================== inline double sinc0(double x, int a) { return sin((M_PI / (a)) * (x)) / ((M_PI / (a)) * (x)); } inline double sinc(double x, int a) { return (x) == 0.0 ? 1.0 : sin((M_PI / (a)) * (x)) / ((M_PI / (a)) * (x)); } inline UCHAR TO8BIT(float X) { return (((X) < 0.0F) ? 0 : (((X) > 255.0F) ? 255 : tround(X))); } const UCHAR BORDER_GR8 = 255; const UCHAR GREY_GR8 = 127; #ifdef USE_INLINE_FUNS //--------------------------------------------------------------------------- inline double aff0MV1(const TAffine &aff, double v1, double v2) { return aff.a11 * v1 + aff.a12 * v2; } //--------------------------------------------------------------------------- inline double affMV1(const TAffine &aff, double v1, double v2) { return aff.a11 * v1 + aff.a12 * v2 + aff.a13; } //--------------------------------------------------------------------------- inline double aff0MV2(const TAffine &aff, double v1, double v2) { return aff.a21 * v1 + aff.a22 * v2; } //--------------------------------------------------------------------------- inline double affMV2(const TAffine &aff, double v1, double v2) { return aff.a21 * v1 + aff.a22 * v2 + aff.a23; } #else // !USE_INLINE_FUNS #ifndef USE_DOUBLE_TO_INT #define ROUND(x) \ ((int)(((int)(-0.9F) == 0 && (x) < 0.0F) ? ((x)-0.5F) : ((x) + 0.5F))) #define ROUNDP(x) ((int)((x) + 0.5F)) #define FLOOR(x) ((int)(x) > (x) ? 
(int)(x)-1 : (int)(x)) #define CEIL(x) ((int)(x) < (x) ? (int)(x) + 1 : (int)(x)) #else #define ROUND(x) \ (DOUBLE_TO_INT32(((int)(-0.9F) == 0 && (x) < 0.0F) ? ((x)-0.5F) \ : ((x) + 0.5F))) #define ROUNDP(x) (DOUBLE_TO_INT32((x) + 0.5F)) #define FLOOR(x) \ (DOUBLE_TO_INT32(x) > (x) ? DOUBLE_TO_INT32(x) - 1 : DOUBLE_TO_INT32(x)) #define CEIL(x) \ (DOUBLE_TO_INT32(x) < (x) ? DOUBLE_TO_INT32(x) + 1 : DOUBLE_TO_INT32(x)) #endif #define INTLE(x) (FLOOR(x)) #define INTGT(x) (FLOOR(x) + 1) #define INTLT(x) (CEIL(x) - 1) #define INTGE(x) (CEIL(x)) #define NOT_LESS_THAN(MIN, X) \ { \ if ((X) < (MIN)) (X) = (MIN); \ } #define NOT_MORE_THAN(MAX, X) \ { \ if ((X) > (MAX)) (X) = (MAX); \ } #define tround ROUND #define troundp ROUNDP #define tfloor FLOOR #define tceil CEIL #define intLE INTLE #define intGT INTGT #define intLT INTLT #define intGE INTGE #define notLessThan NOT_LESS_THAN #define notMoreThan NOT_MORE_THAN #define AFF0_M_V_1(AFF, V1, V2) ((AFF).a11 * (V1) + (AFF).a12 * (V2)) #define AFF0_M_V_2(AFF, V1, V2) ((AFF).a21 * (V1) + (AFF).a22 * (V2)) #define AFF_M_V_1(AFF, V1, V2) ((AFF).a11 * (V1) + (AFF).a12 * (V2) + (AFF).a13) #define AFF_M_V_2(AFF, V1, V2) ((AFF).a21 * (V1) + (AFF).a22 * (V2) + (AFF).a23) #define aff0MV1 AFF0_M_V_1 #define aff0MV2 AFF0_M_V_2 #define affMV1 AFF_M_V_1 #define affMV2 AFF_M_V_2 #endif // USE_INLINE_FUNS //--------------------------------------------------------------------------- struct FILTER { int first, last; float *w; float *w_base; }; struct NOCALC { int first, last; }; //--------------------------------------------------------------------------- inline int get_filter_radius(TRop::ResampleFilterType flt_type) { switch (flt_type) { case TRop::Triangle: return 1; case TRop::Mitchell: return 2; case TRop::Cubic5: return 2; case TRop::Cubic75: return 2; case TRop::Cubic1: return 2; case TRop::Hann2: return 2; case TRop::Hann3: return 3; case TRop::Hamming2: return 2; case TRop::Hamming3: return 3; case TRop::Lanczos2: return 2; case 
TRop::Lanczos3: return 3; case TRop::Gauss: return 2; default: assert(!"bad filter type"); } return 0; } //--------------------------------------------------------------------------- //! Equivalent to aff * TRectD(u0, v0, u1, v0). inline void minmax(double u0, double v0, double u1, double v1, const TAffine &aff, double &x0, double &y0, double &x1, double &y1) { double xmin, ymin; double xmax, ymax; double x_a, y_a; double x_b, y_b; double x_c, y_c; double x_d, y_d; x_a = affMV1(aff, u0, v0); y_a = affMV2(aff, u0, v0); x_b = affMV1(aff, u1, v0); y_b = affMV2(aff, u1, v0); x_c = affMV1(aff, u1, v1); y_c = affMV2(aff, u1, v1); x_d = affMV1(aff, u0, v1); y_d = affMV2(aff, u0, v1); xmin = std::min(x_a, x_b); xmax = std::max(x_a, x_b); xmin = std::min(xmin, x_c); xmax = std::max(xmax, x_c); xmin = std::min(xmin, x_d); xmax = std::max(xmax, x_d); ymin = std::min(y_a, y_b); ymax = std::max(y_a, y_b); ymin = std::min(ymin, y_c); ymax = std::max(ymax, y_c); ymin = std::min(ymin, y_d); ymax = std::max(ymax, y_d); x0 = xmin; y0 = ymin; x1 = xmax; y1 = ymax; } /*---------------------------------------------------------------------------*/ /* inline bool trivial_rot (TAffine inv, int *dudx, int *dudy, int *dvdx, int *dvdy) { *dudx = 0; *dudy = 0; *dvdx = 0; *dvdy = 0; if (! (inv.a12 == 0 && inv.a21 == 0 || inv.a11 == 0 && inv.a22 == 0)) return false; if (! (inv.a11 == 1 || inv.a11 == 0 || inv.a11 == -1)) return false; if (! (inv.a12 == 1 || inv.a12 == 0 || inv.a12 == -1)) return false; if (! (inv.a21 == 1 || inv.a21 == 0 || inv.a21 == -1)) return false; if (! (inv.a22 == 1 || inv.a22 == 0 || inv.a22 == -1)) return false; *dudx = (int)inv.a11; *dudy = (int)inv.a12; *dvdx = (int)inv.a21; *dvdy = (int)inv.a22; return true; } */ //----------------------------------------------------------------------------- // // see Mitchell&Netravali, "Reconstruction Filters in Computer Graphics", // SIGGRAPH 88. Mitchell code provided by Paul Heckbert. 
// // //----------------------------------------------------------------------------- static double p0, p2, p3, q0, q1, q2, q3; inline void mitchellinit(double b, double c) { p0 = (6.0 - 2.0 * b) / 6.0; p2 = (-18.0 + 12.0 * b + 6.0 * c) / 6.0; p3 = (12.0 - 9.0 * b - 6.0 * c) / 6.0; q0 = (8.0 * b + 24.0 * c) / 6.0; q1 = (-12.0 * b - 48.0 * c) / 6.0; q2 = (6.0 * b + 30.0 * c) / 6.0; q3 = (-b - 6.0 * c) / 6.0; } const int fltradMitchell = 2; static inline double flt_mitchell( double x) /*Mitchell & Netravali's two-param cubic*/ { static int mitfirsted; if (!mitfirsted) { mitchellinit(1.0 / 3.0, 1.0 / 3.0); mitfirsted = 1; } if (x < -2.0) return 0.0; if (x < -1.0) return (q0 - x * (q1 - x * (q2 - x * q3))); if (x < 0.0) return (p0 + x * x * (p2 - x * p3)); if (x < 1.0) return (p0 + x * x * (p2 + x * p3)); if (x < 2.0) return (q0 + x * (q1 + x * (q2 + x * q3))); return 0.0; } //----------------------------------------------------------------------------- const int fltradTriangle = 1; static inline double flt_triangle(double x) { if (x < -1.0) return 0.0; if (x < 0.0) return 1.0 + x; if (x < 1.0) return 1.0 - x; return 0.0; } //----------------------------------------------------------------------------- const int fltradCubic5 = 2; static inline double flt_cubic_5(double x) { if (x < 0.0) x = -x; if (x < 1.0) return 2.5 * x * x * x - 3.5 * x * x + 1; if (x < 2.0) return 0.5 * x * x * x - 2.5 * x * x + 4 * x - 2; return 0.0; } //----------------------------------------------------------------------------- const int fltradCubic75 = 2; static inline double flt_cubic_75(double x) { if (x < 0.0) x = -x; if (x < 1.0) return 2.75 * x * x * x - 3.75 * x * x + 1; if (x < 2.0) return 0.75 * x * x * x - 3.75 * x * x + 6 * x - 3; return 0.0; } //----------------------------------------------------------------------------- const int fltradCubic1 = 2; static inline double flt_cubic_1(double x) { if (x < 0.0) x = -x; if (x < 1.0) return 3 * x * x * x - 4 * x * x + 1; if (x < 2.0) 
return x * x * x - 5 * x * x + 8 * x - 4; return 0.0; } //----------------------------------------------------------------------------- const int fltradHann2 = 2; static inline double flt_hann2(double x) { if (x <= -2.0) return 0.0; if (x < 2.0) return sinc(x, 1) * (0.5 + 0.5 * cos(M_PI_2 * x)); return 0.0; } //----------------------------------------------------------------------------- const int fltradHann3 = 3; static inline double flt_hann3(double x) { if (x <= -3.0) return 0.0; if (x < 3.0) return sinc(x, 1) * (0.5 + 0.5 * cos(M_PI_3 * x)); return 0.0; } //----------------------------------------------------------------------------- const int fltradHamming2 = 2; static inline double flt_hamming2(double x) { if (x <= -2.0) return 0.0; if (x < 2.0) return sinc(x, 1) * (0.54 + 0.46 * cos(M_PI_2 * x)); return 0.0; } //----------------------------------------------------------------------------- const int fltradHamming3 = 3; static inline double flt_hamming3(double x) { if (x <= -3.0) return 0.0; if (x < 3.0) return sinc(x, 1) * (0.54 + 0.46 * cos(M_PI_3 * x)); return 0.0; } //----------------------------------------------------------------------------- const int fltradLanczos2 = 2; static inline double flt_lanczos2(double x) { if (x <= -2.0) return 0.0; if (x < 2.0) return sinc(x, 1) * sinc(x, 2); return 0.0; } //----------------------------------------------------------------------------- const int fltradLanczos3 = 3; static inline double flt_lanczos3(double x) { if (x <= -3.0) return 0.0; if (x < 3.0) return sinc(x, 1) * sinc(x, 3); return 0.0; } //----------------------------------------------------------------------------- const int fltradGauss = 2; static inline double flt_gauss(double x) { if (x <= -2.0) return 0.0; if (x < 2.0) return exp(-M_PI * x * x); return 0.0; /* exp(-M_PI*2*2)~=3.5*10^-6 */ } //----------------------------------------------------------------------------- const int fltradW1 = 2; static inline double flt_w_1(double x) { if (x < 0.0) x 
= -x; if (x < 0.5) return 1 - 0.5 * x; if (x < 1.0) return 1.5 - 1.5 * x; if (x < 1.5) return 0.5 - 0.5 * x; if (x < 2.0) return 0.5 * x - 1.0; return 0.0; } //----------------------------------------------------------------------------- static inline void get_flt_fun_rad(TRop::ResampleFilterType flt_type, double (**flt_fun)(double), double &flt_rad) { double (*fun)(double); double rad; switch (flt_type) { case TRop::Triangle: fun = flt_triangle; rad = fltradTriangle; break; case TRop::Mitchell: fun = flt_mitchell; rad = fltradMitchell; break; case TRop::Cubic5: fun = flt_cubic_5; rad = fltradCubic5; break; case TRop::Cubic75: fun = flt_cubic_75; rad = fltradCubic75; break; case TRop::Cubic1: fun = flt_cubic_1; rad = fltradCubic1; break; case TRop::Hann2: fun = flt_hann2; rad = fltradHann2; break; case TRop::Hann3: fun = flt_hann3; rad = fltradHann3; break; case TRop::Hamming2: fun = flt_hamming2; rad = fltradHamming2; break; case TRop::Hamming3: fun = flt_hamming3; rad = fltradHamming3; break; case TRop::Lanczos2: fun = flt_lanczos2; rad = fltradLanczos2; break; case TRop::Lanczos3: fun = flt_lanczos3; rad = fltradLanczos3; break; case TRop::Gauss: fun = flt_gauss; rad = fltradGauss; break; case 101: fun = flt_w_1; rad = fltradW1; break; default: fun = flt_triangle; rad = fltradTriangle; break; } if (flt_fun) *flt_fun = fun; flt_rad = rad; } //--------------------------------------------------------------------------- static FILTER *create_filter(TRop::ResampleFilterType flt_type, double blur, double dx_du, double delta_x, int lx, double &xrad, int &umin, int &umax, int &uwidth) { double (*flt_fun)(double); FILTER *filter, *f; double du_dx; int x; double u_; int u, ulo, uhi, ulomin, uhimax, m, n, nmax; double flt_rad, rad_u, rad_x, nodedist_u, nodefreq_u, sum, norm, w; #ifdef USE_DOUBLE_TO_INT double d2iaux; #endif get_flt_fun_rad(flt_type, &flt_fun, flt_rad); du_dx = 1 / dx_du; if (dx_du > 1) nodedist_u = blur; /* magnification */ else nodedist_u = du_dx * blur; 
/* minification */ rad_u = flt_rad * nodedist_u; rad_x = rad_u * dx_du; nodefreq_u = 1 / nodedist_u; /* mu = lu - 1; */ filter = new FILTER[lx]; nmax = 0; ulomin = c_maxint - 1; uhimax = c_minint + 1; for (x = 0; x < lx; x++) { f = filter + x; u_ = (x - delta_x) * du_dx; ulo = intGT(u_ - rad_u); uhi = intLT(u_ + rad_u); /* NOT_LESS_THAN( 0, ulo) NOT_MORE_THAN(mu, uhi) */ m = uhi - ulo + 1; if (m > 0) { f->w_base = new float[m]; f->w = f->w_base - ulo; for (sum = 0.0, u = ulo; u <= uhi; u++) { w = (*flt_fun)((u - u_) * nodefreq_u); sum += w; f->w[u] = (float)w; } for (; ulo <= uhi; ulo++) if (f->w[ulo]) break; for (; uhi >= ulo; uhi--) if (f->w[uhi]) break; if (ulo < ulomin) ulomin = ulo; if (uhi > uhimax) uhimax = uhi; n = uhi - ulo + 1; if (n > nmax) nmax = n; f->first = ulo; f->last = uhi; norm = 1 / sum; for (u = ulo; u <= uhi; u++) f->w[u] *= (float)norm; } else { f->w_base = 0; f->first = ulo; f->last = uhi; } } xrad = rad_x; umin = ulomin; umax = uhimax; uwidth = nmax; return filter; } //----------------------------------------------------------------------------- static NOCALC *create_nocalc(TRop::ResampleFilterType flt_type, double blur, double dx_du, double delta_x, int lx, int umin, int umax, int &xwidth) { /* Il nocalc serve a stabilire che un insieme di pixel u (di ingresso) non ha bisogno di essere calcolato, perche tutti i pixel x (di uscita) su cui questo insieme si distribuisce non hanno bisogno di essere calcolati. Il significato del nocalc a coordinata x e': se arrivati a x si e' trovata una sequenza di width pixel x che non e' necessario calcolare, allora non e' necessario calcolare tutti i pixel u da nocalc->first a nocalc->last. Per primo va calcolata la width. Deve essere garantito che tutti i pixel u siano coperti dal vettore di nocalc. Un pixel u viene usato da un intervallo di x largo quanto il filtro (in x), cioe' un intervallo aperto (-radx_,radx_) intorno a x(u). 
Aggiungendo una unita' x a questa larghezza si ha una larghezza tale che se tutti i pixel x sono nocalc, un intervallo largo 1 in unita' x di pixel u non necessita di essere calcolato. Vogliamo che ulo_ <= first <= last < uhi_ con uhi_ = ulo_ + u(1). Devono essere nocalc almeno gli x in (x(ulo_)-radx_, x(uhi_)+radx_). Poniamo x = x(uhi_)+radx_-1. uhi_ = u(x-radx_+1) ulo_ = u(x-radx_) x(ulo_)-radx_ = x-2*radx_ ma questo punto e' escluso, quindi l'intero GT e' x - width + 1 = INT_LE (x-2*radx_+1) 1 - INT_LE (-2*radx_+1) = width 1 + INT_GE (2*radx_-1) = width INT_GE (2*radx_) = width Pero' per sicurezza facciamo INT_GT (2*radx_) = width */ NOCALC *nocalc; int width; double flt_rad; double rad_x; double du_dx; double ulo_, uhi_; int ulo, uhi; int x; #ifdef USE_DOUBLE_TO_INT double d2iaux; #endif du_dx = 1 / dx_du; get_flt_fun_rad(flt_type, 0, flt_rad); if (dx_du > 1) /* sto ingrandendo */ rad_x = flt_rad * blur * dx_du; else rad_x = flt_rad * blur; rad_x += 0.5; /* ?!?!?!?!? */ width = intGT(2 * rad_x + 1); nocalc = new NOCALC[lx + width - 1]; for (x = 0; x < lx + width - 1; x++) { ulo_ = (x - rad_x - delta_x) * du_dx; uhi_ = ulo_ + du_dx; ulo = intGE(ulo_); uhi = intLT(uhi_); nocalc[x].first = std::max(umin, ulo); nocalc[x].last = std::min(umax, uhi); } xwidth = width; return nocalc; } //--------------------------------------------------------------------------- /* inline UINT calcValueInit(UINT init_value){ return init_value;//0xffffU; } */ /* inline void calcValueInit(UINT &calc_value){ calc_value = 0xffffU; } inline bool calcValueEmpty(UINT calc_value){ return calc_value == 0xffffU; } inline bool calcValueReady(UINT calc_value){ return calc_value <= 0x1ffU; } inline void calcValueAdvance(UINT &calc_value){ calc_value >>= 1; } inline void calcValueNoCalc(UINT &calc_value){ calc_value &= ~0x80U; } */ #define CALC_VALUE_INIT \ { calc_value = 0xffffU; } #define CALC_VALUE_EMPTY (calc_value == 0xffffU) #define CALC_VALUE_READY (calc_value <= 0x1ffU) #define 
CALC_VALUE_ADVANCE \ { calc_value >>= 1; } #define CALC_VALUE_NOCALC \ { calc_value &= ~0x80U; } template #ifdef _MSC_VER __forceinline #endif void ResampleCalcAlgo(PixType *buffer_in, int lu, int lv, int wrap_in, int max_pix_ref_u, int min_pix_ref_u, int max_pix_ref_v, int min_pix_ref_v, UCHAR *calc, int calc_bytesize, int calc_bytewrap) /* lu = width lv = height wrap_in = wrap */ { PixType *prev_line_in; PixType *last_line_in; PixType prev_value; PixType left_value; PixType last_value; UINT calc_value; UCHAR *calc_byte = 0; int goodcols; std::unique_ptr col_height(new int[lu]); int ref_u, ref_v; int filter_diam_u = max_pix_ref_u - min_pix_ref_u + 1; int filter_diam_v = max_pix_ref_v - min_pix_ref_v + 1; int last_u, last_v; int *ch; int *ch_end; assert(col_height); CALC_VALUE_INIT ch = col_height.get(); ch_end = ch + lu; while (ch < ch_end) { *ch = filter_diam_v; ++ch; } last_line_in = buffer_in; for (last_v = 1, ref_v = last_v - max_pix_ref_v; ref_v < 0; last_v++, ref_v++) { prev_line_in = last_line_in; last_line_in = buffer_in + last_v * wrap_in; for (last_u = 0; last_u < lu; last_u++) { last_value = last_line_in[last_u]; prev_value = prev_line_in[last_u]; if (last_value == prev_value) col_height[last_u]++; else col_height[last_u] = 1; } } for (; last_v < lv; last_v++, ref_v++) { prev_line_in = last_line_in; last_line_in = buffer_in + last_v * wrap_in; last_value = last_line_in[0]; goodcols = 0; for (last_u = 0, ref_u = last_u - max_pix_ref_u; ref_u < 0; last_u++, ref_u++) { left_value = last_value; last_value = last_line_in[last_u]; prev_value = prev_line_in[last_u]; if (last_value == prev_value) { col_height[last_u]++; if (col_height[last_u] >= filter_diam_v) if (last_value == left_value) goodcols++; else goodcols = 1; else goodcols = 0; } else { col_height[last_u] = 1; goodcols = 0; } } calc_byte = calc + calc_bytewrap * ref_v; CALC_VALUE_INIT for (; last_u < lu; last_u++, ref_u++) { left_value = last_value; last_value = last_line_in[last_u]; prev_value = 
prev_line_in[last_u]; if (last_value == prev_value) { col_height[last_u]++; if (col_height[last_u] >= filter_diam_v) if (last_value == left_value) { goodcols++; if (goodcols >= filter_diam_u) CALC_VALUE_NOCALC } else goodcols = 1; else goodcols = 0; } else { col_height[last_u] = 1; goodcols = 0; } if (CALC_VALUE_READY) { *calc_byte++ = (UCHAR)calc_value; CALC_VALUE_INIT } else CALC_VALUE_ADVANCE } for (; ref_u < lu; last_u++, ref_u++) { if (CALC_VALUE_READY) { *calc_byte++ = (UCHAR)calc_value; CALC_VALUE_INIT } else CALC_VALUE_ADVANCE } if (!CALC_VALUE_EMPTY) { while (!CALC_VALUE_READY) CALC_VALUE_ADVANCE *calc_byte++ = (UCHAR)calc_value; } } for (; ref_v < lv; last_v++, ref_v++) { for (last_u = 0, ref_u = last_u - max_pix_ref_u; ref_u < 0; last_u++, ref_u++) { } calc_byte = calc + calc_bytewrap * ref_v; CALC_VALUE_INIT for (; last_u < lu; last_u++, ref_u++) { if (CALC_VALUE_READY) { *calc_byte++ = (UCHAR)calc_value; CALC_VALUE_INIT } else CALC_VALUE_ADVANCE } for (; ref_u < lu; last_u++, ref_u++) { if (CALC_VALUE_READY) { *calc_byte++ = (UCHAR)calc_value; CALC_VALUE_INIT } else CALC_VALUE_ADVANCE } if (!CALC_VALUE_EMPTY) { while (!CALC_VALUE_READY) CALC_VALUE_ADVANCE *calc_byte++ = (UCHAR)calc_value; } } assert(!calc_byte || calc_byte == calc + calc_bytesize); } /*---------------------------------------------------------------------------*/ template void create_calc(const TRasterPT &rin, int min_pix_ref_u, int max_pix_ref_u, int min_pix_ref_v, int max_pix_ref_v, UCHAR *&p_calc, int &p_calc_allocsize, int &p_calc_bytewrap) { UCHAR *calc; int lu, lv; int wrap_in; int calc_bytesize; int calc_bytewrap; lu = rin->getLx(); lv = rin->getLy(); wrap_in = rin->getWrap(); p_calc_bytewrap = (lu + 7) >> 3; // ceil(lu/8) calc_bytewrap = p_calc_bytewrap; calc_bytesize = calc_bytewrap * lv; // lv * ceil(lu/8) if (calc_bytesize > p_calc_allocsize) { if (p_calc_allocsize) delete[](p_calc); // TMALLOC (*p_calc, calc_bytesize) p_calc = new UCHAR[calc_bytesize]; assert(p_calc); 
memset(p_calc, 0xff, calc_bytesize); p_calc_allocsize = calc_bytesize; } calc = p_calc; if (lu < max_pix_ref_u + 1 || lv < max_pix_ref_v + 1) { memset(calc, 0xff, calc_bytesize); return; } // RESAMPLE_CALC_ALGO ResampleCalcAlgo(rin->pixels(), lu, lv, wrap_in, max_pix_ref_u, min_pix_ref_u, max_pix_ref_v, min_pix_ref_v, calc, calc_bytesize, calc_bytewrap); } //--------------------------------------------------------------------------- namespace { template class Converter { public: static inline T convert(const TPixel32 &pixin) { return pixin; } }; #define BYTE_FROM_USHORT(u) (((256U * 255U + 1U) * u + (1 << 23)) >> 24) #define USHORT_FROM_BYTE(u) (u | u << 8) template <> class Converter { public: static inline TPixel64 convert(const TPixel32 &pix) { return TPixel64(USHORT_FROM_BYTE(pix.r), USHORT_FROM_BYTE(pix.g), USHORT_FROM_BYTE(pix.b), USHORT_FROM_BYTE(pix.m)); } }; //----------------------------------------------------------------------------- inline double get_filter_value(TRop::ResampleFilterType flt_type, double x) { // it is assumed that x != 0 (not checked only for speed reasons) switch (flt_type) { case TRop::Triangle: if (x < -1.0) return 0.0; if (x < 0.0) return 1.0 + x; if (x < 1.0) return 1.0 - x; return 0.0; case TRop::Mitchell: { static double p0, p2, p3, q0, q1, q2, q3; if (!p0) { const double b = 1.0 / 3.0; const double c = 1.0 / 3.0; p0 = (6.0 - 2.0 * b) / 6.0; p2 = (-18.0 + 12.0 * b + 6.0 * c) / 6.0; p3 = (12.0 - 9.0 * b - 6.0 * c) / 6.0; q0 = (8.0 * b + 24.0 * c) / 6.0; q1 = (-12.0 * b - 48.0 * c) / 6.0; q2 = (6.0 * b + 30.0 * c) / 6.0; q3 = (-b - 6.0 * c) / 6.0; } if (x < -2.0) return 0.0; if (x < -1.0) return (q0 - x * (q1 - x * (q2 - x * q3))); if (x < 0.0) return (p0 + x * x * (p2 - x * p3)); if (x < 1.0) return (p0 + x * x * (p2 + x * p3)); if (x < 2.0) return (q0 + x * (q1 + x * (q2 + x * q3))); break; } case TRop::Cubic5: if (x < 0.0) x = -x; if (x < 1.0) return 2.5 * x * x * x - 3.5 * x * x + 1; if (x < 2.0) return 0.5 * x * x * x - 2.5 * 
x * x + 4 * x - 2; break; case TRop::Cubic75: if (x < 0.0) x = -x; if (x < 1.0) return 2.75 * x * x * x - 3.75 * x * x + 1; if (x < 2.0) return 0.75 * x * x * x - 3.75 * x * x + 6 * x - 3; break; case TRop::Cubic1: if (x < 0.0) x = -x; if (x < 1.0) return 3 * x * x * x - 4 * x * x + 1; if (x < 2.0) return x * x * x - 5 * x * x + 8 * x - 4; break; case TRop::Hann2: if (x <= -2.0) return 0.0; if (x < 2.0) return sinc0(x, 1) * (0.5 + 0.5 * cos(M_PI_2 * x)); break; case TRop::Hann3: if (x <= -3.0) return 0.0; if (x < 3.0) return sinc0(x, 1) * (0.5 + 0.5 * cos(M_PI_3 * x)); break; case TRop::Hamming2: if (x <= -2.0) return 0.0; if (x < 2.0) return sinc0(x, 1) * (0.54 + 0.46 * cos(M_PI_2 * x)); break; case TRop::Hamming3: if (x <= -3.0) return 0.0; if (x < 3.0) return sinc0(x, 1) * (0.54 + 0.46 * cos(M_PI_3 * x)); break; case TRop::Lanczos2: if (x <= -2.0) return 0.0; if (x < 2.0) return sinc0(x, 1) * sinc0(x, 2); break; case TRop::Lanczos3: if (x <= -3.0) return 0.0; if (x < 3.0) return sinc0(x, 1) * sinc0(x, 3); break; case TRop::Gauss: if (x <= -2.0) return 0.0; if (x < 2.0) return exp(-M_PI * x * x); /* exp(-M_PI*2*2)~=3.5*10^-6 */ break; default: assert(!"bad filter type"); break; } return 0.0; } //--------------------------------------------------------------------------- template void resample_clear_rgbm(TRasterPT rout, T default_value) { T *buffer_out; buffer_out = rout->pixels(); for (int out_y = 0; out_y < rout->getLy(); out_y++) for (int out_x = 0; out_x < rout->getLx(); out_x++) buffer_out[out_x + out_y * rout->getWrap()] = default_value; } //--------------------------------------------------------------------------- template void resample_main_rgbm(TRasterPT rout, const TRasterPT &rin, const TAffine &aff_xy2uv, const TAffine &aff0_uv2fg, int min_pix_ref_u, int min_pix_ref_v, int max_pix_ref_u, int max_pix_ref_v, int n_pix, int *pix_ref_u, int *pix_ref_v, int *pix_ref_f, int *pix_ref_g, short *filter) { const T *buffer_in; T *buffer_out; T *pix_out; int lu, 
lv, wrap_in, mu, mv; int lx, ly, wrap_out; int out_x, out_y; double out_x_, out_y_; double out_u_, out_v_; int ref_u, ref_v; int pix_u, pix_v; double ref_out_u_, ref_out_v_; double ref_out_f_, ref_out_g_; int ref_out_f, ref_out_g; int pix_out_f, pix_out_g; int filter_mu, filter_mv; UINT inside_limit_u, inside_limit_v; int inside_nonempty; int outside_min_u, outside_min_v; int outside_max_u, outside_max_v; UCHAR *calc; int calc_allocsize; int calc_bytewrap; UCHAR calc_value; bool must_calc; T pix_value, default_value(0, 0, 0, 0); SUMS_TYPE weight, sum_weights; double inv_sum_weights; SUMS_TYPE sum_contribs_r, sum_contribs_g, sum_contribs_b, sum_contribs_m; double out_fval_r, out_fval_g, out_fval_b, out_fval_m; int out_value_r, out_value_g, out_value_b, out_value_m; int i; #ifdef USE_DOUBLE_TO_INT double d2iaux; #endif if (!(rout->getLx() > 0 && rout->getLy() > 0)) return; if (!(rin->getLx() > 0 && rin->getLy() > 0)) { rout->clear(); return; } calc = 0; calc_allocsize = 0; // Create a bit array, each indicating whether a pixel has to be calculated or // not create_calc(rin, min_pix_ref_u, max_pix_ref_u, min_pix_ref_v, max_pix_ref_v, calc, calc_allocsize, calc_bytewrap); buffer_in = rin->pixels(); buffer_out = rout->pixels(); lu = rin->getLx(); lx = rout->getLx(); lv = rin->getLy(); ly = rout->getLy(); wrap_in = rin->getWrap(); wrap_out = rout->getWrap(); mu = lu - 1; mv = lv - 1; filter_mu = max_pix_ref_u - min_pix_ref_u; filter_mv = max_pix_ref_v - min_pix_ref_v; inside_limit_u = lu - filter_mu; inside_limit_v = lv - filter_mv; inside_nonempty = (int)inside_limit_u > 0 && (int)inside_limit_v > 0; outside_min_u = -max_pix_ref_u; outside_min_v = -max_pix_ref_v; outside_max_u = mu - min_pix_ref_u; outside_max_v = mv - min_pix_ref_v; // For every pixel of the output image for (out_y = 0, out_y_ = 0.5; out_y < ly; out_y++, out_y_ += 1.0) { for (out_x = 0, out_x_ = 0.5; out_x < lx; out_x++, out_x_ += 1.0) { pix_out = buffer_out + out_y * wrap_out + out_x; // Take the 
pre-image of the pixel through the passed affine out_u_ = affMV1(aff_xy2uv, out_x_, out_y_); out_v_ = affMV2(aff_xy2uv, out_x_, out_y_); // Convert to integer coordinates ref_u = intLE(out_u_); ref_v = intLE(out_v_); // NOTE: The following condition is equivalent to: // (ref_u + min_pix_ref_u >= 0 && ref_v + min_pix_ref_v >= 0 && // ref_u + max_pix_ref_u < lu && ref_v + max_pix_ref_v < lv) // - since the presence of (UINT) makes integeres < 0 become >> 0 if (inside_nonempty && (UINT)(ref_u + min_pix_ref_u) < inside_limit_u && (UINT)(ref_v + min_pix_ref_v) < inside_limit_v) { // The filter mask starting around (ref_u, ref_v) is completely // contained // in the source raster // Get the calculation array mask byte calc_value = calc[(ref_u >> 3) + ref_v * calc_bytewrap]; if (calc_value && ((calc_value >> (ref_u & 7)) & 1)) // If the mask bit for this pixel is on { ref_out_u_ = ref_u - out_u_; // Fractionary part of the pre-image ref_out_v_ = ref_v - out_v_; ref_out_f_ = aff0MV1(aff0_uv2fg, ref_out_u_, ref_out_v_); // Make the image of it into fg ref_out_g_ = aff0MV2(aff0_uv2fg, ref_out_u_, ref_out_v_); ref_out_f = tround(ref_out_f_); // Convert to integer coordinates ref_out_g = tround(ref_out_g_); sum_weights = 0; sum_contribs_r = 0; sum_contribs_g = 0; sum_contribs_b = 0; sum_contribs_m = 0; // Make the weighted sum of source pixels for (i = n_pix - 1; i >= 0; --i) { // Build the weight for this pixel pix_out_f = pix_ref_f[i] + ref_out_f; // image of the integer part // + that of the fractionary // part pix_out_g = pix_ref_g[i] + ref_out_g; weight = (filter[pix_out_f] * filter[pix_out_g]) >> 16; // Add the weighted pixel contribute pix_u = pix_ref_u[i] + ref_u; pix_v = pix_ref_v[i] + ref_v; pix_value = buffer_in[pix_u + pix_v * wrap_in]; sum_contribs_r += (SUMS_TYPE)pix_value.r * weight; sum_contribs_g += (SUMS_TYPE)pix_value.g * weight; sum_contribs_b += (SUMS_TYPE)pix_value.b * weight; sum_contribs_m += (SUMS_TYPE)pix_value.m * weight; sum_weights += weight; } 
inv_sum_weights = 1.0 / sum_weights; out_fval_r = sum_contribs_r * inv_sum_weights; out_fval_g = sum_contribs_g * inv_sum_weights; out_fval_b = sum_contribs_b * inv_sum_weights; out_fval_m = sum_contribs_m * inv_sum_weights; notLessThan(0.0, out_fval_r); notLessThan(0.0, out_fval_g); notLessThan(0.0, out_fval_b); notLessThan(0.0, out_fval_m); out_value_r = troundp(out_fval_r); out_value_g = troundp(out_fval_g); out_value_b = troundp(out_fval_b); out_value_m = troundp(out_fval_m); notMoreThan(T::maxChannelValue, out_value_r); notMoreThan(T::maxChannelValue, out_value_g); notMoreThan(T::maxChannelValue, out_value_b); notMoreThan(T::maxChannelValue, out_value_m); pix_out->r = out_value_r; pix_out->g = out_value_g; pix_out->b = out_value_b; pix_out->m = out_value_m; } else // The pixel is copied from the corresponding source... *pix_out = buffer_in[ref_u + ref_v * wrap_in]; } else if (outside_min_u <= ref_u && ref_u <= outside_max_u && outside_min_v <= ref_v && ref_v <= outside_max_v) { if ((UINT)ref_u >= (UINT)lu || (UINT)ref_v >= (UINT)lv) must_calc = true; else { calc_value = calc[(ref_u >> 3) + ref_v * calc_bytewrap]; must_calc = calc_value && ((calc_value >> (ref_u & 7)) & 1); } if (must_calc) { ref_out_u_ = ref_u - out_u_; ref_out_v_ = ref_v - out_v_; ref_out_f_ = aff0MV1(aff0_uv2fg, ref_out_u_, ref_out_v_); ref_out_g_ = aff0MV2(aff0_uv2fg, ref_out_u_, ref_out_v_); ref_out_f = tround(ref_out_f_); ref_out_g = tround(ref_out_g_); sum_weights = 0; sum_contribs_r = 0; sum_contribs_g = 0; sum_contribs_b = 0; sum_contribs_m = 0; for (i = n_pix - 1; i >= 0; --i) { pix_out_f = pix_ref_f[i] + ref_out_f; pix_out_g = pix_ref_g[i] + ref_out_g; weight = (filter[pix_out_f] * filter[pix_out_g]) >> 16; pix_u = pix_ref_u[i] + ref_u; pix_v = pix_ref_v[i] + ref_v; if (pix_u < 0 || pix_u > mu || pix_v < 0 || pix_v > mv) { sum_weights += weight; // 0-padding continue; } notLessThan(0, pix_u); // Copy-padding notLessThan(0, pix_v); notMoreThan(mu, pix_u); notMoreThan(mv, pix_v); 
pix_value = buffer_in[pix_u + pix_v * wrap_in];
            sum_contribs_r += (SUMS_TYPE)pix_value.r * weight;
            sum_contribs_g += (SUMS_TYPE)pix_value.g * weight;
            sum_contribs_b += (SUMS_TYPE)pix_value.b * weight;
            sum_contribs_m += (SUMS_TYPE)pix_value.m * weight;
            sum_weights += weight;
          }

          // Normalize the accumulated channels by the total weight, clamp
          // them to [0, T::maxChannelValue] and store the output pixel.
          inv_sum_weights = 1.0 / sum_weights;
          out_fval_r      = sum_contribs_r * inv_sum_weights;
          out_fval_g      = sum_contribs_g * inv_sum_weights;
          out_fval_b      = sum_contribs_b * inv_sum_weights;
          out_fval_m      = sum_contribs_m * inv_sum_weights;
          notLessThan(0.0, out_fval_r);
          notLessThan(0.0, out_fval_g);
          notLessThan(0.0, out_fval_b);
          notLessThan(0.0, out_fval_m);
          out_value_r = troundp(out_fval_r);
          out_value_g = troundp(out_fval_g);
          out_value_b = troundp(out_fval_b);
          out_value_m = troundp(out_fval_m);
          notMoreThan(T::maxChannelValue, out_value_r);
          notMoreThan(T::maxChannelValue, out_value_g);
          notMoreThan(T::maxChannelValue, out_value_b);
          notMoreThan(T::maxChannelValue, out_value_m);
          pix_out->r = out_value_r;
          pix_out->g = out_value_g;
          pix_out->b = out_value_b;
          pix_out->m = out_value_m;
        } else
          *pix_out = buffer_in[ref_u + ref_v * wrap_in];
      } else
        *pix_out = default_value;
    }
  }

  delete[] calc;
}

//---------------------------------------------------------------------------

#ifdef USE_SSE2

namespace {

// Pixel made of four float channels, stored in b, g, r, m order.
// NOTE(review): DV_ALIGNED(16) is defined elsewhere; presumably it requests
// 16-byte alignment so the object can be used with aligned SSE loads - confirm.
DV_ALIGNED(16) class TPixelFloat {
public:
  TPixelFloat() : b(0), g(0), r(0), m(0) {}

  // Note the parameter order (r, g, b, m) differs from the storage order.
  TPixelFloat(float rr, float gg, float bb, float mm)
      : b(bb), g(gg), r(rr), m(mm) {}

  TPixelFloat(const TPixel32 &pix) : b(pix.b), g(pix.g), r(pix.r), m(pix.m) {}

  float b, g, r, m;
};

}  // anonymous namespace

//---------------------------------------------------------------------------

// SSE2 variant of the rgbm resampling main loop.
//
// For every output pixel (out_x, out_y) the pixel center is mapped through
// aff_xy2uv; (ref_u, ref_v) is the integer part of that pre-image. Then:
//  - if the whole filter mask around the reference pixel lies inside the
//    source raster, the pixel is either filtered (when its bit in the calc
//    mask is set) or copied from the source;
//  - if the mask only partially overlaps the source, the pixel is filtered
//    with out-of-raster contributions counted as zero, or copied;
//  - otherwise default_value (all channels 0) is written.
//
// Parameters, as used below:
//  rout, rin              destination and source rasters;
//  aff_xy2uv              maps output pixel centers to source coordinates;
//  aff0_uv2fg             maps the (ref - out) fractional displacement to
//                         filter-array coordinates;
//  min/max_pix_ref_u/v    extent of the filter mask relative to the
//                         reference pixel;
//  n_pix                  number of contributing pixels;
//  pix_ref_u/v            per-contribution offsets added to (ref_u, ref_v);
//  pix_ref_f/g            per-contribution offsets added to
//                         (ref_out_f, ref_out_g) to index the filter;
//  filter                 table of filter weights.
//
// Pixels are read and written through DWORD casts, so T must be a 4-byte
// pixel with 8-bit channels.
//
// NOTE(review): in the text under review the template parameter list after
// `template` and the template arguments of TRasterPT are not visible
// (presumably `template <class T>` and `TRasterPT<T>`) - confirm against the
// actual file.
template void resample_main_rgbm_SSE2(TRasterPT rout, const TRasterPT &rin,
                                      const TAffine &aff_xy2uv,
                                      const TAffine &aff0_uv2fg,
                                      int min_pix_ref_u, int min_pix_ref_v,
                                      int max_pix_ref_u, int max_pix_ref_v,
                                      int n_pix, int *pix_ref_u,
                                      int *pix_ref_v, int *pix_ref_f,
                                      int *pix_ref_g, short *filter) {
  __m128i zeros = _mm_setzero_si128();
  const T *buffer_in;
  T *buffer_out;
  int lu, lv, wrap_in, mu, mv;
  int lx, ly, wrap_out;
  int out_x, out_y;
  double out_x_, out_y_;
  double out_u_, out_v_;
  int ref_u, ref_v;
  int pix_u, pix_v;
  double ref_out_u_, ref_out_v_;
  double ref_out_f_, ref_out_g_;
  int ref_out_f, ref_out_g;
  int pix_out_f, pix_out_g;
  int filter_mu, filter_mv;
  UINT inside_limit_u, inside_limit_v;
  int inside_nonempty;
  // double outside_min_u_,  outside_min_v_;
  // double outside_max_u_,  outside_max_v_;
  int outside_min_u, outside_min_v;
  int outside_max_u, outside_max_v;
  UCHAR *calc;
  int calc_allocsize;
  int calc_bytewrap;
  UCHAR calc_value;
  bool must_calc;
  T pix_value;
  T default_value(0, 0, 0, 0);
  float weight;
  float sum_weights;
  float inv_sum_weights;
  int i;

#ifdef USE_DOUBLE_TO_INT
  double d2iaux;
#endif

  T *pix_out;

  __m128 sum_contribs_packed;

  __m128i pix_value_packed_i;
  __m128 pix_value_packed;
  __m128 weight_packed;

  __m128 zeros2 = _mm_setzero_ps();

  // Upper clamp for the output channels, replicated in all four lanes.
  float maxChannelValue        = (float)T::maxChannelValue;
  __m128 maxChanneValue_packed = _mm_load1_ps(&maxChannelValue);

  // Nothing to do on an empty destination; an empty source clears the
  // destination to default_value.
  if (!(rout->getLx() > 0 && rout->getLy() > 0)) return;
  if (!(rin->getLx() > 0 && rin->getLy() > 0)) {
    resample_clear_rgbm(rout, default_value);
    return;
  }

  // Build the per-source-pixel bit mask consulted below: one bit per pixel,
  // 8 pixels per byte, calc_bytewrap bytes per row.
  calc           = 0;
  calc_allocsize = 0;
  create_calc(rin, min_pix_ref_u, max_pix_ref_u, min_pix_ref_v, max_pix_ref_v,
              calc, calc_allocsize, calc_bytewrap);

  buffer_in  = rin->pixels();
  buffer_out = rout->pixels();
  lu         = rin->getLx();
  lx         = rout->getLx();
  lv         = rin->getLy();
  ly         = rout->getLy();
  wrap_in    = rin->getWrap();
  wrap_out   = rout->getWrap();
  mu         = lu - 1;
  mv         = lv - 1;

  // Reference pixels whose whole filter mask fits in the source satisfy
  // 0 <= ref + min_pix_ref < inside_limit on both axes.
  filter_mu       = max_pix_ref_u - min_pix_ref_u;
  filter_mv       = max_pix_ref_v - min_pix_ref_v;
  inside_limit_u  = lu - filter_mu;
  inside_limit_v  = lv - filter_mv;
  inside_nonempty = (int)inside_limit_u > 0 && (int)inside_limit_v > 0;
  // Reference pixels whose mask touches the source at all lie in
  // [outside_min, outside_max] on both axes.
  outside_min_u = -max_pix_ref_u;
  outside_min_v = -max_pix_ref_v;
  outside_max_u = mu - min_pix_ref_u;
  outside_max_v = mv - min_pix_ref_v;

  // (out_x_, out_y_) is the center of the output pixel (out_x, out_y).
  for (out_y = 0, out_y_ = 0.5; out_y < ly; out_y++, out_y_ += 1.0) {
    for (out_x = 0, out_x_ = 0.5; out_x < lx; out_x++, out_x_ += 1.0) {
      pix_out = buffer_out + out_y * wrap_out + out_x;

      out_u_ = affMV1(aff_xy2uv, out_x_, out_y_);
      out_v_ = affMV2(aff_xy2uv, out_x_, out_y_);
      ref_u  = intLE(out_u_);
      ref_v  = intLE(out_v_);

      // The (UINT) casts turn negative values into huge ones, so each
      // comparison checks both the lower (>= 0) and the upper bound.
      if (inside_nonempty && (UINT)(ref_u + min_pix_ref_u) < inside_limit_u &&
          (UINT)(ref_v + min_pix_ref_v) < inside_limit_v) {
        calc_value = calc[(ref_u >> 3) + ref_v * calc_bytewrap];
        if (calc_value && ((calc_value >> (ref_u & 7)) & 1)) {
          ref_out_u_ = ref_u - out_u_;
          ref_out_v_ = ref_v - out_v_;
          ref_out_f_ = aff0MV1(aff0_uv2fg, ref_out_u_, ref_out_v_);
          ref_out_g_ = aff0MV2(aff0_uv2fg, ref_out_u_, ref_out_v_);
          ref_out_f  = tround(ref_out_f_);
          ref_out_g  = tround(ref_out_g_);

          sum_weights         = 0;
          sum_contribs_packed = _mm_setzero_ps();

          for (i = n_pix - 1; i >= 0; i--) {
            pix_out_f = pix_ref_f[i] + ref_out_f;
            pix_out_g = pix_ref_g[i] + ref_out_g;
            weight =
                (float)((filter[pix_out_f] * filter[pix_out_g]) >> 16);
            pix_u = pix_ref_u[i] + ref_u;
            pix_v = pix_ref_v[i] + ref_v;

            pix_value = buffer_in[pix_u + pix_v * wrap_in];
            // Widen the four 8-bit channels to 16 bits, then to 32 bits, and
            // convert them to four floats.
            pix_value_packed_i = _mm_unpacklo_epi8(
                _mm_cvtsi32_si128(*(DWORD *)&pix_value), zeros);
            pix_value_packed =
                _mm_cvtepi32_ps(_mm_unpacklo_epi16(pix_value_packed_i, zeros));

            // Accumulate weight * pixel on all four channels at once.
            weight_packed = _mm_load1_ps(&weight);
            sum_contribs_packed =
                _mm_add_ps(sum_contribs_packed,
                           _mm_mul_ps(pix_value_packed, weight_packed));

            sum_weights += weight;
          }

          // Normalize by the total weight and clamp to
          // [0, T::maxChannelValue].
          inv_sum_weights               = 1.0f / sum_weights;
          __m128 inv_sum_weights_packed = _mm_load1_ps(&inv_sum_weights);

          __m128 out_fval_packed =
              _mm_mul_ps(sum_contribs_packed, inv_sum_weights_packed);
          out_fval_packed = _mm_max_ps(out_fval_packed, zeros2);
          out_fval_packed = _mm_min_ps(out_fval_packed, maxChanneValue_packed);

          // Convert back to integers and narrow 32 -> 16 -> 8 bits; the low
          // 32 bits then hold the four output channels.
          __m128i out_value_packed_i = _mm_cvtps_epi32(out_fval_packed);
          out_value_packed_i = _mm_packs_epi32(out_value_packed_i, zeros);
          out_value_packed_i = _mm_packus_epi16(out_value_packed_i, zeros);
          *(DWORD *)(pix_out) = _mm_cvtsi128_si32(out_value_packed_i);
        } else
          // Mask bit off: the pixel is copied from the source.
          *pix_out = buffer_in[ref_u + ref_v * wrap_in];
      } else
          // if( outside_min_u_ <= out_u_ && out_u_ <= outside_max_u_ &&
          //     outside_min_v_ <= out_v_ && out_v_ <= outside_max_v_ )
          if (outside_min_u <= ref_u && ref_u <= outside_max_u &&
              outside_min_v <= ref_v && ref_v <= outside_max_v) {
        // The filter mask only partially overlaps the source raster.
        // Reference pixels outside the raster have no mask bit and are
        // always filtered.
        if ((UINT)ref_u >= (UINT)lu || (UINT)ref_v >= (UINT)lv)
          must_calc = true;
        else {
          calc_value = calc[(ref_u >> 3) + ref_v * calc_bytewrap];
          must_calc  = calc_value && ((calc_value >> (ref_u & 7)) & 1);
        }

        if (must_calc) {
          ref_out_u_ = ref_u - out_u_;
          ref_out_v_ = ref_v - out_v_;
          ref_out_f_ = aff0MV1(aff0_uv2fg, ref_out_u_, ref_out_v_);
          ref_out_g_ = aff0MV2(aff0_uv2fg, ref_out_u_, ref_out_v_);
          ref_out_f  = tround(ref_out_f_);
          ref_out_g  = tround(ref_out_g_);

          sum_weights         = 0;
          sum_contribs_packed = _mm_setzero_ps();

          for (i = n_pix - 1; i >= 0; i--) {
            pix_out_f = pix_ref_f[i] + ref_out_f;
            pix_out_g = pix_ref_g[i] + ref_out_g;
            weight =
                (float)((filter[pix_out_f] * filter[pix_out_g]) >> 16);
            pix_u = pix_ref_u[i] + ref_u;
            pix_v = pix_ref_v[i] + ref_v;

            // Contributions falling outside the source count as zero: only
            // their weight is accumulated.
            if (pix_u < 0 || pix_u > mu || pix_v < 0 || pix_v > mv) {
              sum_weights += weight;
              continue;
            }

            // After the test above pix_u is in [0, mu] and pix_v in [0, mv],
            // so these four calls (presumably clamps) cannot change them.
            notLessThan(0, pix_u);
            notLessThan(0, pix_v);
            notMoreThan(mu, pix_u);
            notMoreThan(mv, pix_v);

            pix_value = buffer_in[pix_u + pix_v * wrap_in];
            pix_value_packed_i = _mm_unpacklo_epi8(
                _mm_cvtsi32_si128(*(DWORD *)&pix_value), zeros);
            pix_value_packed =
                _mm_cvtepi32_ps(_mm_unpacklo_epi16(pix_value_packed_i, zeros));

            weight_packed = _mm_load1_ps(&weight);
            sum_contribs_packed =
                _mm_add_ps(sum_contribs_packed,
                           _mm_mul_ps(pix_value_packed, weight_packed));

            sum_weights += weight;
          }

          inv_sum_weights               = 1.0f / sum_weights;
          __m128 inv_sum_weights_packed = _mm_load1_ps(&inv_sum_weights);

          __m128 out_fval_packed =
              _mm_mul_ps(sum_contribs_packed, inv_sum_weights_packed);
          out_fval_packed = _mm_max_ps(out_fval_packed, zeros2);
          out_fval_packed = _mm_min_ps(out_fval_packed, maxChanneValue_packed);

          __m128i out_value_packed_i = _mm_cvtps_epi32(out_fval_packed);
          out_value_packed_i = _mm_packs_epi32(out_value_packed_i, zeros);
          out_value_packed_i = _mm_packus_epi16(out_value_packed_i, zeros);
          *(DWORD *)(pix_out) = _mm_cvtsi128_si32(out_value_packed_i);
        } else
          *pix_out = buffer_in[ref_u + ref_v * wrap_in];
      } else {
        // The filter mask does not touch the source raster at all.
        *pix_out = default_value;
      }
    }
  }
  if (calc) delete[] calc;
}

namespace {

//---------------------------------------------------------------------------

// Blends ink and paint by tone and stores the result as an 8-bit-per-channel
// pixel in *pix_out:
//   out = ((maxtone - tone) * ink + tone * paint) / maxtone
// ink and paint are read with _mm_load_ps, i.e. as four floats each at a
// 16-byte aligned address; *tone is broadcast to all four lanes.
void inline blendBySSE2(TPixel32 *pix_out, float *ink, float *paint,
                        float *tone, const __m128 &maxtone_packed,
                        const __m128i &zeros) {
  __m128 a_packed = _mm_load_ps(ink);
  __m128 b_packed = _mm_load_ps(paint);

  __m128 num_packed  = _mm_load1_ps(tone);
  __m128 diff_packed = _mm_sub_ps(maxtone_packed, num_packed);

  // compute, on all four lanes at once, out = ((den-num)*a + num*b)/den
  __m128 pix_value_packed = _mm_mul_ps(diff_packed, a_packed);
  __m128 tmpPix_packed    = _mm_mul_ps(num_packed, b_packed);

  pix_value_packed = _mm_add_ps(pix_value_packed, tmpPix_packed);
  pix_value_packed = _mm_div_ps(pix_value_packed, maxtone_packed);

  // convert the channels from float to char
  __m128i pix_value_packed_i = _mm_cvtps_epi32(pix_value_packed);
  pix_value_packed_i = _mm_packs_epi32(pix_value_packed_i, zeros);
  pix_value_packed_i = _mm_packus_epi16(pix_value_packed_i, zeros);

  *(DWORD *)(pix_out) = _mm_cvtsi128_si32(pix_value_packed_i);
}

//---------------------------------------------------------------------------

// Same blend as the overload above, but the result is left as four floats in
// pix_out_packed instead of being converted to an 8-bit pixel. The zeros
// parameter is not used by this overload.
void inline blendBySSE2(__m128 &pix_out_packed, float *ink, float *paint,
                        float *tone, const __m128 &maxtone_packed,
                        const __m128i &zeros) {
  __m128 a_packed = _mm_load_ps(ink);
  __m128 b_packed = _mm_load_ps(paint);

  __m128 num_packed  = _mm_load1_ps(tone);
  __m128 diff_packed = _mm_sub_ps(maxtone_packed, num_packed);

  // compute, on all four lanes at once, out = ((den-num)*a + num*b)/den
  pix_out_packed       = _mm_mul_ps(diff_packed, a_packed);
  __m128 tmpPix_packed = _mm_mul_ps(num_packed, b_packed);

  pix_out_packed = _mm_add_ps(pix_out_packed, tmpPix_packed);
  pix_out_packed = _mm_div_ps(pix_out_packed, maxtone_packed);
}

}  // namespace

#endif  // USE_SSE2
//--------------------------------------------------------------------------- static void get_prow_gr8(const TRasterGR8P &rin, double a11, double a12, double a21, double a22, int pmin, int pmax, int q, float *prow) { UCHAR *bufin_gr8, *in_gr8; int u, v; int p, p1, p2; UINT lu, lv; UINT mu, mv; int du, dv; double u_0, v_0; double u_, v_; double fu, fv; double gu, gv; #ifdef USE_DOUBLE_TO_INT double d2iaux; #endif #ifdef BORDER #undef BORDER #endif #define BORDER BORDER_GR8 bufin_gr8 = (UCHAR *)rin->pixels(); lu = rin->getLx(); mu = lu - 1; lv = rin->getLy(); mv = lv - 1; du = 1; dv = rin->getWrap(); u_0 = a12 * q; v_0 = a22 * q; for (p = pmin; p <= pmax; p++) if (!prow[p]) { u_ = u_0 + a11 * p; u = tfloor(u_); v_ = v_0 + a21 * p; v = tfloor(v_); if ((UINT)u < mu && (UINT)v < mv) break; fu = u_ - u; gu = 1. - fu; fv = v_ - v; gv = 1. - fv; in_gr8 = bufin_gr8 + (u * du + v * dv); prow[p] = (float)troundp( fu * gv * (((UINT)(u + 1) < lu && (UINT)v < lv) ? in_gr8[du] : BORDER) + fu * fv * (((UINT)(u + 1) < lu && (UINT)(v + 1) < lv) ? in_gr8[du + dv] : BORDER) + gu * gv * (((UINT)u < lu && (UINT)v < lv) ? in_gr8[0] : BORDER) + gu * fv * (((UINT)u < lu && (UINT)(v + 1) < lv) ? in_gr8[dv] : BORDER)); } p1 = p; for (p = pmax; p > p1; p--) if (!prow[p]) { u_ = u_0 + a11 * p; u = tfloor(u_); v_ = v_0 + a21 * p; v = tfloor(v_); if ((UINT)u < mu && (UINT)v < mv) break; fu = u_ - u; gu = 1. - fu; fv = v_ - v; gv = 1. - fv; in_gr8 = bufin_gr8 + (u * du + v * dv); prow[p] = (float)troundp( fu * gv * (((UINT)(u + 1) < lu && (UINT)v < lv) ? in_gr8[du] : BORDER) + fu * fv * (((UINT)(u + 1) < lu && (UINT)(v + 1) < lv) ? in_gr8[du + dv] : BORDER) + gu * gv * (((UINT)u < lu && (UINT)v < lv) ? in_gr8[0] : BORDER) + gu * fv * (((UINT)u < lu && (UINT)(v + 1) < lv) ? in_gr8[dv] : BORDER)); } p2 = p; for (p = p1; p <= p2; p++) if (!prow[p]) { u_ = u_0 + a11 * p; u = (int)(u_); v_ = v_0 + a21 * p; v = (int)(v_); fu = u_ - u; gu = 1. - fu; fv = v_ - v; gv = 1. 
- fv; in_gr8 = bufin_gr8 + (u * du + v * dv); prow[p] = (float)troundp(fu * gv * in_gr8[du] + fu * fv * in_gr8[du + dv] + gu * gv * in_gr8[0] + gu * fv * in_gr8[dv]); } } //--------------------------------------------------------------------------- #define grey(PIXEL) (TPixelGR8::from(PIXEL).value) static void get_prow_gr8(const TRaster32P &rin, double a11, double a12, double a21, double a22, int pmin, int pmax, int q, float *prow) { TPixel *bufin_32, *in_32; int u, v; int p, p1, p2; UINT lu, lv; UINT mu, mv; int du, dv; double u_0, v_0; double u_, v_; double fu, fv; double gu, gv; #ifdef USE_DOUBLE_TO_INT double d2iaux; #endif #ifdef BORDER #undef BORDER #endif #define BORDER BORDER_GR8 bufin_32 = (TPixel *)rin->pixels(); lu = rin->getLx(); mu = lu - 1; lv = rin->getLy(); mv = lv - 1; du = 1; dv = rin->getWrap(); u_0 = a12 * q; v_0 = a22 * q; for (p = pmin; p <= pmax; p++) if (!prow[p]) { u_ = u_0 + a11 * p; u = tfloor(u_); v_ = v_0 + a21 * p; v = tfloor(v_); if ((UINT)u < mu && (UINT)v < mv) break; fu = u_ - u; gu = 1. - fu; fv = v_ - v; gv = 1. - fv; in_32 = bufin_32 + (u * du + v * dv); prow[p] = (float)troundp( fu * gv * (((UINT)(u + 1) < lu && (UINT)v < lv) ? grey(in_32[du]) : BORDER) + fu * fv * (((UINT)(u + 1) < lu && (UINT)(v + 1) < lv) ? grey(in_32[du + dv]) : BORDER) + gu * gv * (((UINT)u < lu && (UINT)v < lv) ? grey(in_32[0]) : BORDER) + gu * fv * (((UINT)u < lu && (UINT)(v + 1) < lv) ? grey(in_32[dv]) : BORDER)); } p1 = p; for (p = pmax; p > p1; p--) if (!prow[p]) { u_ = u_0 + a11 * p; u = tfloor(u_); v_ = v_0 + a21 * p; v = tfloor(v_); if ((UINT)u < mu && (UINT)v < mv) break; fu = u_ - u; gu = 1. - fu; fv = v_ - v; gv = 1. - fv; in_32 = bufin_32 + (u * du + v * dv); prow[p] = (float)troundp( fu * gv * (((UINT)(u + 1) < lu && (UINT)v < lv) ? grey(in_32[du]) : BORDER) + fu * fv * (((UINT)(u + 1) < lu && (UINT)(v + 1) < lv) ? grey(in_32[du + dv]) : BORDER) + gu * gv * (((UINT)u < lu && (UINT)v < lv) ? 
grey(in_32[0]) : BORDER) + gu * fv * (((UINT)u < lu && (UINT)(v + 1) < lv) ? grey(in_32[dv]) : BORDER)); } p2 = p; for (p = p1; p <= p2; p++) if (!prow[p]) { u_ = u_0 + a11 * p; u = (int)(u_); v_ = v_0 + a21 * p; v = (int)(v_); fu = u_ - u; gu = 1. - fu; fv = v_ - v; gv = 1. - fv; in_32 = bufin_32 + (u * du + v * dv); prow[p] = (float)troundp( fu * gv * grey(in_32[du]) + fu * fv * grey(in_32[du + dv]) + gu * gv * grey(in_32[0]) + gu * fv * grey(in_32[dv])); } } //--------------------------------------------------------------------------- typedef float *MyFloatPtr; static void rop_resample_gr8(const TRasterGR8P &rin, TRasterGR8P rout, const TAffine &aff, const TAffine &invrot, FILTER *rowflt, int pmin, int pmax, FILTER *colflt, int qmin, int qmax, int nrows, int flatradu, int flatradv, double flatradx_, double flatrady_, NOCALC *rownoc, int nocdiamx, NOCALC *colnoc, int nocdiamy) { FILTER *xflt, *yflt; UCHAR *bufin_gr8, *bufout_gr8, *in_gr8, *out_gr8; float *prow_base, *prow, **xrow_base, **xrow, *xxx, tmp; double x_, y_; int u, v; //, vw; int p, q; int x, y; int lu, lv, mu, mv; int lx, ly, mx, my; // int dudp, dudq, dvdp, dvdq; int topq, topy; int wrapin, wrapout; int flatdiamu, flatdiamv; int xlo, xhi, ylo, yhi; int *nocheight; int nocwidth; int i, j; #ifdef USE_DOUBLE_TO_INT double d2iaux; #endif bufin_gr8 = (UCHAR *)rin->pixels(); bufout_gr8 = (UCHAR *)rout->pixels(); wrapin = rin->getWrap(); wrapout = rout->getWrap(); lu = rin->getLx(); mu = lu - 1; lv = rin->getLy(); mv = lv - 1; lx = rout->getLx(); mx = lx - 1; ly = rout->getLy(); my = ly - 1; prow_base = new float[pmax - pmin + 1]; prow = prow_base - pmin; xrow_base = new MyFloatPtr[qmax - (qmin - nrows) + 1]; xrow = xrow_base - (qmin - nrows); topq = qmin; // app = xrow+topq-nrows; i = 0; j = 3; for (i = 0; i < nrows; i++) *(xrow + topq - nrows + i) = new float[lx]; // while(app= flatdiamv) if (colval[u] == flatval) flatcols++; else flatcols = 1; else flatcols = 0; flatval = colval[u]; if (flatcols >= 
flatdiamu) { #ifdef VECCHIA_MANIERA x_ = AFF_M_V_1(aff, u - flatradu, v - flatradv); y_ = AFF_M_V_2(aff, u - flatradu, v - flatradv); xlo = CEIL(x_ - flatradx_); xhi = FLOOR(x_ + flatradx_); ylo = CEIL(y_ - flatrady_); yhi = FLOOR(y_ + flatrady_); NOT_LESS_THAN(0, xlo); NOT_MORE_THAN(mx, xhi); NOT_LESS_THAN(0, ylo); NOT_MORE_THAN(my, yhi); #endif xlo = std::max(0, (int)xlo_); xhi = std::min(mx, (int)xhi_); ylo = std::max(0, (int)ylo_); yhi = std::min(my, (int)yhi_); for (y = ylo; y <= yhi; y++) for (x = xlo; x <= xhi; x++) bufout_gr8[x + y * wrapout] = flatval, count++; } xlo_ += aff.a11; xhi_ += aff.a11; ylo_ += aff.a21; yhi_ += aff.a21; } } delete[] colval; delete[] colheight; topy = 0; /*TCALLOC (nocheight, lx);*/ nocheight = new int[lx]; memset(nocheight, 0, lx * sizeof(int)); out_gr8 = bufout_gr8; for (x = 0; x < lx; x++) if (out_gr8[x] != GREY_GR8) nocheight[x]++; else nocheight[x] = 0; for (y = 0, yflt = colflt; y < ly; y++, yflt++) { for (; topq <= yflt->last; topq++) { xrow[topq] = xrow[topq - nrows]; xxx = xrow[topq]; memset(xxx, 0, sizeof(*xxx) * lx); /* 0.0 == nocalc */ while (topy < ly - 1 && colnoc[topy].last < topq) { topy++; out_gr8 = bufout_gr8 + topy * wrapout; for (x = 0; x < lx; x++) if (out_gr8[x] != GREY_GR8) nocheight[x]++; else nocheight[x] = 0; } if (topy < ly && colnoc[topy].first <= topq) { for (x = 0; x < lx; x++) if (nocheight[x] < nocdiamy) xxx[x] = 1.0; /* 1.0 == calc */ } else { for (x = 0; x < lx; x++) xxx[x] = 1.0; /* 1.0 == calc */ } memset(prow + pmin, 0, sizeof(*prow) * (pmax - pmin + 1)); /* 0.0 == calc */ nocwidth = 0; for (x = 0; x < lx; x++) if (xxx[x]) nocwidth = 0; else { nocwidth++; if (nocwidth >= nocdiamx) for (p = rownoc[x].first; p <= rownoc[x].last; p++) prow[p] = 1.0; /* 1.0 == nocalc */ } get_prow_gr8(rin, invrot.a11, invrot.a12, invrot.a21, invrot.a22, pmin, pmax, topq, prow); for (x = 0, xflt = rowflt; x < lx; x++, xflt++) if (xxx[x]) { for (tmp = 0.0, p = xflt->first; p <= xflt->last; p++) tmp += xflt->w[p] * 
prow[p]; xxx[x] = tmp; } } out_gr8 = bufout_gr8 + wrapout * y; for (x = 0; x < lx; x++) if (out_gr8[x] == GREY_GR8) { for (tmp = 0.0, q = yflt->first; q <= yflt->last; q++) tmp += yflt->w[q] * xrow[q][x]; out_gr8[x] = TO8BIT(tmp); } } // cest_plus_facile (xrow); for (q = 0; q < nrows; q++) delete xrow_base[q]; delete xrow_base; delete prow_base; } //--------------------------------------------------------------------------- static void rop_resample_rgbm32_gr8(const TRaster32P &rin, TRasterGR8P rout, const TAffine &aff, const TAffine &invrot, FILTER *rowflt, int pmin, int pmax, FILTER *colflt, int qmin, int qmax, int nrows, int flatradu, int flatradv, double flatradx_, double flatrady_, NOCALC *rownoc, int nocdiamx, NOCALC *colnoc, int nocdiamy) { FILTER *xflt, *yflt; UCHAR *bufout_gr8, *out_gr8; TPixel *bufin_32, *in_32; float *prow_base, *prow, **xrow_base, **xrow, *xxx, tmp; double x_, y_; int u, v; //, vw; int p, q; int x, y; int lu, lv, mu, mv; int lx, ly, mx, my; // int dudp, dudq, dvdp, dvdq; int topq, topy; int wrapin, wrapout; int flatdiamu, flatdiamv; int xlo, xhi, ylo, yhi; int *nocheight; int nocwidth; int i, j; #ifdef USE_DOUBLE_TO_INT double d2iaux; #endif bufin_32 = (TPixel *)rin->pixels(); bufout_gr8 = (UCHAR *)rout->pixels(); wrapin = rin->getWrap(); wrapout = rout->getWrap(); lu = rin->getLx(); mu = lu - 1; lv = rin->getLy(); mv = lv - 1; lx = rout->getLx(); mx = lx - 1; ly = rout->getLy(); my = ly - 1; prow_base = new float[pmax - pmin + 1]; prow = prow_base - pmin; xrow_base = new MyFloatPtr[qmax - (qmin - nrows) + 1]; xrow = xrow_base - (qmin - nrows); topq = qmin; // app = xrow+topq-nrows; i = 0; j = 3; for (i = 0; i < nrows; i++) *(xrow + topq - nrows + i) = new float[lx]; // while(app= flatdiamv) if (colval[u] == flatval) flatcols++; else flatcols = 1; else flatcols = 0; flatval = colval[u]; if (flatcols >= flatdiamu) { #ifdef VECCHIA_MANIERA x_ = AFF_M_V_1(aff, u - flatradu, v - flatradv); y_ = AFF_M_V_2(aff, u - flatradu, v - flatradv); xlo 
= CEIL(x_ - flatradx_); xhi = FLOOR(x_ + flatradx_); ylo = CEIL(y_ - flatrady_); yhi = FLOOR(y_ + flatrady_); NOT_LESS_THAN(0, xlo); NOT_MORE_THAN(mx, xhi); NOT_LESS_THAN(0, ylo); NOT_MORE_THAN(my, yhi); #endif xlo = std::max(0, (int)xlo_); xhi = std::min(mx, (int)xhi_); ylo = std::max(0, (int)ylo_); yhi = std::min(my, (int)yhi_); for (y = ylo; y <= yhi; y++) for (x = xlo; x <= xhi; x++) bufout_gr8[x + y * wrapout] = flatval, count++; } xlo_ += aff.a11; xhi_ += aff.a11; ylo_ += aff.a21; yhi_ += aff.a21; } } delete[] colval; delete[] colheight; topy = 0; /*TCALLOC (nocheight, lx);*/ nocheight = new int[lx]; memset(nocheight, 0, lx * sizeof(int)); out_gr8 = bufout_gr8; for (x = 0; x < lx; x++) if (out_gr8[x] != GREY_GR8) nocheight[x]++; else nocheight[x] = 0; for (y = 0, yflt = colflt; y < ly; y++, yflt++) { for (; topq <= yflt->last; topq++) { xrow[topq] = xrow[topq - nrows]; xxx = xrow[topq]; memset(xxx, 0, sizeof(*xxx) * lx); /* 0.0 == nocalc */ while (topy < ly - 1 && colnoc[topy].last < topq) { topy++; out_gr8 = bufout_gr8 + topy * wrapout; for (x = 0; x < lx; x++) if (out_gr8[x] != GREY_GR8) nocheight[x]++; else nocheight[x] = 0; } if (topy < ly && colnoc[topy].first <= topq) { for (x = 0; x < lx; x++) if (nocheight[x] < nocdiamy) xxx[x] = 1.0; /* 1.0 == calc */ } else { for (x = 0; x < lx; x++) xxx[x] = 1.0; /* 1.0 == calc */ } memset(prow + pmin, 0, sizeof(*prow) * (pmax - pmin + 1)); /* 0.0 == calc */ nocwidth = 0; for (x = 0; x < lx; x++) if (xxx[x]) nocwidth = 0; else { nocwidth++; if (nocwidth >= nocdiamx) for (p = rownoc[x].first; p <= rownoc[x].last; p++) prow[p] = 1.0; /* 1.0 == nocalc */ } get_prow_gr8(rin, invrot.a11, invrot.a12, invrot.a21, invrot.a22, pmin, pmax, topq, prow); for (x = 0, xflt = rowflt; x < lx; x++, xflt++) if (xxx[x]) { for (tmp = 0.0, p = xflt->first; p <= xflt->last; p++) tmp += xflt->w[p] * prow[p]; xxx[x] = tmp; } } out_gr8 = bufout_gr8 + wrapout * y; for (x = 0; x < lx; x++) if (out_gr8[x] == GREY_GR8) { for (tmp = 0.0, q = 
yflt->first; q <= yflt->last; q++) tmp += yflt->w[q] * xrow[q][x]; out_gr8[x] = TO8BIT(tmp); } } // cest_plus_facile (xrow); for (q = 0; q < nrows; q++) delete xrow_base[q]; delete xrow_base; delete prow_base; } //--------------------------------------------------------------------------- // #define USE_STATIC_VARS //--------------------------------------------------------------------------- template void rop_resample_rgbm(TRasterPT rout, const TRasterPT &rin, const TAffine &aff, TRop::ResampleFilterType flt_type, double blur) { #define FILTER_RESOLUTION 1024 #define MAX_FILTER_VAL 32767 #ifdef USE_STATIC_VARS static TRop::ResampleFilterType current_flt_type = TRop::None; static std::unique_ptr filter_array; static short *filter = 0; static int min_filter_fg, max_filter_fg; static int filter_array_size = 0; static int n_pix = 0; static std::unique_ptr pix_ref_u; static std::unique_ptr pix_ref_v; static std::unique_ptr pix_ref_f; static std::unique_ptr pix_ref_g; static int current_max_n_pix = 0; #else std::unique_ptr filter_array; short *filter = 0; int min_filter_fg, max_filter_fg; int filter_array_size = 0; int n_pix = 0; std::unique_ptr pix_ref_u; std::unique_ptr pix_ref_v; std::unique_ptr pix_ref_f; std::unique_ptr pix_ref_g; int current_max_n_pix = 0; #endif int filter_st_radius; int filter_fg_radius; int filter_size; int f; double s_; double weight_; int weight; TAffine aff_uv2xy; TAffine aff_xy2uv; TAffine aff0_uv2xy; TAffine aff0_xy2st; TAffine aff0_uv2st; TAffine aff0_st2fg; TAffine aff0_uv2fg; TAffine aff0_fg2uv; double scale_x, scale_y; double inv_blur; int max_n_pix; double min_pix_out_u_, min_pix_out_v_; double max_pix_out_u_, max_pix_out_v_; int min_pix_ref_u, min_pix_ref_v; int max_pix_ref_u, max_pix_ref_v; int cur_pix_ref_u, cur_pix_ref_v; double cur_pix_ref_f_, cur_pix_ref_g_; int cur_pix_ref_f, cur_pix_ref_g; double min_ref_out_f_, min_ref_out_g_; double max_ref_out_f_, max_ref_out_g_; int min_ref_out_f, min_ref_out_g; int max_ref_out_f, 
max_ref_out_g; int min_pix_ref_f, min_pix_ref_g; int max_pix_ref_f, max_pix_ref_g; int min_pix_out_f, min_pix_out_g; int max_pix_out_f, max_pix_out_g; int min_pix_out_fg; int max_pix_out_fg; #ifdef USE_DOUBLE_TO_INT double d2iaux; #endif assert(flt_type != TRop::None); // Retrieve the filter radius in the st and fx references filter_st_radius = get_filter_radius(flt_type); filter_fg_radius = filter_st_radius * FILTER_RESOLUTION; // Retrieve the transformation affines among the involved references // NOTE: The 0.5 translation is needed in order to make the later // resample_main procedures work with pixel centers. aff_uv2xy = aff * TTranslation(0.5, 0.5); aff0_uv2xy = aff_uv2xy.place(0.0, 0.0, 0.0, 0.0); aff_xy2uv = aff_uv2xy.inv(); // Consider the norm of (1,0) and (0,1) images. scale_x = sqrt(sq(aff_uv2xy.a11) + sq(aff_uv2xy.a12)); scale_y = sqrt(sq(aff_uv2xy.a21) + sq(aff_uv2xy.a22)); // Inserting the following scale will make shrinks look smooth. aff0_xy2st = TScale((scale_x > 1.0) ? 1.0 / scale_x : 1.0, (scale_y > 1.0) ? 1.0 / scale_y : 1.0); if (blur > 1.0) // Consider the blur as a scale in the filter reference { inv_blur = 1.0 / blur; aff0_xy2st = TScale(inv_blur, inv_blur) * aff0_xy2st; } aff0_uv2st = aff0_xy2st * aff0_uv2xy; aff0_st2fg = TScale(FILTER_RESOLUTION, FILTER_RESOLUTION); aff0_uv2fg = aff0_st2fg * aff0_uv2st; aff0_fg2uv = aff0_uv2fg.inv(); // Take the pre-image of the filter mask in uv coordinates. This is where // input pixels will be taken to find an output one. minmax(-filter_fg_radius, -filter_fg_radius, filter_fg_radius, filter_fg_radius, aff0_fg2uv, min_pix_out_u_, min_pix_out_v_, max_pix_out_u_, max_pix_out_v_); // Adjust them to integer coordinates. The intent here is that // of isolating their fractionary part - furthermore, we'll take // the *opposites* of fractionary parts (explained later). 
// NOTE: We'll assume we want to include in the filter mask all //*integer positions around a fractionary displacement of the origin*; // so the approximations below are stricly necessary. min_pix_ref_u = intLE(min_pix_out_u_); min_pix_ref_v = intLE(min_pix_out_v_); max_pix_ref_u = intGE(max_pix_out_u_); max_pix_ref_v = intGE(max_pix_out_v_); if (blur <= 1.0) { // If the blur radius has sub-pixel width if (aff_uv2xy.a12 == 0.0 && aff_uv2xy.a21 == 0.0) { // And it's the sole scales case if (aff_uv2xy.a11 == 1.0 && isInt(aff_uv2xy.a13 - 0.5)) { // And the x mapping is bijective, then prevent any filtering. min_pix_ref_u = 0; max_pix_ref_u = 0; } if (aff_uv2xy.a22 == 1.0 && isInt(aff_uv2xy.a23 - 0.5)) { // And the y mapping is bijective ... min_pix_ref_v = 0; max_pix_ref_v = 0; } } else if (aff_uv2xy.a11 == 0.0 && aff_uv2xy.a22 == 0.0) { // The mirrored version of the one above if (aff_uv2xy.a12 == 1.0 && isInt(aff_uv2xy.a13 - 0.5)) { min_pix_ref_v = 0; max_pix_ref_v = 0; } if (aff_uv2xy.a21 == 1.0 && isInt(aff_uv2xy.a23 - 0.5)) { min_pix_ref_u = 0; max_pix_ref_u = 0; } } } // Take the number of pixels involved in the filter (uv reference) max_n_pix = (max_pix_ref_u - min_pix_ref_u + 1) * (max_pix_ref_v - min_pix_ref_v + 1); if (max_n_pix > current_max_n_pix) { current_max_n_pix = max_n_pix; pix_ref_u.reset(new int[current_max_n_pix]); pix_ref_v.reset(new int[current_max_n_pix]); pix_ref_f.reset(new int[current_max_n_pix]); pix_ref_g.reset(new int[current_max_n_pix]); assert(pix_ref_u && pix_ref_v && pix_ref_f && pix_ref_g); } // Build the image of fractionary domain from the uv to fg reference minmax(-1, -1, 0, 0, aff0_uv2fg, min_ref_out_f_, min_ref_out_g_, max_ref_out_f_, max_ref_out_g_); min_ref_out_f = tround(min_ref_out_f_); min_ref_out_g = tround(min_ref_out_g_); max_ref_out_f = tround(max_ref_out_f_); max_ref_out_g = tround(max_ref_out_g_); // Remember that negative fractionary parts must be subtracted from their // integer counterparts min_pix_ref_f = 
-filter_fg_radius - max_ref_out_f; min_pix_ref_g = -filter_fg_radius - max_ref_out_g; max_pix_ref_f = filter_fg_radius - min_ref_out_f; max_pix_ref_g = filter_fg_radius - min_ref_out_g; min_pix_out_f = c_maxint; min_pix_out_g = c_maxint; max_pix_out_f = c_minint; max_pix_out_g = c_minint; n_pix = 0; if (!pix_ref_u || !pix_ref_v || !pix_ref_f || !pix_ref_g) { throw TRopException( "tresample.cpp line2640 function rop_resample_rgbm() : alloc pix_ref " "failed"); } // Build the *integer* part of the fg images of those coordinates inside the // uv filter bounds. // NOTE: Doing so reduces the execution time for the later resample_main // procedure - // the idea is the following: // We want to build the output pixel (x,y) obtained from the source image // through A. // Then, we find (u,v) = (A^-1) * (x,y) = ([u],[v]) + ({u},{v}), where [] and // {} // denote integer and fractionary parts. // Now, the convolution positions on fg for (u,v) can be thought of being // calculated by taking // images of integer displacements of (u,v). So, their calculation is // definitely *not* directly // dependent on the fractionary part of (u,v) - that is, the (i,j)th // displacement position of FG(u,v) // is: // FG([u]+i,[v]+j) = FG(u+i,v+j) - FG({u},{v}) = FG(i,j) - // FG({u},{v}); // // where it is assumed that FG(u,v) = (0,0), since the filter is to be // considered centered on (u,v). 
for (cur_pix_ref_v = min_pix_ref_v; cur_pix_ref_v <= max_pix_ref_v; cur_pix_ref_v++) for (cur_pix_ref_u = min_pix_ref_u; cur_pix_ref_u <= max_pix_ref_u; cur_pix_ref_u++) { // Get the image of current uv position cur_pix_ref_f_ = affMV1(aff0_uv2fg, cur_pix_ref_u, cur_pix_ref_v); cur_pix_ref_g_ = affMV2(aff0_uv2fg, cur_pix_ref_u, cur_pix_ref_v); // And round it to the closest integer in fg cur_pix_ref_f = tround(cur_pix_ref_f_); cur_pix_ref_g = tround(cur_pix_ref_g_); if (min_pix_ref_f <= cur_pix_ref_f && cur_pix_ref_f <= max_pix_ref_f && min_pix_ref_g <= cur_pix_ref_g && cur_pix_ref_g <= max_pix_ref_g) { pix_ref_u[n_pix] = cur_pix_ref_u; pix_ref_v[n_pix] = cur_pix_ref_v; pix_ref_f[n_pix] = cur_pix_ref_f; pix_ref_g[n_pix] = cur_pix_ref_g; notMoreThan(cur_pix_ref_f + min_ref_out_f, min_pix_out_f); // cur_pix_ref > min_pix_out - min_ref_out notMoreThan(cur_pix_ref_g + min_ref_out_g, min_pix_out_g); notLessThan(cur_pix_ref_f + max_ref_out_f, max_pix_out_f); // cur_pix_ref < max_pix_out - max_ref_out notLessThan(cur_pix_ref_g + max_ref_out_g, max_pix_out_g); n_pix++; } } assert(n_pix > 0); #ifdef USE_STATIC_VARS if (flt_type != current_flt_type) { current_flt_type = flt_type; #endif // Build a sufficient filter weights array min_filter_fg = -filter_fg_radius - FILTER_RESOLUTION * 3 / 2; //??? max_filter_fg = filter_fg_radius + FILTER_RESOLUTION * 3 / 2; filter_size = max_filter_fg - min_filter_fg + 1; if (filter_size > filter_array_size) // For the static vars case... 
{ filter_array.reset(new short[filter_size]); assert(filter_array); filter_array_size = filter_size; } filter = filter_array.get() - min_filter_fg; // Take the position // corresponding to fg's (0,0) // in the array filter[0] = MAX_FILTER_VAL; for (f = 1, s_ = 1.0 / FILTER_RESOLUTION; f < filter_fg_radius; f++, s_ += 1.0 / FILTER_RESOLUTION) { // Symmetrically build the array weight_ = get_filter_value(flt_type, s_) * (double)MAX_FILTER_VAL; weight = tround(weight_); filter[f] = weight; filter[-f] = weight; } for (f = filter_fg_radius; f <= max_filter_fg; f++) filter[f] = 0; for (f = -filter_fg_radius; f >= min_filter_fg; f--) filter[f] = 0; #ifdef USE_STATIC_VARS } #endif // Considering the bounding square in fg min_pix_out_fg = std::min(min_pix_out_f, min_pix_out_g); max_pix_out_fg = std::max(max_pix_out_f, max_pix_out_g); if (min_pix_out_fg < min_filter_fg || max_pix_out_fg > max_filter_fg) { // Reallocate the filter... and so on... filter_size = max_pix_out_fg - min_pix_out_fg + 1; if (filter_size > filter_array_size) { // controllare!! 
// TREALLOC (filter_array, filter_size)
      filter_array.reset(new short[filter_size]);
      assert(filter_array);
      filter_array_size = filter_size;
    }
    filter = filter_array.get() - min_filter_fg;
    if (min_pix_out_fg < min_filter_fg) {
      int delta = min_filter_fg - min_pix_out_fg;
      for (f = max_filter_fg; f >= min_filter_fg; f--)
        filter[f + delta] = filter[f];
      filter += delta;
      for (f = min_filter_fg - 1; f >= min_pix_out_fg; f--) filter[f] = 0;
      min_filter_fg = min_pix_out_fg;
    }
    if (max_pix_out_fg > max_filter_fg) {
      for (f = max_filter_fg + 1; f <= max_pix_out_fg; f++) filter[f] = 0;
      max_filter_fg = max_pix_out_fg;
    }
  }

#ifdef USE_SSE2
  if ((TSystem::getCPUExtensions() & TSystem::CpuSupportsSse2) &&
      T::maxChannelValue == 255)
    resample_main_rgbm_SSE2(rout, rin, aff_xy2uv, aff0_uv2fg, min_pix_ref_u,
                            min_pix_ref_v, max_pix_ref_u, max_pix_ref_v, n_pix,
                            pix_ref_u.get(), pix_ref_v.get(), pix_ref_f.get(),
                            pix_ref_g.get(), filter);
  else
#endif
      if (n_pix >= 512 || T::maxChannelValue > 255)
    resample_main_rgbm(
        rout, rin, aff_xy2uv, aff0_uv2fg, min_pix_ref_u, min_pix_ref_v,
        max_pix_ref_u, max_pix_ref_v, n_pix, pix_ref_u.get(), pix_ref_v.get(),
        pix_ref_f.get(), pix_ref_g.get(), filter);
  else
    resample_main_rgbm(
        rout, rin, aff_xy2uv, aff0_uv2fg, min_pix_ref_u, min_pix_ref_v,
        max_pix_ref_u, max_pix_ref_v, n_pix, pix_ref_u.get(), pix_ref_v.get(),
        pix_ref_f.get(), pix_ref_g.get(), filter);
}

//---------------------------------------------------------------------------

// Releases a filter table of `lx` entries: every entry whose `w_base` is
// non-null has that buffer deleted, then the table itself is freed with
// delete[].
// NOTE(review): `w_base` is released with scalar `delete`. If create_filter
// allocates it with `new[]` this must be `delete[]` — confirm against
// create_filter, which is not visible from here.
static void free_filter(FILTER *filter, int lx) {
  for (--lx; lx >= 0; lx--)
    if (filter[lx].w_base) delete (filter[lx].w_base);
  delete[] filter;
}

//-----------------------------------------------------------------------------

// Resamples the greyscale raster `rin` into `rout` through the affine `aff`.
//
// - Returns without touching `rout` when `rout` has no pixels.
// - Fills `rout` with TPixelGR8::Black when `rin` has no pixels.
// - Throws TRopException when |det(aff)| is zero or below 1E-30, or when the
//   rasters do not convert to TRasterGR8P.
//
// The row/column filter and no-calc tables are built with create_filter /
// create_nocalc from `flt_type` and `blur`, and the actual work is delegated
// to rop_resample_gr8.
void do_resample(TRasterGR8P rout, const TRasterGR8P &rin, const TAffine &aff,
                 TRop::ResampleFilterType flt_type, double blur) {
  double jacob;
  double s11, s22, s13, s23;
  FILTER *rowf, *colf;
  NOCALC *rown, *coln;
  int pmin, pmax, qmin, qmax;
  int nrows, dummy;
  double negradu_, negradv_, posradu_, posradv_;
  double negradx_, negrady_, posradx_, posrady_;
  int nocdiamx, nocdiamy;
  double rad_x, rad_y;
  TAffine rot, scale, invrot;

#ifdef USE_DOUBLE_TO_INT
  double d2iaux;
#endif

  if (!(rout->getLx() > 0 && rout->getLy() > 0)) /* empty output image */
  {
    return;
  }
  if (!(rin->getLx() > 0 && rin->getLy() > 0)) /* empty input image */
  {
    rout->fill(TPixelGR8::Black);  // Black_rgbm
    return;
  }

  TRasterGR8P routGR8 = rout, rinGR8 = rin;
  if (routGR8 && rinGR8) {
    jacob = fabs(aff.det());
    if (jacob == 0.0)
      throw TRopException("AFFINE transformation has zero determinant");
    if (jacob < 1E-30)
      throw TRopException(
          "AFFINE transformation has (nearly) zero determinant");
    // Use the same isotropic scale factor on both axes: sqrt(|det|).
    s11 = sqrt(jacob); /* provisional */
    s22 = s11;
    s13 = aff.a13;
    s23 = aff.a23;

    // rot = aff_place (0.0, 0.0, 0.0, 0.0, TScale(1/s11,
    // 1/s22)*aff);//possibly swap the order
    rot = (TScale(1 / s11, 1 / s22) * aff).place(0.0, 0.0, 0.0, 0.0);

    // scale = aff_place (0.0, 0.0, s13, s23, TScale(s11, s22));
    scale = TScale(s11, s22).place(0.0, 0.0, s13, s23);
    invrot = rot.inv();

    // One table pair per output axis; `dummy` receives a value that is not
    // used afterwards in this function.
    rowf = create_filter(flt_type, blur, scale.a11, scale.a13, rout->getLx(),
                         rad_x, pmin, pmax, dummy);
    colf = create_filter(flt_type, blur, scale.a22, scale.a23, rout->getLy(),
                         rad_y, qmin, qmax, nrows);
    rown = create_nocalc(flt_type, blur, scale.a11, scale.a13, rout->getLx(),
                         pmin, pmax, nocdiamx);
    coln = create_nocalc(flt_type, blur, scale.a22, scale.a23, rout->getLy(),
                         qmin, qmax, nocdiamy);

#ifdef DBMALLOC
    malloc_chain_check(TRUE);
#endif
#ifdef MEMLEAK
    CheckMemory();
#endif

    // aff with its translation part zeroed, and the inverse of that.
    TAffine aff_0 = aff.place(0.0, 0.0, 0.0, 0.0);
    TAffine inv_0 = aff_0.inv();

    minmax(-0.5, -0.5, 0.5, 0.5, aff_0, negradx_, negrady_, posradx_,
           posrady_);
    double flatradx_ = posradx_;
    double flatrady_ = posrady_;
    minmax(negradx_ - rad_x, negrady_ - rad_y, posradx_ + rad_x,
           posrady_ + rad_y, inv_0, negradu_, negradv_, posradu_, posradv_);

    int flatradu = tceil(posradu_) - 1;
    int flatradv = tceil(posradv_) - 1;

    rop_resample_gr8(rin, rout, aff, invrot, rowf, pmin, pmax, colf, qmin, qmax,
                     nrows,
flatradu, flatradv, flatradx_, flatrady_, rown, nocdiamx, coln, nocdiamy); // free_nocalc (coln); if (coln) delete (coln); // free_nocalc (rown); if (rown) delete (rown); free_filter(colf, rout->getLy()); free_filter(rowf, rout->getLx()); //----NON GESTIAMO ANCORA EXTRA BUFFER // rop_resample_extra (rin, rout, aff); return; } else throw TRopException("unsupported pixel type"); } //----------------------------------------------------------------------------- void do_resample(TRasterGR8P rout, const TRaster32P &rin, const TAffine &aff, TRop::ResampleFilterType flt_type, double blur) { double jacob; double s11, s22, s13, s23; FILTER *rowf, *colf; NOCALC *rown, *coln; int pmin, pmax, qmin, qmax; int nrows, dummy; double negradu_, negradv_, posradu_, posradv_; double negradx_, negrady_, posradx_, posrady_; int nocdiamx, nocdiamy; double rad_x, rad_y; TAffine rot, scale, invrot; #ifdef USE_DOUBLE_TO_INT double d2iaux; #endif if (!(rout->getLx() > 0 && rout->getLy() > 0)) /* immagine out vuota */ { return; } if (!(rin->getLx() > 0 && rin->getLy() > 0)) /* immagine in vuota */ { rout->fill(TPixelGR8::Black); // Black_rgbm return; } jacob = fabs(aff.det()); if (jacob == 0.0) throw TRopException("AFFINE transformation has zero determinant"); if (jacob < 1E-30) throw TRopException("AFFINE transformation has (nearly) zero determinant"); s11 = sqrt(jacob); /* provvisorio */ s22 = s11; s13 = aff.a13; s23 = aff.a23; // rot = aff_place (0.0, 0.0, 0.0, 0.0, TScale(1/s11, // 1/s22)*aff);//eventualmente invertire ordine rot = (TScale(1 / s11, 1 / s22) * aff).place(0.0, 0.0, 0.0, 0.0); // scale = aff_place (0.0, 0.0, s13, s23, TScale(s11, s22)); scale = TScale(s11, s22).place(0.0, 0.0, s13, s23); invrot = rot.inv(); rowf = create_filter(flt_type, blur, scale.a11, scale.a13, rout->getLx(), rad_x, pmin, pmax, dummy); colf = create_filter(flt_type, blur, scale.a22, scale.a23, rout->getLy(), rad_y, qmin, qmax, nrows); rown = create_nocalc(flt_type, blur, scale.a11, scale.a13, 
rout->getLx(), pmin, pmax, nocdiamx); coln = create_nocalc(flt_type, blur, scale.a22, scale.a23, rout->getLy(), qmin, qmax, nocdiamy); #ifdef DBMALLOC malloc_chain_check(TRUE); #endif #ifdef MEMLEAK CheckMemory(); #endif TAffine aff_0 = aff.place(0.0, 0.0, 0.0, 0.0); TAffine inv_0 = aff_0.inv(); minmax(-0.5, -0.5, 0.5, 0.5, aff_0, negradx_, negrady_, posradx_, posrady_); double flatradx_ = posradx_; double flatrady_ = posrady_; minmax(negradx_ - rad_x, negrady_ - rad_y, posradx_ + rad_x, posrady_ + rad_y, inv_0, negradu_, negradv_, posradu_, posradv_); int flatradu = tceil(posradu_) - 1; int flatradv = tceil(posradv_) - 1; rop_resample_rgbm32_gr8(rin, rout, aff, invrot, rowf, pmin, pmax, colf, qmin, qmax, nrows, flatradu, flatradv, flatradx_, flatrady_, rown, nocdiamx, coln, nocdiamy); // free_nocalc (coln); if (coln) delete[] coln; // free_nocalc (rown); if (rown) delete[] rown; free_filter(colf, rout->getLy()); free_filter(rowf, rout->getLx()); //----NON GESTIAMO ANCORA EXTRA BUFFER // rop_resample_extra (rin, rout, aff); return; // else throw TRopException("unsupported pixel type"); } //----------------------------------------------------------------------------- template void do_resample(TRasterPT rout, const TRasterPT &rin, const TAffine &aff, TRop::ResampleFilterType flt_type, double blur) { #ifdef ALTRI_TIPI_DI_RASTER double jacob; double s11, s22, s13, s23; FILTER *rowf, *colf; NOCALC *rown, *coln; int pmin, pmax, qmin, qmax; int nrows, dummy; double negradu_, negradv_, posradu_, posradv_; double negradx_, negrady_, posradx_, posrady_; int nocdiamx, nocdiamy; double rad_x, rad_y; #endif if (!(rout->getLx() > 0 && rout->getLy() > 0)) /* immagine out vuota */ { return; } if (!(rin->getLx() > 0 && rin->getLy() > 0)) /* immagine in vuota */ { rout->fill(T::Black); // Black_rgbm return; } TRasterPT rout_ = rout, rin_ = rin; if (rout_ && rin_) { rop_resample_rgbm(rout, rin, aff, flt_type, blur); return; } else throw TRopException("unsupported pixel type"); #ifdef 
ALTRI_TIPI_DI_RASTER jacob = fabs(aff.det()); if (jacob == 0.0) throw TRopException("AFFINE transformation has zero determinant"); if (jacob < 1E-30) throw TRopException("AFFINE transformation has (nearly) zero determinant"); s11 = sqrt(jacob); /* provvisorio */ s22 = s11; s13 = aff.a13; s23 = aff.a23; rot = (TScale(1 / s11, 1 / s22) * aff).place(0.0, 0.0, 0.0, 0.0); scale = TScale(s11, s22).place(0.0, 0.0, s13, s23); invrot = rot.inv(); rowf = create_filter(flt_type, blur, scale.a11, scale.a13, rout->getLx(), rad_x, pmin, pmax, dummy); colf = create_filter(flt_type, blur, scale.a22, scale.a23, rout->getLy(), rad_y, qmin, qmax, nrows); rown = create_nocalc(flt_type, blur, scale.a11, scale.a13, rout->getLx(), pmin, pmax, nocdiamx); coln = create_nocalc(flt_type, blur, scale.a22, scale.a23, rout->getLy(), qmin, qmax, nocdiamy); #ifdef DBMALLOC malloc_chain_check(TRUE); #endif #ifdef MEMLEAK CheckMemory(); #endif aff_0 = aff.place(0.0, 0.0, 0.0, 0.0); inv_0 = aff_0.inv(); minmax(-0.5, -0.5, 0.5, 0.5, aff_0, negradx_, negrady_, posradx_, posrady_); minmax(negradx_ - rad_x, negrady_ - rad_y, posradx_ + rad_x, posrady_ + rad_y, inv_0, negradu_, negradv_, posradu_, posradv_); if (coln) delete (coln); if (rown) delete (rown); free_filter(colf, rout->getLy()); free_filter(rowf, rout->getLx()); #endif } //----------------------------------------------------------------------------- typedef struct { TUINT32 val; double tot; } BLOB24; //----------------------------------------------------------------------------- #define MINOREQ(x, a) ((x) >= 0 && (x) <= (a)) #define MINOR(x, a) ((x) >= 0 && (x) < (a)) //----------------------------------------------------------------------------- } // namespace #ifndef TNZCORE_LIGHT namespace { void do_resample(TRasterCM32P rout, const TRasterCM32P &rin, const TAffine &aff) { TAffine inv; int lx, ly, mx, my; int lu, lv, mu, mv; int x, y, u, v; double u_0, v_0, u_, v_; double fu, fv, gu, gv; int i, j; int wrapin, wrapout; TUINT32 *bufin_tcm, 
*bufout_tcm; TUINT32 *in_tcm, *out_tcm; TUINT32 tcm[4]; double w[4]; TUINT32 transp; BLOB24 color_blob[4], new_color_blob; BLOB24 pencil_blob[4], new_pencil_blob; int color_blobs; int pencil_blobs; bool some_pencil; double tone_tot; TUINT32 color_mask, pencil_mask; TUINT32 tone_mask; int tone; #ifdef USE_DOUBLE_TO_INT double d2iaux; #endif if (!(rout->getLx() > 0 && rout->getLy() > 0)) /* immagine out vuota */ return; rout->lock(); if (!(rin->getLx() > 0 && rin->getLy() > 0)) /* immagine in vuota */ { for (y = 0; y < rout->getLy(); y++) for (x = 0; x < rout->getLx(); x++) ((TUINT32 *)rout->getRawData())[x + y * rout->getWrap()] = 0xff; rout->unlock(); return; } rin->lock(); bufin_tcm = (TUINT32 *)rin->getRawData(); bufout_tcm = (TUINT32 *)rout->getRawData(); wrapin = rin->getWrap(); wrapout = rout->getWrap(); lu = rin->getLx(); mu = lu - 1; lv = rin->getLy(); mv = lv - 1; lx = rout->getLx(); mx = lx - 1; ly = rout->getLy(); my = ly - 1; inv = aff.inv(); pencil_mask = TPixelCM32::getInkMask(); color_mask = TPixelCM32::getPaintMask(); tone_mask = TPixelCM32::getToneMask(); transp = tone_mask; assert(tone_mask & 0x1); // Ensure that tone lies in the less significative bits // deal with every output line independently for (y = 0; y < ly; y++) { // Take inv*(0,y) u_0 = inv.a12 * y + inv.a13; v_0 = inv.a22 * y + inv.a23; out_tcm = bufout_tcm + wrapout * y; x = 0; // Place transparent pixels until we reach a useful source pos. 
for (; x < lx; x++) { // Add inv*(x,0) and floor it u_ = u_0 + x * inv.a11; u = tfloor(u_); v_ = v_0 + x * inv.a21; v = tfloor(v_); if (MINOREQ(u + 1, lu) && MINOREQ(v + 1, lv)) // u>=-1 && u=-1 && v=0 && u=0 && v 0 && color_blob[j].tot > color_blob[j - 1].tot; j--) std::swap(color_blob[j], color_blob[j - 1]); new_pencil_blob.val = tcm[i] & pencil_mask; new_pencil_blob.tot = (tone_mask - tone) * w[i]; for (j = 0; j < pencil_blobs; j++) if (pencil_blob[j].val == new_pencil_blob.val) break; if (j < pencil_blobs) pencil_blob[j].tot += new_pencil_blob.tot; else pencil_blob[pencil_blobs++] = new_pencil_blob; for (; j > 0 && pencil_blob[j].tot > pencil_blob[j - 1].tot; j--) std::swap(pencil_blob[j], pencil_blob[j - 1]); } tone = troundp(tone_tot); // if (some_pencil && (TUINT32)tone == tone_mask) // tone--; // if (color_blob[0].val==0 && pencil_blob[0].val==0) // tone = 255; *out_tcm++ = color_blob[0].val | pencil_blob[0].val | tone; } } // Deal with useful source positions on the output line's pre-image for (; x < lx; x++) { u_ = u_0 + x * inv.a11; u = tfloor(u_); v_ = v_0 + x * inv.a21; v = tfloor(v_); if (!(MINOR(u, lu) && MINOR(v, lv))) // u<0 || u>=lu || v<0 || v>=lv break; in_tcm = bufin_tcm + u + v * wrapin; // Take the associated input pixel pointer tcm[0] = in_tcm[0]; if (u < lu - 1 && v < lv - 1) { // Also take their 4 next neighours (we shall perform a kinf of bilinear // interpolation) tcm[1] = in_tcm[1]; tcm[2] = in_tcm[wrapin]; tcm[3] = in_tcm[wrapin + 1]; } else { // Eventually, simulate the off-boundary ones tcm[1] = (u == lu - 1) ? in_tcm[0] : in_tcm[1]; tcm[2] = (v == lv - 1) ? in_tcm[0] : in_tcm[wrapin]; tcm[3] = (u == lu - 1 || v == lv - 1) ? in_tcm[0] : in_tcm[wrapin + 1]; } if (tcm[0] == tcm[1] && tcm[1] == tcm[2] && tcm[2] == tcm[3]) *out_tcm++ = tcm[0]; // If they are all equal, it's a copy-op else { // Otherwise, take the bilinear coordinates fu = u_ - u; gu = 1. - fu; fv = v_ - v; gv = 1. 
- fv; w[0] = gu * gv; w[2] = gu * fv; // And the associated weights w[1] = fu * gv; w[3] = fu * fv; color_blobs = pencil_blobs = 0; tone_tot = 0.0; some_pencil = false; // Examine all neighbouring pixels for (i = 0; i < 4; i++) { tone = tcm[i] & tone_mask; // Take the tone if ((TUINT32)tone != tone_mask) some_pencil = true; tone_tot += tone * w[i]; // Build the weighted tone sum new_color_blob.val = tcm[i] & color_mask; new_color_blob.tot = tone * w[i]; // And the weighted paint tone for this pixel // Fill in the different colors found in an array. Equal colors are // stored as one // with summed weighted total tone. for (j = 0; j < color_blobs; j++) if (color_blob[j].val == new_color_blob.val) break; if (j < color_blobs) color_blob[j].tot += new_color_blob.tot; else color_blob[color_blobs++] = new_color_blob; // Sort the stored colors for decreasing weighted total tone for (; j > 0 && color_blob[j].tot > color_blob[j - 1].tot; j--) std::swap(color_blob[j], color_blob[j - 1]); // Deal the same way with ink colors. new_pencil_blob.val = tcm[i] & pencil_mask; new_pencil_blob.tot = (tone_mask - tone) * w[i]; for (j = 0; j < pencil_blobs; j++) if (pencil_blob[j].val == new_pencil_blob.val) break; if (j < pencil_blobs) pencil_blob[j].tot += new_pencil_blob.tot; else pencil_blob[pencil_blobs++] = new_pencil_blob; for (; j > 0 && pencil_blob[j].tot > pencil_blob[j - 1].tot; j--) std::swap(pencil_blob[j], pencil_blob[j - 1]); } tone = tround(tone_tot); // if (some_pencil && (TUINT32)tone == tone_mask) // tone--; // if (color_blob[0].val==0 && pencil_blob[0].val==0) // tone = 255; // The output colors shall be the ones with maximum weighted total tone, // with the overall total tone as output tone. 
*out_tcm++ = color_blob[0].val | pencil_blob[0].val | tone; } } // Again, deal with border pixels at the end of line's pre-image for (; x < lx; x++) { u_ = u_0 + x * inv.a11; u = tfloor(u_); v_ = v_0 + x * inv.a21; v = tfloor(v_); if (!(MINOREQ(u + 1, lu) && MINOREQ(v + 1, lv))) // u<-1 || u>=lu || v<-1 || v>=lv break; in_tcm = bufin_tcm + u + v * wrapin; bool u0 = MINOREQ(u, mu); bool v0 = MINOREQ(v, mv); bool u1 = MINOREQ(u + 1, mv); bool v1 = MINOREQ(v + 1, mv); tcm[0] = (u0 && v0) ? in_tcm[0] : transp; tcm[1] = (u1 && v0) ? in_tcm[1] : transp; tcm[2] = (u0 && v1) ? in_tcm[wrapin] : transp; tcm[3] = (u1 && v1) ? in_tcm[wrapin + 1] : transp; if (tcm[0] == tcm[1] && tcm[1] == tcm[2] && tcm[2] == tcm[3]) *out_tcm++ = tcm[0]; else { fu = u_ - u; gu = 1. - fu; fv = v_ - v; gv = 1. - fv; w[0] = gu * gv; w[2] = gu * fv; w[1] = fu * gv; w[3] = fu * fv; color_blobs = pencil_blobs = 0; tone_tot = 0.0; some_pencil = false; for (i = 0; i < 4; i++) { tone = tcm[i] & tone_mask; if ((TUINT32)tone != tone_mask) some_pencil = true; tone_tot += tone * w[i]; new_color_blob.val = tcm[i] & color_mask; new_color_blob.tot = tone * w[i]; for (j = 0; j < color_blobs; j++) if (color_blob[j].val == new_color_blob.val) break; if (j < color_blobs) color_blob[j].tot += new_color_blob.tot; else color_blob[color_blobs++] = new_color_blob; for (; j > 0 && color_blob[j].tot > color_blob[j - 1].tot; j--) std::swap(color_blob[j], color_blob[j - 1]); new_pencil_blob.val = tcm[i] & pencil_mask; new_pencil_blob.tot = (tone_mask - tone) * w[i]; for (j = 0; j < pencil_blobs; j++) if (pencil_blob[j].val == new_pencil_blob.val) break; if (j < pencil_blobs) pencil_blob[j].tot += new_pencil_blob.tot; else pencil_blob[pencil_blobs++] = new_pencil_blob; for (; j > 0 && pencil_blob[j].tot > pencil_blob[j - 1].tot; j--) std::swap(pencil_blob[j], pencil_blob[j - 1]); } tone = troundp(tone_tot); // if (some_pencil && (TUINT32)tone == tone_mask) // tone--; // if (color_blob[0].val==0 && pencil_blob[0].val==0) // 
tone = 255; *out_tcm++ = color_blob[0].val | pencil_blob[0].val | tone; } } // Finally, deal with out-of-source pixels at the end of line's pre-image for (; x < lx; x++) *out_tcm++ = transp; } rin->unlock(); rout->unlock(); } //----------------------------------------------------------------------------- #ifdef USE_SSE2 template void resample_main_cm32_rgbm_SSE2(TRasterPT rout, const TRasterCM32P &rin, const TAffine &aff_xy2uv, const TAffine &aff0_uv2fg, int min_pix_ref_u, int min_pix_ref_v, int max_pix_ref_u, int max_pix_ref_v, int n_pix, int *pix_ref_u, int *pix_ref_v, int *pix_ref_f, int *pix_ref_g, short *filter, TPalette *palette) { __m128i zeros = _mm_setzero_si128(); const TPixelCM32 *buffer_in; T *buffer_out; int lu, lv, wrap_in, mu, mv; int lx, ly, wrap_out; int out_x, out_y; double out_x_, out_y_; double out_u_, out_v_; int ref_u, ref_v; int pix_u, pix_v; double ref_out_u_, ref_out_v_; double ref_out_f_, ref_out_g_; int ref_out_f, ref_out_g; int pix_out_f, pix_out_g; int inside_offset_u, inside_offset_v; UINT inside_limit_u, inside_limit_v; int inside_nonempty; double outside_min_u_, outside_min_v_; double outside_max_u_, outside_max_v_; UCHAR *calc; int calc_allocsize; int calc_bytewrap; UCHAR calc_value; bool must_calc; T pix_value; T default_value(0, 0, 0, 0); float weight; float sum_weights; float inv_sum_weights; int i; #ifdef USE_DOUBLE_TO_INT double d2iaux; #endif T *pix_out; __m128 sum_contribs_packed; __m128 pix_value_packed; __m128 weight_packed; __m128 zeros2 = _mm_setzero_ps(); float maxChannelValue = (float)T::maxChannelValue; __m128 maxChanneValue_packed = _mm_load1_ps(&maxChannelValue); if (!(rout->getLx() > 0 && rout->getLy() > 0)) return; if (!(rin->getLx() > 0 && rin->getLy() > 0)) { resample_clear_rgbm(rout, default_value); return; } calc = 0; calc_allocsize = 0; create_calc(rin, min_pix_ref_u, max_pix_ref_u, min_pix_ref_v, max_pix_ref_v, calc, calc_allocsize, calc_bytewrap); buffer_in = rin->pixels(); buffer_out = rout->pixels(); lu = 
rin->getLx(); lx = rout->getLx(); lv = rin->getLy(); ly = rout->getLy(); wrap_in = rin->getWrap(); wrap_out = rout->getWrap(); mu = lu - 1; mv = lv - 1; inside_offset_u = -min_pix_ref_u; inside_offset_v = -min_pix_ref_v; inside_limit_u = lu - max_pix_ref_u - inside_offset_u; inside_limit_v = lv - max_pix_ref_v - inside_offset_v; inside_nonempty = (int)inside_limit_u > 0 && (int)inside_limit_v > 0; outside_min_u_ = -0.5; outside_min_v_ = -0.5; outside_max_u_ = lu - 0.5; outside_max_v_ = lv - 0.5; int count = palette->getStyleCount(); int count2 = std::max({count, TPixelCM32::getMaxInk(), TPixelCM32::getMaxPaint()}); TPixelFloat *paints = (TPixelFloat *)_aligned_malloc(count2 * sizeof(TPixelFloat), 16); TPixelFloat *inks = (TPixelFloat *)_aligned_malloc(count2 * sizeof(TPixelFloat), 16); std::vector paints2(count2); std::vector inks2(count2); for (i = 0; i < palette->getStyleCount(); i++) { TPixel32 color = ::premultiply(palette->getStyle(i)->getAverageColor()); paints[i] = inks[i] = TPixelFloat(color); paints2[i] = inks2[i] = color; } float maxTone = (float)TPixelCM32::getMaxTone(); __m128 den_packed = _mm_load1_ps(&maxTone); for (out_y = 0, out_y_ = 0.0; out_y < ly; out_y++, out_y_ += 1.0) { for (out_x = 0, out_x_ = 0.0; out_x < lx; out_x++, out_x_ += 1.0) { pix_out = buffer_out + out_y * wrap_out + out_x; out_u_ = affMV1(aff_xy2uv, out_x_, out_y_); out_v_ = affMV2(aff_xy2uv, out_x_, out_y_); ref_u = intLE(out_u_); ref_v = intLE(out_v_); if (inside_nonempty && (UINT)(ref_u - inside_offset_u) < inside_limit_u && (UINT)(ref_v - inside_offset_v) < inside_limit_v) { calc_value = calc[(ref_u >> 3) + ref_v * calc_bytewrap]; if (calc_value && ((calc_value >> (ref_u & 7)) & 1)) { ref_out_u_ = ref_u - out_u_; ref_out_v_ = ref_v - out_v_; ref_out_f_ = aff0MV1(aff0_uv2fg, ref_out_u_, ref_out_v_); ref_out_g_ = aff0MV2(aff0_uv2fg, ref_out_u_, ref_out_v_); ref_out_f = tround(ref_out_f_); ref_out_g = tround(ref_out_g_); sum_weights = 0; sum_contribs_packed = _mm_setzero_ps(); for 
(i = n_pix - 1; i >= 0; i--) { pix_out_f = pix_ref_f[i] + ref_out_f; pix_out_g = pix_ref_g[i] + ref_out_g; weight = (float)((filter[pix_out_f] * filter[pix_out_g]) >> 16); pix_u = pix_ref_u[i] + ref_u; pix_v = pix_ref_v[i] + ref_v; int pix_in_pos = pix_u + pix_v * wrap_in; const TPixelCM32 *pix_in = buffer_in + pix_in_pos; int tone = pix_in->getTone(); int paint = pix_in->getPaint(); int ink = pix_in->getInk(); if (tone == TPixelCM32::getMaxTone()) pix_value_packed = _mm_load_ps((float *)&(paints[paint])); else if (tone == 0) pix_value_packed = _mm_load_ps((float *)&(inks[ink])); else { float tt = (float)tone; blendBySSE2(pix_value_packed, // il valore calcolato (float *)&(inks[ink]), (float *)&(paints[paint]), &tt, den_packed, zeros); } weight_packed = _mm_load1_ps(&weight); sum_contribs_packed = _mm_add_ps(sum_contribs_packed, _mm_mul_ps(pix_value_packed, weight_packed)); sum_weights += weight; } inv_sum_weights = 1.0f / sum_weights; __m128 inv_sum_weights_packed = _mm_load1_ps(&inv_sum_weights); __m128 out_fval_packed = _mm_mul_ps(sum_contribs_packed, inv_sum_weights_packed); out_fval_packed = _mm_max_ps(out_fval_packed, zeros2); out_fval_packed = _mm_min_ps(out_fval_packed, maxChanneValue_packed); __m128i out_value_packed_i = _mm_cvtps_epi32(out_fval_packed); out_value_packed_i = _mm_packs_epi32(out_value_packed_i, zeros); out_value_packed_i = _mm_packus_epi16(out_value_packed_i, zeros); *(DWORD *)(pix_out) = _mm_cvtsi128_si32(out_value_packed_i); } else { int pix_in_pos = ref_u + ref_v * wrap_in; const TPixelCM32 *pix_in = buffer_in + pix_in_pos; int tone = pix_in->getTone(); int paint = pix_in->getPaint(); int ink = pix_in->getInk(); if (tone == TPixelCM32::getMaxTone()) *pix_out = paints2[paint]; else if (tone == 0) *pix_out = inks2[ink]; else *pix_out = blend(inks2[ink], paints2[paint], tone, TPixelCM32::getMaxTone()); } } else if (outside_min_u_ <= out_u_ && out_u_ <= outside_max_u_ && outside_min_v_ <= out_v_ && out_v_ <= outside_max_v_) { if ((UINT)ref_u 
>= (UINT)lu || (UINT)ref_v >= (UINT)lv) must_calc = true; else { calc_value = calc[(ref_u >> 3) + ref_v * calc_bytewrap]; must_calc = calc_value && ((calc_value >> (ref_u & 7)) & 1); } if (must_calc) { ref_out_u_ = ref_u - out_u_; ref_out_v_ = ref_v - out_v_; ref_out_f_ = aff0MV1(aff0_uv2fg, ref_out_u_, ref_out_v_); ref_out_g_ = aff0MV2(aff0_uv2fg, ref_out_u_, ref_out_v_); ref_out_f = tround(ref_out_f_); ref_out_g = tround(ref_out_g_); sum_weights = 0; sum_contribs_packed = _mm_setzero_ps(); for (i = n_pix - 1; i >= 0; i--) { pix_out_f = pix_ref_f[i] + ref_out_f; pix_out_g = pix_ref_g[i] + ref_out_g; weight = (float)((filter[pix_out_f] * filter[pix_out_g]) >> 16); pix_u = pix_ref_u[i] + ref_u; pix_v = pix_ref_v[i] + ref_v; notLessThan(0, pix_u); notLessThan(0, pix_v); notMoreThan(mu, pix_u); notMoreThan(mv, pix_v); int pix_in_pos = pix_u + pix_v * wrap_in; const TPixelCM32 *pix_in = buffer_in + pix_in_pos; int tone = pix_in->getTone(); int paint = pix_in->getPaint(); int ink = pix_in->getInk(); if (tone == TPixelCM32::getMaxTone()) pix_value_packed = _mm_load_ps((float *)&(paints[paint])); else if (tone == 0) pix_value_packed = _mm_load_ps((float *)&(inks[ink])); else { float tt = (float)tone; blendBySSE2(pix_value_packed, // il valore calcolato (float *)&(inks[ink]), (float *)&(paints[paint]), &tt, den_packed, zeros); } weight_packed = _mm_load1_ps(&weight); sum_contribs_packed = _mm_add_ps(sum_contribs_packed, _mm_mul_ps(pix_value_packed, weight_packed)); sum_weights += weight; } inv_sum_weights = 1.0f / sum_weights; __m128 inv_sum_weights_packed = _mm_load1_ps(&inv_sum_weights); __m128 out_fval_packed = _mm_mul_ps(sum_contribs_packed, inv_sum_weights_packed); out_fval_packed = _mm_max_ps(out_fval_packed, zeros2); out_fval_packed = _mm_min_ps(out_fval_packed, maxChanneValue_packed); __m128i out_value_packed_i = _mm_cvtps_epi32(out_fval_packed); out_value_packed_i = _mm_packs_epi32(out_value_packed_i, zeros); out_value_packed_i = 
_mm_packus_epi16(out_value_packed_i, zeros); *(DWORD *)(pix_out) = _mm_cvtsi128_si32(out_value_packed_i); } else { int pix_in_pos = ref_u + ref_v * wrap_in; const TPixelCM32 *pix_in = buffer_in + pix_in_pos; int tone = pix_in->getTone(); int paint = pix_in->getPaint(); int ink = pix_in->getInk(); if (tone == TPixelCM32::getMaxTone()) *pix_out = paints2[paint]; else if (tone == 0) *pix_out = inks2[ink]; else *pix_out = blend(inks2[ink], paints2[paint], tone, TPixelCM32::getMaxTone()); } } else { *pix_out = default_value; } } } if (calc) delete[] calc; } #endif /*---------------------------------------------------------------------------*/ namespace { template void resample_main_cm32_rgbm_bigradius( TRasterPT rout, const TRasterCM32P &rin, const TAffine &aff_xy2uv, const TAffine &aff0_uv2fg, int min_pix_ref_u, int min_pix_ref_v, int max_pix_ref_u, int max_pix_ref_v, int n_pix, int *pix_ref_u, int *pix_ref_v, int *pix_ref_f, int *pix_ref_g, short *filter, TPalette *palette) { // bigradius: cambia solo che i sum_contribs sono double invece che int const TPixelCM32 *buffer_in; T *buffer_out; int lu, lv, wrap_in, mu, mv; int lx, ly, wrap_out; int out_x, out_y; double out_x_, out_y_; double out_u_, out_v_; int ref_u, ref_v; int pix_u, pix_v; double ref_out_u_, ref_out_v_; double ref_out_f_, ref_out_g_; int ref_out_f, ref_out_g; int pix_out_f, pix_out_g; int inside_offset_u, inside_offset_v; UINT inside_limit_u, inside_limit_v; int inside_nonempty; double outside_min_u_, outside_min_v_; double outside_max_u_, outside_max_v_; UCHAR *calc; int calc_allocsize; int calc_bytewrap; UCHAR calc_value; bool must_calc; T pix_value; T default_value; float weight; float sum_weights; double inv_sum_weights; double sum_contribs_r, sum_contribs_g, sum_contribs_b, sum_contribs_m; double out_fval_r, out_fval_g, out_fval_b, out_fval_m; int out_value_r, out_value_g, out_value_b, out_value_m; int i; #ifdef USE_DOUBLE_TO_INT double d2iaux; #endif T *pix_out; default_value.r = 0; 
default_value.g = 0;
  default_value.b = 0;
  default_value.m = 0;

  // Nothing to do for an empty output; an empty input clears the output.
  if (!(rout->getLx() > 0 && rout->getLy() > 0)) return;

  if (!(rin->getLx() > 0 && rin->getLy() > 0)) {
    rout->clear();
    return;
  }

  calc           = 0;
  calc_allocsize = 0;
  create_calc(rin, min_pix_ref_u, max_pix_ref_u, min_pix_ref_v, max_pix_ref_v,
              calc, calc_allocsize, calc_bytewrap);

  buffer_in  = rin->pixels();
  buffer_out = rout->pixels();
  lu         = rin->getLx();
  lx         = rout->getLx();
  lv         = rin->getLy();
  ly         = rout->getLy();
  wrap_in    = rin->getWrap();
  wrap_out   = rout->getWrap();
  mu         = lu - 1;
  mv         = lv - 1;

  // "Inside" region: reference pixels whose whole neighbourhood
  // [min_pix_ref, max_pix_ref] stays within the source. Stored as offset +
  // unsigned limit so that one unsigned compare tests both bounds.
  inside_offset_u = -min_pix_ref_u;
  inside_offset_v = -min_pix_ref_v;
  inside_limit_u  = lu - max_pix_ref_u - inside_offset_u;
  inside_limit_v  = lv - max_pix_ref_v - inside_offset_v;
  inside_nonempty = (int)inside_limit_u > 0 && (int)inside_limit_v > 0;
  outside_min_u_  = -0.5;
  outside_min_v_  = -0.5;
  outside_max_u_  = lu - 0.5;
  outside_max_v_  = lv - 0.5;

  // Colour tables indexed by paint/ink id, filled from the palette's
  // premultiplied average style colours.
  // NOTE(review): the element type of these vectors and the template argument
  // of `Converter` below appear to be missing (probably lost in extraction) —
  // confirm against the repository copy.
  // NOTE(review): if getMaxInk()/getMaxPaint() return the largest valid
  // index, the tables are one entry short — confirm.
  int colorCount = palette->getStyleCount();
  colorCount     = std::max(
      {colorCount, TPixelCM32::getMaxInk(), TPixelCM32::getMaxPaint()});

  std::vector paints(colorCount);
  std::vector inks(colorCount);
  for (i = 0; i < palette->getStyleCount(); i++)
    paints[i] = inks[i] =
        ::premultiply(palette->getStyle(i)->getAverageColor());

  for (out_y = 0, out_y_ = 0.0; out_y < ly; out_y++, out_y_ += 1.0) {
    for (out_x = 0, out_x_ = 0.0; out_x < lx; out_x++, out_x_ += 1.0) {
      pix_out = buffer_out + out_y * wrap_out + out_x;

      // Pre-image of the output pixel and its integer reference pixel.
      out_u_ = affMV1(aff_xy2uv, out_x_, out_y_);
      out_v_ = affMV2(aff_xy2uv, out_x_, out_y_);
      ref_u  = intLE(out_u_);
      ref_v  = intLE(out_v_);

      if (inside_nonempty && (UINT)(ref_u - inside_offset_u) < inside_limit_u &&
          (UINT)(ref_v - inside_offset_v) < inside_limit_v) {
        // `calc` holds one bit per source pixel; a set bit selects the
        // filtered computation, a clear bit a direct palette lookup.
        calc_value = calc[(ref_u >> 3) + ref_v * calc_bytewrap];
        if (calc_value && ((calc_value >> (ref_u & 7)) & 1)) {
          ref_out_u_ = ref_u - out_u_;
          ref_out_v_ = ref_v - out_v_;
          ref_out_f_ = aff0MV1(aff0_uv2fg, ref_out_u_, ref_out_v_);
          ref_out_g_ = aff0MV2(aff0_uv2fg, ref_out_u_, ref_out_v_);
          ref_out_f  = tround(ref_out_f_);
          ref_out_g =
              tround(ref_out_g_);
          sum_weights    = 0;
          sum_contribs_r = 0;
          sum_contribs_g = 0;
          sum_contribs_b = 0;
          sum_contribs_m = 0;

          // Accumulate the weighted colour of every neighbour.
          for (i = n_pix - 1; i >= 0; i--) {
            pix_out_f = pix_ref_f[i] + ref_out_f;
            pix_out_g = pix_ref_g[i] + ref_out_g;
            weight = (float)((filter[pix_out_f] * filter[pix_out_g]) >> 16);
            pix_u  = pix_ref_u[i] + ref_u;
            pix_v  = pix_ref_v[i] + ref_v;

            int pix_in_pos = pix_u + pix_v * wrap_in;
            int tone       = buffer_in[pix_in_pos].getTone();
            int paint      = buffer_in[pix_in_pos].getPaint();
            int ink        = buffer_in[pix_in_pos].getInk();

            // Max tone selects the paint colour, zero tone the ink colour,
            // anything in between a blend of the two.
            if (tone == TPixelCM32::getMaxTone())
              pix_value = Converter::convert(paints[paint]);
            else if (tone == 0)
              pix_value = Converter::convert(inks[ink]);
            else
              pix_value = Converter::convert(blend(
                  inks[ink], paints[paint], tone, TPixelCM32::getMaxTone()));

            sum_contribs_r += (int)pix_value.r * weight;
            sum_contribs_g += (int)pix_value.g * weight;
            sum_contribs_b += (int)pix_value.b * weight;
            sum_contribs_m += (int)pix_value.m * weight;
            sum_weights += weight;
          }

          // Normalise by the total weight, then clamp each channel to
          // [0, T::maxChannelValue].
          inv_sum_weights = 1.0 / sum_weights;
          out_fval_r      = sum_contribs_r * inv_sum_weights;
          out_fval_g      = sum_contribs_g * inv_sum_weights;
          out_fval_b      = sum_contribs_b * inv_sum_weights;
          out_fval_m      = sum_contribs_m * inv_sum_weights;
          notLessThan(0.0, out_fval_r);
          notLessThan(0.0, out_fval_g);
          notLessThan(0.0, out_fval_b);
          notLessThan(0.0, out_fval_m);
          out_value_r = troundp(out_fval_r);
          out_value_g = troundp(out_fval_g);
          out_value_b = troundp(out_fval_b);
          out_value_m = troundp(out_fval_m);
          notMoreThan(T::maxChannelValue, out_value_r);
          notMoreThan(T::maxChannelValue, out_value_g);
          notMoreThan(T::maxChannelValue, out_value_b);
          notMoreThan(T::maxChannelValue, out_value_m);
          pix_out->r = out_value_r;
          pix_out->g = out_value_g;
          pix_out->b = out_value_b;
          pix_out->m = out_value_m;
        } else {
          // *pix_out = buffer_in[ref_u + ref_v * wrap_in];
          int pix_in_pos = ref_u + ref_v * wrap_in;
          int tone       = buffer_in[pix_in_pos].getTone();
          int paint      = buffer_in[pix_in_pos].getPaint();
          int ink        = buffer_in[pix_in_pos].getInk();

          if (tone
              == TPixelCM32::getMaxTone())
            *pix_out = Converter::convert(paints[paint]);
          else if (tone == 0)
            *pix_out = Converter::convert(inks[ink]);
          else
            *pix_out = Converter::convert(blend(
                inks[ink], paints[paint], tone, TPixelCM32::getMaxTone()));
        }
      } else if (outside_min_u_ <= out_u_ && out_u_ <= outside_max_u_ &&
                 outside_min_v_ <= out_v_ && out_v_ <= outside_max_v_) {
        // Border band: the neighbourhood may leave the source, so neighbour
        // coordinates are clamped to [0, mu] x [0, mv] below.
        if ((UINT)ref_u >= (UINT)lu || (UINT)ref_v >= (UINT)lv)
          must_calc = true;
        else {
          calc_value = calc[(ref_u >> 3) + ref_v * calc_bytewrap];
          must_calc  = calc_value && ((calc_value >> (ref_u & 7)) & 1);
        }

        if (must_calc) {
          ref_out_u_ = ref_u - out_u_;
          ref_out_v_ = ref_v - out_v_;
          ref_out_f_ = aff0MV1(aff0_uv2fg, ref_out_u_, ref_out_v_);
          ref_out_g_ = aff0MV2(aff0_uv2fg, ref_out_u_, ref_out_v_);
          ref_out_f  = tround(ref_out_f_);
          ref_out_g  = tround(ref_out_g_);
          sum_weights    = 0;
          sum_contribs_r = 0;
          sum_contribs_g = 0;
          sum_contribs_b = 0;
          sum_contribs_m = 0;

          for (i = n_pix - 1; i >= 0; i--) {
            pix_out_f = pix_ref_f[i] + ref_out_f;
            pix_out_g = pix_ref_g[i] + ref_out_g;
            weight = (float)((filter[pix_out_f] * filter[pix_out_g]) >> 16);
            pix_u  = pix_ref_u[i] + ref_u;
            pix_v  = pix_ref_v[i] + ref_v;

            notLessThan(0, pix_u);
            notLessThan(0, pix_v);
            notMoreThan(mu, pix_u);
            notMoreThan(mv, pix_v);

            // pix_value = buffer_in[pix_u + pix_v * wrap_in];
            int pix_in_pos = pix_u + pix_v * wrap_in;
            int tone       = buffer_in[pix_in_pos].getTone();
            int paint      = buffer_in[pix_in_pos].getPaint();
            int ink        = buffer_in[pix_in_pos].getInk();

            if (tone == TPixelCM32::getMaxTone())
              pix_value = Converter::convert(paints[paint]);
            else if (tone == 0)
              pix_value = Converter::convert(inks[ink]);
            else
              pix_value = Converter::convert(blend(
                  inks[ink], paints[paint], tone, TPixelCM32::getMaxTone()));

            sum_contribs_r += (int)pix_value.r * weight;
            sum_contribs_g += (int)pix_value.g * weight;
            sum_contribs_b += (int)pix_value.b * weight;
            sum_contribs_m += (int)pix_value.m * weight;
            sum_weights += weight;
          }

          inv_sum_weights = 1.0 / sum_weights;
          out_fval_r      = sum_contribs_r *
                       inv_sum_weights;
          out_fval_g = sum_contribs_g * inv_sum_weights;
          out_fval_b = sum_contribs_b * inv_sum_weights;
          out_fval_m = sum_contribs_m * inv_sum_weights;
          notLessThan(0.0, out_fval_r);
          notLessThan(0.0, out_fval_g);
          notLessThan(0.0, out_fval_b);
          notLessThan(0.0, out_fval_m);
          out_value_r = troundp(out_fval_r);
          out_value_g = troundp(out_fval_g);
          out_value_b = troundp(out_fval_b);
          out_value_m = troundp(out_fval_m);
          notMoreThan(T::maxChannelValue, out_value_r);
          notMoreThan(T::maxChannelValue, out_value_g);
          notMoreThan(T::maxChannelValue, out_value_b);
          notMoreThan(T::maxChannelValue, out_value_m);
          pix_out->r = out_value_r;
          pix_out->g = out_value_g;
          pix_out->b = out_value_b;
          pix_out->m = out_value_m;
        } else {
          int pix_in_pos = ref_u + ref_v * wrap_in;
          int tone       = buffer_in[pix_in_pos].getTone();
          int paint      = buffer_in[pix_in_pos].getPaint();
          int ink        = buffer_in[pix_in_pos].getInk();

          if (tone == TPixelCM32::getMaxTone())
            *pix_out = Converter::convert(paints[paint]);
          else if (tone == 0)
            *pix_out = Converter::convert(inks[ink]);
          else
            *pix_out = Converter::convert(blend(
                inks[ink], paints[paint], tone, TPixelCM32::getMaxTone()));
        }
      } else {
        // Pre-image falls outside the source altogether.
        *pix_out = default_value;
      }
    }
  }

  if (calc) delete[] calc;
}
}

/*---------------------------------------------------------------------------*/

template void resample_main_cm32_rgbm(TRasterPT rout, const TRasterCM32P &rin,
                                      const TAffine &aff_xy2uv,
                                      const TAffine &aff0_uv2fg,
                                      int min_pix_ref_u, int min_pix_ref_v,
                                      int max_pix_ref_u, int max_pix_ref_v,
                                      int n_pix, int *pix_ref_u, int *pix_ref_v,
                                      int *pix_ref_f, int *pix_ref_g,
                                      short *filter, TPalette *palette) {
  const TPixelCM32 *buffer_in;
  T *buffer_out;
  int lu, lv, wrap_in, mu, mv;
  int lx, ly, wrap_out;
  int out_x, out_y;
  double out_x_, out_y_;
  double out_u_, out_v_;
  int ref_u, ref_v;
  int pix_u, pix_v;
  double ref_out_u_, ref_out_v_;
  double ref_out_f_, ref_out_g_;
  int ref_out_f, ref_out_g;
  int pix_out_f, pix_out_g;
  int inside_offset_u, inside_offset_v;
  UINT inside_limit_u, inside_limit_v;
  int
inside_nonempty; double outside_min_u_, outside_min_v_; double outside_max_u_, outside_max_v_; UCHAR *calc; int calc_allocsize; int calc_bytewrap; UCHAR calc_value; bool must_calc; T pix_value; T default_value(0, 0, 0, 0); int weight; int sum_weights; double inv_sum_weights; int sum_contribs_r, sum_contribs_g, sum_contribs_b, sum_contribs_m; double out_fval_r, out_fval_g, out_fval_b, out_fval_m; int out_value_r, out_value_g, out_value_b, out_value_m; T out_value; int i; #ifdef USE_DOUBLE_TO_INT double d2iaux; #endif if (n_pix >= 512 || T::maxChannelValue > 255) { resample_main_cm32_rgbm_bigradius( rout, rin, aff_xy2uv, aff0_uv2fg, min_pix_ref_u, min_pix_ref_v, max_pix_ref_u, max_pix_ref_v, n_pix, pix_ref_u, pix_ref_v, pix_ref_f, pix_ref_g, filter, palette); return; } if (!(rout->getLx() > 0 && rout->getLy() > 0)) return; if (!(rin->getLx() > 0 && rin->getLy() > 0)) { resample_clear_rgbm(rout, default_value); return; } calc = 0; calc_allocsize = 0; create_calc(rin, min_pix_ref_u, max_pix_ref_u, min_pix_ref_v, max_pix_ref_v, calc, calc_allocsize, calc_bytewrap); buffer_in = rin->pixels(); buffer_out = rout->pixels(); lu = rin->getLx(); lx = rout->getLx(); lv = rin->getLy(); ly = rout->getLy(); wrap_in = rin->getWrap(); wrap_out = rout->getWrap(); mu = lu - 1; mv = lv - 1; inside_offset_u = -min_pix_ref_u; inside_offset_v = -min_pix_ref_v; inside_limit_u = lu - max_pix_ref_u - inside_offset_u; inside_limit_v = lv - max_pix_ref_v - inside_offset_v; inside_nonempty = (int)inside_limit_u > 0 && (int)inside_limit_v > 0; outside_min_u_ = -0.5; outside_min_v_ = -0.5; outside_max_u_ = lu - 0.5; outside_max_v_ = lv - 0.5; int colorCount = palette->getStyleCount(); colorCount = std::max( {colorCount, TPixelCM32::getMaxInk(), TPixelCM32::getMaxPaint()}); std::vector paints(colorCount); std::vector inks(colorCount); for (i = 0; i < palette->getStyleCount(); i++) paints[i] = inks[i] = ::premultiply(palette->getStyle(i)->getAverageColor()); for (out_y = 0, out_y_ = 0.0; out_y < 
ly; out_y++, out_y_ += 1.0) { for (out_x = 0, out_x_ = 0.0; out_x < lx; out_x++, out_x_ += 1.0) { out_u_ = affMV1(aff_xy2uv, out_x_, out_y_); out_v_ = affMV2(aff_xy2uv, out_x_, out_y_); ref_u = intLE(out_u_); ref_v = intLE(out_v_); if (inside_nonempty && (UINT)(ref_u - inside_offset_u) < inside_limit_u && (UINT)(ref_v - inside_offset_v) < inside_limit_v) { calc_value = calc[(ref_u >> 3) + ref_v * calc_bytewrap]; if (calc_value && ((calc_value >> (ref_u & 7)) & 1)) { ref_out_u_ = ref_u - out_u_; ref_out_v_ = ref_v - out_v_; ref_out_f_ = aff0MV1(aff0_uv2fg, ref_out_u_, ref_out_v_); ref_out_g_ = aff0MV2(aff0_uv2fg, ref_out_u_, ref_out_v_); ref_out_f = tround(ref_out_f_); ref_out_g = tround(ref_out_g_); sum_weights = 0; sum_contribs_r = 0; sum_contribs_g = 0; sum_contribs_b = 0; sum_contribs_m = 0; for (i = n_pix - 1; i >= 0; i--) { pix_out_f = pix_ref_f[i] + ref_out_f; pix_out_g = pix_ref_g[i] + ref_out_g; weight = (filter[pix_out_f] * filter[pix_out_g]) >> 16; pix_u = pix_ref_u[i] + ref_u; pix_v = pix_ref_v[i] + ref_v; // pix_value = buffer_in[pix_u + pix_v * wrap_in]; int pix_in_pos = pix_u + pix_v * wrap_in; int tone = buffer_in[pix_in_pos].getTone(); int paint = buffer_in[pix_in_pos].getPaint(); int ink = buffer_in[pix_in_pos].getInk(); if (tone == TPixelCM32::getMaxTone()) pix_value = Converter::convert(paints[paint]); else if (tone == 0) pix_value = Converter::convert(inks[ink]); else pix_value = Converter::convert(blend( inks[ink], paints[paint], tone, TPixelCM32::getMaxTone())); sum_contribs_r += (int)pix_value.r * weight; sum_contribs_g += (int)pix_value.g * weight; sum_contribs_b += (int)pix_value.b * weight; sum_contribs_m += (int)pix_value.m * weight; sum_weights += weight; } inv_sum_weights = 1.0 / sum_weights; out_fval_r = sum_contribs_r * inv_sum_weights; out_fval_g = sum_contribs_g * inv_sum_weights; out_fval_b = sum_contribs_b * inv_sum_weights; out_fval_m = sum_contribs_m * inv_sum_weights; notLessThan(0.0, out_fval_r); notLessThan(0.0, out_fval_g); 
notLessThan(0.0, out_fval_b); notLessThan(0.0, out_fval_m); out_value_r = troundp(out_fval_r); out_value_g = troundp(out_fval_g); out_value_b = troundp(out_fval_b); out_value_m = troundp(out_fval_m); notMoreThan(T::maxChannelValue, out_value_r); notMoreThan(T::maxChannelValue, out_value_g); notMoreThan(T::maxChannelValue, out_value_b); notMoreThan(T::maxChannelValue, out_value_m); out_value.r = out_value_r; out_value.g = out_value_g; out_value.b = out_value_b; out_value.m = out_value_m; } else { // out_value = buffer_in[ref_u + ref_v * wrap_in]; int pix_in_pos = ref_u + ref_v * wrap_in; int tone = buffer_in[pix_in_pos].getTone(); int paint = buffer_in[pix_in_pos].getPaint(); int ink = buffer_in[pix_in_pos].getInk(); if (tone == TPixelCM32::getMaxTone()) out_value = Converter::convert(paints[paint]); else if (tone == 0) out_value = Converter::convert(inks[ink]); else out_value = Converter::convert(blend( inks[ink], paints[paint], tone, TPixelCM32::getMaxTone())); } } else if (outside_min_u_ <= out_u_ && out_u_ <= outside_max_u_ && outside_min_v_ <= out_v_ && out_v_ <= outside_max_v_) { if ((UINT)ref_u >= (UINT)lu || (UINT)ref_v >= (UINT)lv) must_calc = true; else { calc_value = calc[(ref_u >> 3) + ref_v * calc_bytewrap]; must_calc = calc_value && ((calc_value >> (ref_u & 7)) & 1); } if (must_calc) { ref_out_u_ = ref_u - out_u_; ref_out_v_ = ref_v - out_v_; ref_out_f_ = aff0MV1(aff0_uv2fg, ref_out_u_, ref_out_v_); ref_out_g_ = aff0MV2(aff0_uv2fg, ref_out_u_, ref_out_v_); ref_out_f = tround(ref_out_f_); ref_out_g = tround(ref_out_g_); sum_weights = 0; sum_contribs_r = 0; sum_contribs_g = 0; sum_contribs_b = 0; sum_contribs_m = 0; for (i = n_pix - 1; i >= 0; i--) { pix_out_f = pix_ref_f[i] + ref_out_f; pix_out_g = pix_ref_g[i] + ref_out_g; weight = (filter[pix_out_f] * filter[pix_out_g]) >> 16; pix_u = pix_ref_u[i] + ref_u; pix_v = pix_ref_v[i] + ref_v; notLessThan(0, pix_u); notLessThan(0, pix_v); notMoreThan(mu, pix_u); notMoreThan(mv, pix_v); // pix_value = 
buffer_in[pix_u + pix_v * wrap_in]; int pix_in_pos = pix_u + pix_v * wrap_in; int tone = buffer_in[pix_in_pos].getTone(); int paint = buffer_in[pix_in_pos].getPaint(); int ink = buffer_in[pix_in_pos].getInk(); if (tone == TPixelCM32::getMaxTone()) pix_value = Converter::convert(paints[paint]); else if (tone == 0) pix_value = Converter::convert(inks[ink]); else pix_value = Converter::convert(blend( inks[ink], paints[paint], tone, TPixelCM32::getMaxTone())); sum_contribs_r += (int)pix_value.r * weight; sum_contribs_g += (int)pix_value.g * weight; sum_contribs_b += (int)pix_value.b * weight; sum_contribs_m += (int)pix_value.m * weight; sum_weights += weight; } inv_sum_weights = 1.0 / sum_weights; out_fval_r = sum_contribs_r * inv_sum_weights; out_fval_g = sum_contribs_g * inv_sum_weights; out_fval_b = sum_contribs_b * inv_sum_weights; out_fval_m = sum_contribs_m * inv_sum_weights; notLessThan(0.0, out_fval_r); notLessThan(0.0, out_fval_g); notLessThan(0.0, out_fval_b); notLessThan(0.0, out_fval_m); out_value_r = troundp(out_fval_r); out_value_g = troundp(out_fval_g); out_value_b = troundp(out_fval_b); out_value_m = troundp(out_fval_m); notMoreThan(T::maxChannelValue, out_value_r); notMoreThan(T::maxChannelValue, out_value_g); notMoreThan(T::maxChannelValue, out_value_b); notMoreThan(T::maxChannelValue, out_value_m); out_value.r = out_value_r; out_value.g = out_value_g; out_value.b = out_value_b; out_value.m = out_value_m; } else { // out_value = buffer_in[ref_u + ref_v * wrap_in]; int pix_in_pos = ref_u + ref_v * wrap_in; int tone = buffer_in[pix_in_pos].getTone(); int paint = buffer_in[pix_in_pos].getPaint(); int ink = buffer_in[pix_in_pos].getInk(); if (tone == TPixelCM32::getMaxTone()) out_value = Converter::convert(paints[paint]); else if (tone == 0) out_value = Converter::convert(inks[ink]); else out_value = Converter::convert(blend( inks[ink], paints[paint], tone, TPixelCM32::getMaxTone())); } } else { out_value = default_value; } buffer_out[out_x + out_y * 
wrap_out] = out_value; } } if (calc) delete[] calc; } //--------------------------------------------------------------------------- void resample_cm32_rgbm(TRaster32P rout, const TRasterCM32P &rin, const TAffine &aff_xy2uv, const TAffine &aff0_uv2fg, int min_pix_ref_u, int min_pix_ref_v, int max_pix_ref_u, int max_pix_ref_v, int n_pix, int *pix_ref_u, int *pix_ref_v, int *pix_ref_f, int *pix_ref_g, short *filter, TPalette *palette) { const TPixelCM32 *buffer_in; /*T*/ TPixel32 *buffer_out; int lu, lv, wrap_in, mu, mv; int lx, ly, wrap_out; int out_x, out_y; double out_x_, out_y_; double out_u_, out_v_; int ref_u, ref_v; int pix_u, pix_v; double ref_out_u_, ref_out_v_; double ref_out_f_, ref_out_g_; int ref_out_f, ref_out_g; int pix_out_f, pix_out_g; int inside_offset_u, inside_offset_v; UINT inside_limit_u, inside_limit_v; int inside_nonempty; double outside_min_u_, outside_min_v_; double outside_max_u_, outside_max_v_; UCHAR *calc; int calc_allocsize; int calc_bytewrap; UCHAR calc_value; bool must_calc; /*T*/ TPixel32 pix_value; /*T*/ TPixel32 default_value(0, 0, 0, 0); int weight; int sum_weights; double inv_sum_weights; int sum_contribs_r, sum_contribs_g, sum_contribs_b, sum_contribs_m; double out_fval_r, out_fval_g, out_fval_b, out_fval_m; int out_value_r, out_value_g, out_value_b, out_value_m; /*T*/ TPixel32 out_value; int i; #ifdef USE_DOUBLE_TO_INT double d2iaux; #endif if (n_pix >= 512 || /*T*/ TPixel32::maxChannelValue > 255) { assert(false); /* resample_main_rgbm_bigradius( rout, rin, aff_xy2uv, aff0_uv2fg, min_pix_ref_u, min_pix_ref_v, max_pix_ref_u, max_pix_ref_v, n_pix, pix_ref_u, pix_ref_v, pix_ref_f, pix_ref_g, filter ); */ return; } if (!(rout->getLx() > 0 && rout->getLy() > 0)) return; if (!(rin->getLx() > 0 && rin->getLy() > 0)) { resample_clear_rgbm(rout, default_value); return; } int colorCount = palette->getStyleCount(); colorCount = std::max( {colorCount, TPixelCM32::getMaxInk(), TPixelCM32::getMaxPaint()}); std::vector paints(colorCount); 
std::vector inks(colorCount); for (i = 0; i < palette->getStyleCount(); i++) paints[i] = inks[i] = ::premultiply(palette->getStyle(i)->getAverageColor()); calc = 0; calc_allocsize = 0; create_calc(rin, min_pix_ref_u, max_pix_ref_u, min_pix_ref_v, max_pix_ref_v, calc, calc_allocsize, calc_bytewrap); buffer_in = rin->pixels(); buffer_out = rout->pixels(); lu = rin->getLx(); lx = rout->getLx(); lv = rin->getLy(); ly = rout->getLy(); wrap_in = rin->getWrap(); wrap_out = rout->getWrap(); mu = lu - 1; mv = lv - 1; inside_offset_u = -min_pix_ref_u; inside_offset_v = -min_pix_ref_v; inside_limit_u = lu - max_pix_ref_u - inside_offset_u; inside_limit_v = lv - max_pix_ref_v - inside_offset_v; inside_nonempty = (int)inside_limit_u > 0 && (int)inside_limit_v > 0; outside_min_u_ = -0.5; outside_min_v_ = -0.5; outside_max_u_ = lu - 0.5; outside_max_v_ = lv - 0.5; for (out_y = 0, out_y_ = 0.0; out_y < ly; out_y++, out_y_ += 1.0) { for (out_x = 0, out_x_ = 0.0; out_x < lx; out_x++, out_x_ += 1.0) { out_u_ = affMV1(aff_xy2uv, out_x_, out_y_); out_v_ = affMV2(aff_xy2uv, out_x_, out_y_); ref_u = intLE(out_u_); ref_v = intLE(out_v_); if (inside_nonempty && (UINT)(ref_u - inside_offset_u) < inside_limit_u && (UINT)(ref_v - inside_offset_v) < inside_limit_v) { calc_value = calc[(ref_u >> 3) + ref_v * calc_bytewrap]; if (calc_value && ((calc_value >> (ref_u & 7)) & 1)) { ref_out_u_ = ref_u - out_u_; ref_out_v_ = ref_v - out_v_; ref_out_f_ = aff0MV1(aff0_uv2fg, ref_out_u_, ref_out_v_); ref_out_g_ = aff0MV2(aff0_uv2fg, ref_out_u_, ref_out_v_); ref_out_f = tround(ref_out_f_); ref_out_g = tround(ref_out_g_); sum_weights = 0; sum_contribs_r = 0; sum_contribs_g = 0; sum_contribs_b = 0; sum_contribs_m = 0; for (i = n_pix - 1; i >= 0; i--) { pix_out_f = pix_ref_f[i] + ref_out_f; pix_out_g = pix_ref_g[i] + ref_out_g; weight = (filter[pix_out_f] * filter[pix_out_g]) >> 16; pix_u = pix_ref_u[i] + ref_u; pix_v = pix_ref_v[i] + ref_v; // pix_value = buffer_in[pix_u + pix_v * wrap_in]; int pix_in_pos 
= pix_u + pix_v * wrap_in; int t = buffer_in[pix_in_pos].getTone(); int p = buffer_in[pix_in_pos].getPaint(); int i = buffer_in[pix_in_pos].getInk(); if (t == TPixelCM32::getMaxTone()) pix_value = paints[p]; else if (t == 0) pix_value = inks[i]; else pix_value = blend(inks[i], paints[p], t, TPixelCM32::getMaxTone()); sum_contribs_r += (int)pix_value.r * weight; sum_contribs_g += (int)pix_value.g * weight; sum_contribs_b += (int)pix_value.b * weight; sum_contribs_m += (int)pix_value.m * weight; sum_weights += weight; } inv_sum_weights = 1.0 / sum_weights; out_fval_r = sum_contribs_r * inv_sum_weights; out_fval_g = sum_contribs_g * inv_sum_weights; out_fval_b = sum_contribs_b * inv_sum_weights; out_fval_m = sum_contribs_m * inv_sum_weights; notLessThan(0.0, out_fval_r); notLessThan(0.0, out_fval_g); notLessThan(0.0, out_fval_b); notLessThan(0.0, out_fval_m); out_value_r = troundp(out_fval_r); out_value_g = troundp(out_fval_g); out_value_b = troundp(out_fval_b); out_value_m = troundp(out_fval_m); notMoreThan(/*T*/ TPixel32::maxChannelValue, out_value_r); notMoreThan(/*T*/ TPixel32::maxChannelValue, out_value_g); notMoreThan(/*T*/ TPixel32::maxChannelValue, out_value_b); notMoreThan(/*T*/ TPixel32::maxChannelValue, out_value_m); out_value.r = out_value_r; out_value.g = out_value_g; out_value.b = out_value_b; out_value.m = out_value_m; } else { int pix_in_pos = ref_u + ref_v * wrap_in; int t = buffer_in[pix_in_pos].getTone(); int p = buffer_in[pix_in_pos].getPaint(); int i = buffer_in[pix_in_pos].getInk(); if (t == TPixelCM32::getMaxTone()) out_value = paints[p]; else if (t == 0) out_value = inks[i]; else out_value = blend(inks[i], paints[p], t, TPixelCM32::getMaxTone()); // out_value = buffer_in[ref_u + ref_v * wrap_in]; } } else if (outside_min_u_ <= out_u_ && out_u_ <= outside_max_u_ && outside_min_v_ <= out_v_ && out_v_ <= outside_max_v_) { if ((UINT)ref_u >= (UINT)lu || (UINT)ref_v >= (UINT)lv) must_calc = true; else { calc_value = calc[(ref_u >> 3) + ref_v * 
calc_bytewrap]; must_calc = calc_value && ((calc_value >> (ref_u & 7)) & 1); } if (must_calc) { ref_out_u_ = ref_u - out_u_; ref_out_v_ = ref_v - out_v_; ref_out_f_ = aff0MV1(aff0_uv2fg, ref_out_u_, ref_out_v_); ref_out_g_ = aff0MV2(aff0_uv2fg, ref_out_u_, ref_out_v_); ref_out_f = tround(ref_out_f_); ref_out_g = tround(ref_out_g_); sum_weights = 0; sum_contribs_r = 0; sum_contribs_g = 0; sum_contribs_b = 0; sum_contribs_m = 0; for (i = n_pix - 1; i >= 0; i--) { pix_out_f = pix_ref_f[i] + ref_out_f; pix_out_g = pix_ref_g[i] + ref_out_g; weight = (filter[pix_out_f] * filter[pix_out_g]) >> 16; pix_u = pix_ref_u[i] + ref_u; pix_v = pix_ref_v[i] + ref_v; notLessThan(0, pix_u); notLessThan(0, pix_v); notMoreThan(mu, pix_u); notMoreThan(mv, pix_v); // pix_value = buffer_in[pix_u + pix_v * wrap_in]; int pix_in_pos = pix_u + pix_v * wrap_in; int t = buffer_in[pix_in_pos].getTone(); int p = buffer_in[pix_in_pos].getPaint(); int i = buffer_in[pix_in_pos].getInk(); if (t == TPixelCM32::getMaxTone()) pix_value = paints[p]; else if (t == 0) pix_value = inks[i]; else pix_value = blend(inks[i], paints[p], t, TPixelCM32::getMaxTone()); sum_contribs_r += (int)pix_value.r * weight; sum_contribs_g += (int)pix_value.g * weight; sum_contribs_b += (int)pix_value.b * weight; sum_contribs_m += (int)pix_value.m * weight; sum_weights += weight; } inv_sum_weights = 1.0 / sum_weights; out_fval_r = sum_contribs_r * inv_sum_weights; out_fval_g = sum_contribs_g * inv_sum_weights; out_fval_b = sum_contribs_b * inv_sum_weights; out_fval_m = sum_contribs_m * inv_sum_weights; notLessThan(0.0, out_fval_r); notLessThan(0.0, out_fval_g); notLessThan(0.0, out_fval_b); notLessThan(0.0, out_fval_m); out_value_r = troundp(out_fval_r); out_value_g = troundp(out_fval_g); out_value_b = troundp(out_fval_b); out_value_m = troundp(out_fval_m); notMoreThan(/*T*/ TPixel32::maxChannelValue, out_value_r); notMoreThan(/*T*/ TPixel32::maxChannelValue, out_value_g); notMoreThan(/*T*/ TPixel32::maxChannelValue, 
out_value_b); notMoreThan(/*T*/ TPixel32::maxChannelValue, out_value_m); out_value.r = out_value_r; out_value.g = out_value_g; out_value.b = out_value_b; out_value.m = out_value_m; } else { // out_value = buffer_in[ref_u + ref_v * wrap_in]; int pix_in_pos = ref_u + ref_v * wrap_in; int t = buffer_in[pix_in_pos].getTone(); int p = buffer_in[pix_in_pos].getPaint(); int i = buffer_in[pix_in_pos].getInk(); if (t == TPixelCM32::getMaxTone()) out_value = paints[p]; else if (t == 0) out_value = inks[i]; else out_value = blend(inks[i], paints[p], t, TPixelCM32::getMaxTone()); } } else { out_value = default_value; } buffer_out[out_x + out_y * wrap_out] = out_value; } } if (calc) delete[] calc; } //--------------------------------------------------------------------------- template void rop_resample_rgbm_2(TRasterPT rout, const TRasterCM32P &rin, const TAffine &aff, TRop::ResampleFilterType flt_type, double blur, TPalette *palette) { #define FILTER_RESOLUTION 1024 #define MAX_FILTER_VAL 32767 #ifdef USE_STATIC_VARS static TRop::ResampleFilterType current_flt_type = TRop::None; static std::unique_ptr filter_array; static short *filter = 0; static int min_filter_fg, max_filter_fg; static int filter_array_size = 0; static int n_pix = 0; static std::unique_ptr pix_ref_u; static std::unique_ptr pix_ref_v; static std::unique_ptr pix_ref_f; static std::unique_ptr pix_ref_g; static int current_max_n_pix = 0; #else std::unique_ptr filter_array; short *filter = 0; int min_filter_fg, max_filter_fg; int filter_array_size = 0; int n_pix = 0; std::unique_ptr pix_ref_u; std::unique_ptr pix_ref_v; std::unique_ptr pix_ref_f; std::unique_ptr pix_ref_g; int current_max_n_pix = 0; #endif int filter_st_radius; int filter_fg_radius; int filter_size; int f; double s_; double weight_; int weight; TAffine aff_uv2xy; TAffine aff_xy2uv; TAffine aff0_uv2xy; TAffine aff0_xy2st; TAffine aff0_uv2st; TAffine aff0_st2fg; TAffine aff0_uv2fg; TAffine aff0_fg2uv; double scale_x, scale_y; double inv_blur; int 
max_n_pix; double min_pix_out_u_, min_pix_out_v_; double max_pix_out_u_, max_pix_out_v_; int min_pix_ref_u, min_pix_ref_v; int max_pix_ref_u, max_pix_ref_v; int cur_pix_ref_u, cur_pix_ref_v; double cur_pix_ref_f_, cur_pix_ref_g_; int cur_pix_ref_f, cur_pix_ref_g; double min_ref_out_f_, min_ref_out_g_; double max_ref_out_f_, max_ref_out_g_; int min_ref_out_f, min_ref_out_g; int max_ref_out_f, max_ref_out_g; int min_pix_ref_f, min_pix_ref_g; int max_pix_ref_f, max_pix_ref_g; int min_pix_out_f, min_pix_out_g; int max_pix_out_f, max_pix_out_g; int min_pix_out_fg; int max_pix_out_fg; #ifdef USE_DOUBLE_TO_INT double d2iaux; #endif assert(flt_type != TRop::None); filter_st_radius = get_filter_radius(flt_type); filter_fg_radius = filter_st_radius * FILTER_RESOLUTION; aff_uv2xy = aff; aff0_uv2xy = aff_uv2xy.place(0.0, 0.0, 0.0, 0.0); aff_xy2uv = aff_uv2xy.inv(); scale_x = sqrt(sq(aff_uv2xy.a11) + sq(aff_uv2xy.a12)); scale_y = sqrt(sq(aff_uv2xy.a21) + sq(aff_uv2xy.a22)); aff0_xy2st = TScale((scale_x > 1.0) ? 1.0 / scale_x : 1.0, (scale_y > 1.0) ? 
1.0 / scale_y : 1.0); if (blur > 1.0) // per ora il blur e' 1.0 { inv_blur = 1.0 / blur; aff0_xy2st = TScale(inv_blur, inv_blur) * aff0_xy2st; } aff0_uv2st = aff0_xy2st * aff0_uv2xy; aff0_st2fg = TScale(FILTER_RESOLUTION, FILTER_RESOLUTION); aff0_uv2fg = aff0_st2fg * aff0_uv2st; aff0_fg2uv = aff0_uv2fg.inv(); minmax(-filter_fg_radius, -filter_fg_radius, filter_fg_radius, filter_fg_radius, aff0_fg2uv, min_pix_out_u_, min_pix_out_v_, max_pix_out_u_, max_pix_out_v_); min_pix_ref_u = intGT(min_pix_out_u_); min_pix_ref_v = intGT(min_pix_out_v_); max_pix_ref_u = intLT(max_pix_out_u_) + 1; max_pix_ref_v = intLT(max_pix_out_v_) + 1; if (blur <= 1.0) { if (aff_uv2xy.a12 == 0.0 && aff_uv2xy.a21 == 0.0) { if (aff_uv2xy.a11 == 1.0 && isInt(aff_uv2xy.a13)) { min_pix_ref_u = 0; max_pix_ref_u = 0; } if (aff_uv2xy.a22 == 1.0 && isInt(aff_uv2xy.a23)) { min_pix_ref_v = 0; max_pix_ref_v = 0; } } else if (aff_uv2xy.a11 == 0.0 && aff_uv2xy.a22 == 0.0) { if (aff_uv2xy.a12 == 1.0 && isInt(aff_uv2xy.a13)) { min_pix_ref_v = 0; max_pix_ref_v = 0; } if (aff_uv2xy.a21 == 1.0 && isInt(aff_uv2xy.a23)) { min_pix_ref_u = 0; max_pix_ref_u = 0; } } } max_n_pix = (max_pix_ref_u - min_pix_ref_u + 1) * (max_pix_ref_v - min_pix_ref_v + 1); if (max_n_pix > current_max_n_pix) { current_max_n_pix = max_n_pix; pix_ref_u.reset(new int[current_max_n_pix]); pix_ref_v.reset(new int[current_max_n_pix]); pix_ref_f.reset(new int[current_max_n_pix]); pix_ref_g.reset(new int[current_max_n_pix]); assert(pix_ref_u && pix_ref_v && pix_ref_f && pix_ref_g); } minmax(-1, -1, 0, 0, aff0_uv2fg, min_ref_out_f_, min_ref_out_g_, max_ref_out_f_, max_ref_out_g_); min_ref_out_f = tround(min_ref_out_f_); min_ref_out_g = tround(min_ref_out_g_); max_ref_out_f = tround(max_ref_out_f_); max_ref_out_g = tround(max_ref_out_g_); min_pix_ref_f = -filter_fg_radius - max_ref_out_f; min_pix_ref_g = -filter_fg_radius - max_ref_out_g; max_pix_ref_f = filter_fg_radius - min_ref_out_f; max_pix_ref_g = filter_fg_radius - min_ref_out_g; 
min_pix_out_f = c_maxint; min_pix_out_g = c_maxint; max_pix_out_f = c_minint; max_pix_out_g = c_minint; n_pix = 0; for (cur_pix_ref_v = min_pix_ref_v; cur_pix_ref_v <= max_pix_ref_v; cur_pix_ref_v++) for (cur_pix_ref_u = min_pix_ref_u; cur_pix_ref_u <= max_pix_ref_u; cur_pix_ref_u++) { cur_pix_ref_f_ = affMV1(aff0_uv2fg, cur_pix_ref_u, cur_pix_ref_v); cur_pix_ref_g_ = affMV2(aff0_uv2fg, cur_pix_ref_u, cur_pix_ref_v); cur_pix_ref_f = tround(cur_pix_ref_f_); cur_pix_ref_g = tround(cur_pix_ref_g_); if (min_pix_ref_f <= cur_pix_ref_f && cur_pix_ref_f <= max_pix_ref_f && min_pix_ref_g <= cur_pix_ref_g && cur_pix_ref_g <= max_pix_ref_g) { pix_ref_u[n_pix] = cur_pix_ref_u; pix_ref_v[n_pix] = cur_pix_ref_v; pix_ref_f[n_pix] = cur_pix_ref_f; pix_ref_g[n_pix] = cur_pix_ref_g; notMoreThan(cur_pix_ref_f + min_ref_out_f, min_pix_out_f); notMoreThan(cur_pix_ref_g + min_ref_out_g, min_pix_out_g); notLessThan(cur_pix_ref_f + max_ref_out_f, max_pix_out_f); notLessThan(cur_pix_ref_g + max_ref_out_g, max_pix_out_g); n_pix++; } } assert(n_pix > 0); #ifdef USE_STATIC_VARS if (flt_type != current_flt_type) { current_flt_type = flt_type; #endif min_filter_fg = -filter_fg_radius - FILTER_RESOLUTION * 3 / 2; max_filter_fg = filter_fg_radius + FILTER_RESOLUTION * 3 / 2; filter_size = max_filter_fg - min_filter_fg + 1; if (filter_size > filter_array_size) { filter_array.reset(new short[filter_size]); assert(filter_array); filter_array_size = filter_size; } filter = filter_array.get() - min_filter_fg; filter[0] = MAX_FILTER_VAL; for (f = 1, s_ = 1.0 / FILTER_RESOLUTION; f < filter_fg_radius; f++, s_ += 1.0 / FILTER_RESOLUTION) { weight_ = get_filter_value(flt_type, s_) * (double)MAX_FILTER_VAL; weight = tround(weight_); filter[f] = weight; filter[-f] = weight; } for (f = filter_fg_radius; f <= max_filter_fg; f++) filter[f] = 0; for (f = -filter_fg_radius; f >= min_filter_fg; f--) filter[f] = 0; #ifdef USE_STATIC_VARS } #endif min_pix_out_fg = std::min(min_pix_out_f, min_pix_out_g); 
max_pix_out_fg = std::max(max_pix_out_f, max_pix_out_g); if (min_pix_out_fg < min_filter_fg || max_pix_out_fg > max_filter_fg) { filter_size = max_pix_out_fg - min_pix_out_fg + 1; if (filter_size > filter_array_size) { // controllare!! // TREALLOC (filter_array, filter_size) filter_array.reset(new short[filter_size]); assert(filter_array); filter_array_size = filter_size; } filter = filter_array.get() - min_filter_fg; if (min_pix_out_fg < min_filter_fg) { int delta = min_filter_fg - min_pix_out_fg; for (f = max_filter_fg; f >= min_filter_fg; f--) filter[f + delta] = filter[f]; filter += delta; for (f = min_filter_fg - 1; f >= min_pix_out_fg; f--) filter[f] = 0; min_filter_fg = min_pix_out_fg; } if (max_pix_out_fg > max_filter_fg) { for (f = max_filter_fg + 1; f <= max_pix_out_fg; f++) filter[f] = 0; max_filter_fg = max_pix_out_fg; } } #if defined(USE_SSE2) && defined(_MSC_VER) TRaster32P rout32 = rout; if ((TSystem::getCPUExtensions() & TSystem::CpuSupportsSse2) && rout32) resample_main_cm32_rgbm_SSE2( rout32, rin, aff_xy2uv, aff0_uv2fg, min_pix_ref_u, min_pix_ref_v, max_pix_ref_u, max_pix_ref_v, n_pix, pix_ref_u.get(), pix_ref_v.get(), pix_ref_f.get(), pix_ref_g.get(), filter, palette); else #endif resample_main_cm32_rgbm( rout, rin, aff_xy2uv, aff0_uv2fg, min_pix_ref_u, min_pix_ref_v, max_pix_ref_u, max_pix_ref_v, n_pix, pix_ref_u.get(), pix_ref_v.get(), pix_ref_f.get(), pix_ref_g.get(), filter, palette); } //----------------------------------------------------------------------------- } // namespace //----------------------------------------------------------------------------- void TRop::resample(const TRasterP &out, const TRasterCM32P &in, const TPaletteP palette, const TAffine &aff, TRop::ResampleFilterType filterType, double blur) { TRasterP rin = in; TRaster32P rout32 = out; in->lock(); out->lock(); if (rout32) rop_resample_rgbm_2(rout32, rin, aff, filterType, blur, palette.getPointer()); else { TRaster64P rout64 = out; if (rout64) 
rop_resample_rgbm_2(rout64, rin, aff, filterType, blur, palette.getPointer()); else { in->unlock(); out->unlock(); throw TRopException("unsupported pixel type"); return; } } in->unlock(); out->unlock(); } #endif // TNZCORE_LIGHT void TRop::resample(const TRasterP &rout, const TRasterP &rin, const TAffine &aff, ResampleFilterType filterType, double blur) { rin->lock(); rout->lock(); if (filterType == ClosestPixel || filterType == Bilinear) { if ((TRaster64P)rout || (TRaster64P)rin) filterType = Triangle; else { quickResample(rout, rin, aff, filterType); rin->unlock(); rout->unlock(); return; } } TRaster32P rout32 = rout, rin32 = rin; if (rout32) { if (!rin32) { rin32 = TRaster32P(rin->getLx(), rin->getLy()); TRop::convert(rin32, rin); } do_resample(rout32, rin32, aff, filterType, blur); } else { #ifndef TNZCORE_LIGHT TRasterCM32P routCM32 = rout, rinCM32 = rin; if (routCM32 && rinCM32) do_resample(routCM32, rinCM32, aff); else #endif { TRaster64P rout64 = rout, rin64 = rin; if (rout64) { if (!rin64) { rin64 = TRaster64P(rin->getLx(), rin->getLy()); TRop::convert(rin64, rin); } do_resample(rout64, rin64, aff, filterType, blur); } else { TRasterGR8P routGR8 = rout, rinGR8 = rin; TRaster32P rin32 = rin; if (routGR8 && rinGR8) do_resample(routGR8, rinGR8, aff, filterType, blur); else if (routGR8 && rin32) do_resample(routGR8, rin32, aff, filterType, blur); else { rin->unlock(); rout->unlock(); throw TRopException("unsupported pixel type"); } } } } rin->unlock(); rout->unlock(); } //-----------------------------------------------------------------------------