tahoma2d/thirdparty/superlu/SuperLU_4.1/SRC/ilu_sdrop_row.c
2016-03-24 01:31:57 +09:00

329 lines
8.5 KiB
C

/*! @file ilu_sdrop_row.c
* \brief Drop small rows from L
*
* <pre>
* -- SuperLU routine (version 4.1) --
* Lawrence Berkeley National Laboratory.
* June 30, 2009
* </pre>
*/
#include <math.h>
#include <stdlib.h>
#include "slu_sdefs.h"
extern void sswap_(int *, float [], int *, float [], int *);
extern void saxpy_(int *, float *, float [], int *, float [], int *);
extern void scopy_(int *, float [], int *, float [], int *);
extern float sasum_(int *, float *, int *);
extern float snrm2_(int *, float *, int *);
extern double dnrm2_(int *, double [], int *);
extern int isamax_(int *, float [], int *);
static float *A; /* used in _compare_ only */
static int _compare_(const void *a, const void *b)
{
register int *x = (int *)a, *y = (int *)b;
if (A[*x] - A[*y] > 0.0) return -1;
else if (A[*x] - A[*y] < 0.0) return 1;
else return 0;
}
/*! \brief
* <pre>
* Purpose
* =======
* ilu_sdrop_row() - Drop some small rows from the previous
* supernode (L-part only).
* </pre>
*/
int ilu_sdrop_row(
superlu_options_t *options, /* options */
int first, /* index of the first column in the supernode */
int last, /* index of the last column in the supernode */
double drop_tol, /* dropping parameter */
int quota, /* maximum nonzero entries allowed */
int *nnzLj, /* in/out number of nonzeros in L(:, 1:last) */
double *fill_tol, /* in/out - on exit, fill_tol=-num_zero_pivots,
* does not change if options->ILU_MILU != SMILU1 */
GlobalLU_t *Glu, /* modified */
float swork[], /* working space
* the length of swork[] should be no less than
* the number of rows in the supernode */
float swork2[], /* working space with the same size as swork[],
* used only by the second dropping rule */
int lastc /* if lastc == 0, there is nothing after the
* working supernode [first:last];
* if lastc == 1, there is one more column after
* the working supernode. */ )
{
register int i, j, k, m1;
register int nzlc; /* number of nonzeros in column last+1 */
register int xlusup_first, xlsub_first;
int m, n; /* m x n is the size of the supernode */
int r = 0; /* number of dropped rows */
register float *temp;
register float *lusup = Glu->lusup;
register int *lsub = Glu->lsub;
register int *xlsub = Glu->xlsub;
register int *xlusup = Glu->xlusup;
register float d_max = 0.0, d_min = 1.0;
int drop_rule = options->ILU_DropRule;
milu_t milu = options->ILU_MILU;
norm_t nrm = options->ILU_Norm;
float zero = 0.0;
float one = 1.0;
float none = -1.0;
int i_1 = 1;
int inc_diag; /* inc_diag = m + 1 */
int nzp = 0; /* number of zero pivots */
float alpha = pow((double)(Glu->n), -1.0 / options->ILU_MILU_Dim);
xlusup_first = xlusup[first];
xlsub_first = xlsub[first];
m = xlusup[first + 1] - xlusup_first;
n = last - first + 1;
m1 = m - 1;
inc_diag = m + 1;
nzlc = lastc ? (xlusup[last + 2] - xlusup[last + 1]) : 0;
temp = swork - n;
/* Quick return if nothing to do. */
if (m == 0 || m == n || drop_rule == NODROP)
{
*nnzLj += m * n;
return 0;
}
/* basic dropping: ILU(tau) */
for (i = n; i <= m1; )
{
/* the average abs value of ith row */
switch (nrm)
{
case ONE_NORM:
temp[i] = sasum_(&n, &lusup[xlusup_first + i], &m) / (double)n;
break;
case TWO_NORM:
temp[i] = snrm2_(&n, &lusup[xlusup_first + i], &m)
/ sqrt((double)n);
break;
case INF_NORM:
default:
k = isamax_(&n, &lusup[xlusup_first + i], &m) - 1;
temp[i] = fabs(lusup[xlusup_first + i + m * k]);
break;
}
/* drop small entries due to drop_tol */
if (drop_rule & DROP_BASIC && temp[i] < drop_tol)
{
r++;
/* drop the current row and move the last undropped row here */
if (r > 1) /* add to last row */
{
/* accumulate the sum (for MILU) */
switch (milu)
{
case SMILU_1:
case SMILU_2:
saxpy_(&n, &one, &lusup[xlusup_first + i], &m,
&lusup[xlusup_first + m - 1], &m);
break;
case SMILU_3:
for (j = 0; j < n; j++)
lusup[xlusup_first + (m - 1) + j * m] +=
fabs(lusup[xlusup_first + i + j * m]);
break;
case SILU:
default:
break;
}
scopy_(&n, &lusup[xlusup_first + m1], &m,
&lusup[xlusup_first + i], &m);
} /* if (r > 1) */
else /* move to last row */
{
sswap_(&n, &lusup[xlusup_first + m1], &m,
&lusup[xlusup_first + i], &m);
if (milu == SMILU_3)
for (j = 0; j < n; j++) {
lusup[xlusup_first + m1 + j * m] =
fabs(lusup[xlusup_first + m1 + j * m]);
}
}
lsub[xlsub_first + i] = lsub[xlsub_first + m1];
m1--;
continue;
} /* if dropping */
else
{
if (temp[i] > d_max) d_max = temp[i];
if (temp[i] < d_min) d_min = temp[i];
}
i++;
} /* for */
/* Secondary dropping: drop more rows according to the quota. */
quota = ceil((double)quota / (double)n);
if (drop_rule & DROP_SECONDARY && m - r > quota)
{
register double tol = d_max;
/* Calculate the second dropping tolerance */
if (quota > n)
{
if (drop_rule & DROP_INTERP) /* by interpolation */
{
d_max = 1.0 / d_max; d_min = 1.0 / d_min;
tol = 1.0 / (d_max + (d_min - d_max) * quota / (m - n - r));
}
else /* by quick select */
{
int len = m1 - n + 1;
scopy_(&len, swork, &i_1, swork2, &i_1);
tol = sqselect(len, swork2, quota - n);
#if 0
register int *itemp = iwork - n;
A = temp;
for (i = n; i <= m1; i++) itemp[i] = i;
qsort(iwork, m1 - n + 1, sizeof(int), _compare_);
tol = temp[itemp[quota]];
#endif
}
}
for (i = n; i <= m1; )
{
if (temp[i] <= tol)
{
register int j;
r++;
/* drop the current row and move the last undropped row here */
if (r > 1) /* add to last row */
{
/* accumulate the sum (for MILU) */
switch (milu)
{
case SMILU_1:
case SMILU_2:
saxpy_(&n, &one, &lusup[xlusup_first + i], &m,
&lusup[xlusup_first + m - 1], &m);
break;
case SMILU_3:
for (j = 0; j < n; j++)
lusup[xlusup_first + (m - 1) + j * m] +=
fabs(lusup[xlusup_first + i + j * m]);
break;
case SILU:
default:
break;
}
scopy_(&n, &lusup[xlusup_first + m1], &m,
&lusup[xlusup_first + i], &m);
} /* if (r > 1) */
else /* move to last row */
{
sswap_(&n, &lusup[xlusup_first + m1], &m,
&lusup[xlusup_first + i], &m);
if (milu == SMILU_3)
for (j = 0; j < n; j++) {
lusup[xlusup_first + m1 + j * m] =
fabs(lusup[xlusup_first + m1 + j * m]);
}
}
lsub[xlsub_first + i] = lsub[xlsub_first + m1];
m1--;
temp[i] = temp[m1];
continue;
}
i++;
} /* for */
} /* if secondary dropping */
for (i = n; i < m; i++) temp[i] = 0.0;
if (r == 0)
{
*nnzLj += m * n;
return 0;
}
/* add dropped entries to the diagnal */
if (milu != SILU)
{
register int j;
float t;
float omega;
for (j = 0; j < n; j++)
{
t = lusup[xlusup_first + (m - 1) + j * m];
if (t == zero) continue;
if (t > zero)
omega = SUPERLU_MIN(2.0 * (1.0 - alpha) / t, 1.0);
else
omega = SUPERLU_MAX(2.0 * (1.0 - alpha) / t, -1.0);
t *= omega;
switch (milu)
{
case SMILU_1:
if (t != none) {
lusup[xlusup_first + j * inc_diag] *= (one + t);
}
else
{
lusup[xlusup_first + j * inc_diag] *= *fill_tol;
#ifdef DEBUG
printf("[1] ZERO PIVOT: FILL col %d.\n", first + j);
fflush(stdout);
#endif
nzp++;
}
break;
case SMILU_2:
lusup[xlusup_first + j * inc_diag] *= (1.0 + fabs(t));
break;
case SMILU_3:
lusup[xlusup_first + j * inc_diag] *= (one + t);
break;
case SILU:
default:
break;
}
}
if (nzp > 0) *fill_tol = -nzp;
}
/* Remove dropped entries from the memory and fix the pointers. */
m1 = m - r;
for (j = 1; j < n; j++)
{
register int tmp1, tmp2;
tmp1 = xlusup_first + j * m1;
tmp2 = xlusup_first + j * m;
for (i = 0; i < m1; i++)
lusup[i + tmp1] = lusup[i + tmp2];
}
for (i = 0; i < nzlc; i++)
lusup[xlusup_first + i + n * m1] = lusup[xlusup_first + i + n * m];
for (i = 0; i < nzlc; i++)
lsub[xlsub[last + 1] - r + i] = lsub[xlsub[last + 1] + i];
for (i = first + 1; i <= last + 1; i++)
{
xlusup[i] -= r * (i - first);
xlsub[i] -= r;
}
if (lastc)
{
xlusup[last + 2] -= r * n;
xlsub[last + 2] -= r;
}
*nnzLj += (m - r) * n;
return r;
}