tahoma2d/thirdparty/superlu/SuperLU_4.1/INSTALL/slamch.c
2016-03-24 01:31:57 +09:00

982 lines
24 KiB
C

#include <stdio.h>
#define TRUE_ (1)
#define FALSE_ (0)
#define min(a,b) ((a) <= (b) ? (a) : (b))
#define max(a,b) ((a) >= (b) ? (a) : (b))
#define abs(x) ((x) >= 0 ? (x) : -(x))
#define dabs(x) (double)abs(x)
float slamch_(char *cmach)
{
/* -- LAPACK auxiliary routine (version 2.0) --
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
Courant Institute, Argonne National Lab, and Rice University
October 31, 1992
Purpose
=======
SLAMCH determines single precision machine parameters.
Arguments
=========
CMACH (input) CHARACTER*1
Specifies the value to be returned by SLAMCH:
= 'E' or 'e', SLAMCH := eps
= 'S' or 's , SLAMCH := sfmin
= 'B' or 'b', SLAMCH := base
= 'P' or 'p', SLAMCH := eps*base
= 'N' or 'n', SLAMCH := t
= 'R' or 'r', SLAMCH := rnd
= 'M' or 'm', SLAMCH := emin
= 'U' or 'u', SLAMCH := rmin
= 'L' or 'l', SLAMCH := emax
= 'O' or 'o', SLAMCH := rmax
where
eps = relative machine precision
sfmin = safe minimum, such that 1/sfmin does not overflow
base = base of the machine
prec = eps*base
t = number of (base) digits in the mantissa
rnd = 1.0 when rounding occurs in addition, 0.0 otherwise
emin = minimum exponent before (gradual) underflow
rmin = underflow threshold - base**(emin-1)
emax = largest exponent before overflow
rmax = overflow threshold - (base**emax)*(1-eps)
=====================================================================
*/
/* >>Start of File<<
Initialized data */
static int first = TRUE_;
/* System generated locals */
int i__1;
float ret_val;
/* Builtin functions */
double pow_ri(float *, int *);
/* Local variables */
static float base;
static int beta;
static float emin, prec, emax;
static int imin, imax;
static int lrnd;
static float rmin, rmax, t, rmach;
extern int lsame_(char *, char *);
static float small, sfmin;
extern /* Subroutine */ int slamc2_(int *, int *, int *, float
*, int *, float *, int *, float *);
static int it;
static float rnd, eps;
if (first) {
first = FALSE_;
slamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax);
base = (float) beta;
t = (float) it;
if (lrnd) {
rnd = 1.f;
i__1 = 1 - it;
eps = pow_ri(&base, &i__1) / 2;
} else {
rnd = 0.f;
i__1 = 1 - it;
eps = pow_ri(&base, &i__1);
}
prec = eps * base;
emin = (float) imin;
emax = (float) imax;
sfmin = rmin;
small = 1.f / rmax;
if (small >= sfmin) {
/* Use SMALL plus a bit, to avoid the possibility of rou
nding
causing overflow when computing 1/sfmin. */
sfmin = small * (eps + 1.f);
}
}
if (lsame_(cmach, "E")) {
rmach = eps;
} else if (lsame_(cmach, "S")) {
rmach = sfmin;
} else if (lsame_(cmach, "B")) {
rmach = base;
} else if (lsame_(cmach, "P")) {
rmach = prec;
} else if (lsame_(cmach, "N")) {
rmach = t;
} else if (lsame_(cmach, "R")) {
rmach = rnd;
} else if (lsame_(cmach, "M")) {
rmach = emin;
} else if (lsame_(cmach, "U")) {
rmach = rmin;
} else if (lsame_(cmach, "L")) {
rmach = emax;
} else if (lsame_(cmach, "O")) {
rmach = rmax;
}
ret_val = rmach;
return ret_val;
/* End of SLAMCH */
} /* slamch_ */
/* Subroutine */ int slamc1_(int *beta, int *t, int *rnd, int
*ieee1)
{
/* -- LAPACK auxiliary routine (version 2.0) --
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
Courant Institute, Argonne National Lab, and Rice University
October 31, 1992
Purpose
=======
SLAMC1 determines the machine parameters given by BETA, T, RND, and
IEEE1.
Arguments
=========
BETA (output) INT
The base of the machine.
T (output) INT
The number of ( BETA ) digits in the mantissa.
RND (output) INT
Specifies whether proper rounding ( RND = .TRUE. ) or
chopping ( RND = .FALSE. ) occurs in addition. This may not
be a reliable guide to the way in which the machine performs
its arithmetic.
IEEE1 (output) INT
Specifies whether rounding appears to be done in the IEEE
'round to nearest' style.
Further Details
===============
The routine is based on the routine ENVRON by Malcolm and
incorporates suggestions by Gentleman and Marovich. See
Malcolm M. A. (1972) Algorithms to reveal properties of
floating-point arithmetic. Comms. of the ACM, 15, 949-951.
Gentleman W. M. and Marovich S. B. (1974) More on algorithms
that reveal properties of floating point arithmetic units.
Comms. of the ACM, 17, 276-277.
=====================================================================
*/
/* Initialized data */
static int first = TRUE_;
/* System generated locals */
float r__1, r__2;
/* Local variables */
static int lrnd;
static float a, b, c, f;
static int lbeta;
static float savec;
static int lieee1;
static float t1, t2;
extern double slamc3_(float *, float *);
static int lt;
static float one, qtr;
if (first) {
first = FALSE_;
one = 1.f;
/* LBETA, LIEEE1, LT and LRND are the local values of BE
TA,
IEEE1, T and RND.
Throughout this routine we use the function SLAMC3 to ens
ure
that relevant values are stored and not held in registers,
or
are not affected by optimizers.
Compute a = 2.0**m with the smallest positive integer m s
uch
that
fl( a + 1.0 ) = a. */
a = 1.f;
c = 1.f;
/* + WHILE( C.EQ.ONE )LOOP */
L10:
if (c == one) {
a *= 2;
c = slamc3_(&a, &one);
r__1 = -(double)a;
c = slamc3_(&c, &r__1);
goto L10;
}
/* + END WHILE
Now compute b = 2.0**m with the smallest positive integer
m
such that
fl( a + b ) .gt. a. */
b = 1.f;
c = slamc3_(&a, &b);
/* + WHILE( C.EQ.A )LOOP */
L20:
if (c == a) {
b *= 2;
c = slamc3_(&a, &b);
goto L20;
}
/* + END WHILE
Now compute the base. a and c are neighbouring floating po
int
numbers in the interval ( beta**t, beta**( t + 1 ) ) and
so
their difference is beta. Adding 0.25 to c is to ensure that
it
is truncated to beta and not ( beta - 1 ). */
qtr = one / 4;
savec = c;
r__1 = -(double)a;
c = slamc3_(&c, &r__1);
lbeta = c + qtr;
/* Now determine whether rounding or chopping occurs, by addin
g a
bit less than beta/2 and a bit more than beta/2 to
a. */
b = (float) lbeta;
r__1 = b / 2;
r__2 = -(double)b / 100;
f = slamc3_(&r__1, &r__2);
c = slamc3_(&f, &a);
if (c == a) {
lrnd = TRUE_;
} else {
lrnd = FALSE_;
}
r__1 = b / 2;
r__2 = b / 100;
f = slamc3_(&r__1, &r__2);
c = slamc3_(&f, &a);
if (lrnd && c == a) {
lrnd = FALSE_;
}
/* Try and decide whether rounding is done in the IEEE 'round
to
nearest' style. B/2 is half a unit in the last place of the
two
numbers A and SAVEC. Furthermore, A is even, i.e. has last
bit
zero, and SAVEC is odd. Thus adding B/2 to A should not cha
nge
A, but adding B/2 to SAVEC should change SAVEC. */
r__1 = b / 2;
t1 = slamc3_(&r__1, &a);
r__1 = b / 2;
t2 = slamc3_(&r__1, &savec);
lieee1 = t1 == a && t2 > savec && lrnd;
/* Now find the mantissa, t. It should be the integer part
of
log to the base beta of a, however it is safer to determine
t
by powering. So we find t as the smallest positive integer
for
which
fl( beta**t + 1.0 ) = 1.0. */
lt = 0;
a = 1.f;
c = 1.f;
/* + WHILE( C.EQ.ONE )LOOP */
L30:
if (c == one) {
++lt;
a *= lbeta;
c = slamc3_(&a, &one);
r__1 = -(double)a;
c = slamc3_(&c, &r__1);
goto L30;
}
/* + END WHILE */
}
*beta = lbeta;
*t = lt;
*rnd = lrnd;
*ieee1 = lieee1;
return 0;
/* End of SLAMC1 */
} /* slamc1_ */
/* Subroutine */ int slamc2_(int *beta, int *t, int *rnd, float *
eps, int *emin, float *rmin, int *emax, float *rmax)
{
/* -- LAPACK auxiliary routine (version 2.0) --
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
Courant Institute, Argonne National Lab, and Rice University
October 31, 1992
Purpose
=======
SLAMC2 determines the machine parameters specified in its argument
list.
Arguments
=========
BETA (output) INT
The base of the machine.
T (output) INT
The number of ( BETA ) digits in the mantissa.
RND (output) INT
Specifies whether proper rounding ( RND = .TRUE. ) or
chopping ( RND = .FALSE. ) occurs in addition. This may not
be a reliable guide to the way in which the machine performs
its arithmetic.
EPS (output) FLOAT
The smallest positive number such that
fl( 1.0 - EPS ) .LT. 1.0,
where fl denotes the computed value.
EMIN (output) INT
The minimum exponent before (gradual) underflow occurs.
RMIN (output) FLOAT
The smallest normalized number for the machine, given by
BASE**( EMIN - 1 ), where BASE is the floating point value
of BETA.
EMAX (output) INT
The maximum exponent before overflow occurs.
RMAX (output) FLOAT
The largest positive number for the machine, given by
BASE**EMAX * ( 1 - EPS ), where BASE is the floating point
value of BETA.
Further Details
===============
The computation of EPS is based on a routine PARANOIA by
W. Kahan of the University of California at Berkeley.
=====================================================================
*/
/* Table of constant values */
static int c__1 = 1;
/* Initialized data */
static int first = TRUE_;
static int iwarn = FALSE_;
/* System generated locals */
int i__1;
float r__1, r__2, r__3, r__4, r__5;
/* Builtin functions */
double pow_ri(float *, int *);
/* Local variables */
static int ieee;
static float half;
static int lrnd;
static float leps, zero, a, b, c;
static int i, lbeta;
static float rbase;
static int lemin, lemax, gnmin;
static float small;
static int gpmin;
static float third, lrmin, lrmax, sixth;
static int lieee1;
extern /* Subroutine */ int slamc1_(int *, int *, int *,
int *);
extern double slamc3_(float *, float *);
extern /* Subroutine */ int slamc4_(int *, float *, int *),
slamc5_(int *, int *, int *, int *, int *,
float *);
static int lt, ngnmin, ngpmin;
static float one, two;
if (first) {
first = FALSE_;
zero = 0.f;
one = 1.f;
two = 2.f;
/* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values
of
BETA, T, RND, EPS, EMIN and RMIN.
Throughout this routine we use the function SLAMC3 to ens
ure
that relevant values are stored and not held in registers,
or
are not affected by optimizers.
SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1.
*/
slamc1_(&lbeta, &lt, &lrnd, &lieee1);
/* Start to find EPS. */
b = (float) lbeta;
i__1 = -lt;
a = pow_ri(&b, &i__1);
leps = a;
/* Try some tricks to see whether or not this is the correct E
PS. */
b = two / 3;
half = one / 2;
r__1 = -(double)half;
sixth = slamc3_(&b, &r__1);
third = slamc3_(&sixth, &sixth);
r__1 = -(double)half;
b = slamc3_(&third, &r__1);
b = slamc3_(&b, &sixth);
b = dabs(b);
if (b < leps) {
b = leps;
}
leps = 1.f;
/* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */
L10:
if (leps > b && b > zero) {
leps = b;
r__1 = half * leps;
/* Computing 5th power */
r__3 = two, r__4 = r__3, r__3 *= r__3;
/* Computing 2nd power */
r__5 = leps;
r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5);
c = slamc3_(&r__1, &r__2);
r__1 = -(double)c;
c = slamc3_(&half, &r__1);
b = slamc3_(&half, &c);
r__1 = -(double)b;
c = slamc3_(&half, &r__1);
b = slamc3_(&half, &c);
goto L10;
}
/* + END WHILE */
if (a < leps) {
leps = a;
}
/* Computation of EPS complete.
Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3
)).
Keep dividing A by BETA until (gradual) underflow occurs. T
his
is detected when we cannot recover the previous A. */
rbase = one / lbeta;
small = one;
for (i = 1; i <= 3; ++i) {
r__1 = small * rbase;
small = slamc3_(&r__1, &zero);
/* L20: */
}
a = slamc3_(&one, &small);
slamc4_(&ngpmin, &one, &lbeta);
r__1 = -(double)one;
slamc4_(&ngnmin, &r__1, &lbeta);
slamc4_(&gpmin, &a, &lbeta);
r__1 = -(double)a;
slamc4_(&gnmin, &r__1, &lbeta);
ieee = FALSE_;
if (ngpmin == ngnmin && gpmin == gnmin) {
if (ngpmin == gpmin) {
lemin = ngpmin;
/* ( Non twos-complement machines, no gradual under
flow;
e.g., VAX ) */
} else if (gpmin - ngpmin == 3) {
lemin = ngpmin - 1 + lt;
ieee = TRUE_;
/* ( Non twos-complement machines, with gradual und
erflow;
e.g., IEEE standard followers ) */
} else {
lemin = min(ngpmin,gpmin);
/* ( A guess; no known machine ) */
iwarn = TRUE_;
}
} else if (ngpmin == gpmin && ngnmin == gnmin) {
if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) {
lemin = max(ngpmin,ngnmin);
/* ( Twos-complement machines, no gradual underflow
;
e.g., CYBER 205 ) */
} else {
lemin = min(ngpmin,ngnmin);
/* ( A guess; no known machine ) */
iwarn = TRUE_;
}
} else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin)
{
if (gpmin - min(ngpmin,ngnmin) == 3) {
lemin = max(ngpmin,ngnmin) - 1 + lt;
/* ( Twos-complement machines with gradual underflo
w;
no known machine ) */
} else {
lemin = min(ngpmin,ngnmin);
/* ( A guess; no known machine ) */
iwarn = TRUE_;
}
} else {
/* Computing MIN */
i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin);
lemin = min(i__1,gnmin);
/* ( A guess; no known machine ) */
iwarn = TRUE_;
}
/* **
Comment out this if block if EMIN is ok */
if (iwarn) {
first = TRUE_;
printf("\n\n WARNING. The value EMIN may be incorrect:- ");
printf("EMIN = %8i\n",lemin);
printf("If, after inspection, the value EMIN looks acceptable");
printf("please comment out \n the IF block as marked within the");
printf("code of routine SLAMC2, \n otherwise supply EMIN");
printf("explicitly.\n");
}
/* **
Assume IEEE arithmetic if we found denormalised numbers abo
ve,
or if arithmetic seems to round in the IEEE style, determi
ned
in routine SLAMC1. A true IEEE machine should have both thi
ngs
true; however, faulty machines may have one or the other. */
ieee = ieee || lieee1;
/* Compute RMIN by successive division by BETA. We could comp
ute
RMIN as BASE**( EMIN - 1 ), but some machines underflow dur
ing
this computation. */
lrmin = 1.f;
i__1 = 1 - lemin;
for (i = 1; i <= 1-lemin; ++i) {
r__1 = lrmin * rbase;
lrmin = slamc3_(&r__1, &zero);
/* L30: */
}
/* Finally, call SLAMC5 to compute EMAX and RMAX. */
slamc5_(&lbeta, &lt, &lemin, &ieee, &lemax, &lrmax);
}
*beta = lbeta;
*t = lt;
*rnd = lrnd;
*eps = leps;
*emin = lemin;
*rmin = lrmin;
*emax = lemax;
*rmax = lrmax;
return 0;
/* End of SLAMC2 */
} /* slamc2_ */
double slamc3_(float *a, float *b)
{
/* -- LAPACK auxiliary routine (version 2.0) --
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
Courant Institute, Argonne National Lab, and Rice University
October 31, 1992
Purpose
=======
SLAMC3 is intended to force A and B to be stored prior to doing
the addition of A and B , for use in situations where optimizers
might hold one of these in a register.
Arguments
=========
A, B (input) FLOAT
The values A and B.
=====================================================================
*/
/* >>Start of File<<
System generated locals */
float ret_val;
ret_val = *a + *b;
return ret_val;
/* End of SLAMC3 */
} /* slamc3_ */
/* Subroutine */ int slamc4_(int *emin, float *start, int *base)
{
/* -- LAPACK auxiliary routine (version 2.0) --
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
Courant Institute, Argonne National Lab, and Rice University
October 31, 1992
Purpose
=======
SLAMC4 is a service routine for SLAMC2.
Arguments
=========
EMIN (output) EMIN
The minimum exponent before (gradual) underflow, computed by
setting A = START and dividing by BASE until the previous A
can not be recovered.
START (input) FLOAT
The starting point for determining EMIN.
BASE (input) INT
The base of the machine.
=====================================================================
*/
/* System generated locals */
int i__1;
float r__1;
/* Local variables */
static float zero, a;
static int i;
static float rbase, b1, b2, c1, c2, d1, d2;
extern double slamc3_(float *, float *);
static float one;
a = *start;
one = 1.f;
rbase = one / *base;
zero = 0.f;
*emin = 1;
r__1 = a * rbase;
b1 = slamc3_(&r__1, &zero);
c1 = a;
c2 = a;
d1 = a;
d2 = a;
/* + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND.
$ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP */
L10:
if (c1 == a && c2 == a && d1 == a && d2 == a) {
--(*emin);
a = b1;
r__1 = a / *base;
b1 = slamc3_(&r__1, &zero);
r__1 = b1 * *base;
c1 = slamc3_(&r__1, &zero);
d1 = zero;
i__1 = *base;
for (i = 1; i <= *base; ++i) {
d1 += b1;
/* L20: */
}
r__1 = a * rbase;
b2 = slamc3_(&r__1, &zero);
r__1 = b2 / rbase;
c2 = slamc3_(&r__1, &zero);
d2 = zero;
i__1 = *base;
for (i = 1; i <= *base; ++i) {
d2 += b2;
/* L30: */
}
goto L10;
}
/* + END WHILE */
return 0;
/* End of SLAMC4 */
} /* slamc4_ */
/* Subroutine */ int slamc5_(int *beta, int *p, int *emin,
int *ieee, int *emax, float *rmax)
{
/* -- LAPACK auxiliary routine (version 2.0) --
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
Courant Institute, Argonne National Lab, and Rice University
October 31, 1992
Purpose
=======
SLAMC5 attempts to compute RMAX, the largest machine floating-point
number, without overflow. It assumes that EMAX + abs(EMIN) sum
approximately to a power of 2. It will fail on machines where this
assumption does not hold, for example, the Cyber 205 (EMIN = -28625,
EMAX = 28718). It will also fail if the value supplied for EMIN is
too large (i.e. too close to zero), probably with overflow.
Arguments
=========
BETA (input) INT
The base of floating-point arithmetic.
P (input) INT
The number of base BETA digits in the mantissa of a
floating-point value.
EMIN (input) INT
The minimum exponent before (gradual) underflow.
IEEE (input) INT
A logical flag specifying whether or not the arithmetic
system is thought to comply with the IEEE standard.
EMAX (output) INT
The largest exponent before overflow
RMAX (output) FLOAT
The largest machine floating-point number.
=====================================================================
First compute LEXP and UEXP, two powers of 2 that bound
abs(EMIN). We then assume that EMAX + abs(EMIN) will sum
approximately to the bound that is closest to abs(EMIN).
(EMAX is the exponent of the required number RMAX). */
/* Table of constant values */
static float c_b5 = 0.f;
/* System generated locals */
int i__1;
float r__1;
/* Local variables */
static int lexp;
static float oldy;
static int uexp, i;
static float y, z;
static int nbits;
extern double slamc3_(float *, float *);
static float recbas;
static int exbits, expsum, try__;
lexp = 1;
exbits = 1;
L10:
try__ = lexp << 1;
if (try__ <= -(*emin)) {
lexp = try__;
++exbits;
goto L10;
}
if (lexp == -(*emin)) {
uexp = lexp;
} else {
uexp = try__;
++exbits;
}
/* Now -LEXP is less than or equal to EMIN, and -UEXP is greater
than or equal to EMIN. EXBITS is the number of bits needed to
store the exponent. */
if (uexp + *emin > -lexp - *emin) {
expsum = lexp << 1;
} else {
expsum = uexp << 1;
}
/* EXPSUM is the exponent range, approximately equal to
EMAX - EMIN + 1 . */
*emax = expsum + *emin - 1;
nbits = exbits + 1 + *p;
/* NBITS is the total number of bits needed to store a
floating-point number. */
if (nbits % 2 == 1 && *beta == 2) {
/* Either there are an odd number of bits used to store a
floating-point number, which is unlikely, or some bits are
not used in the representation of numbers, which is possible
,
(e.g. Cray machines) or the mantissa has an implicit bit,
(e.g. IEEE machines, Dec Vax machines), which is perhaps the
most likely. We have to assume the last alternative.
If this is true, then we need to reduce EMAX by one because
there must be some way of representing zero in an implicit-b
it
system. On machines like Cray, we are reducing EMAX by one
unnecessarily. */
--(*emax);
}
if (*ieee) {
/* Assume we are on an IEEE machine which reserves one exponent
for infinity and NaN. */
--(*emax);
}
/* Now create RMAX, the largest machine number, which should
be equal to (1.0 - BETA**(-P)) * BETA**EMAX .
First compute 1.0 - BETA**(-P), being careful that the
result is less than 1.0 . */
recbas = 1.f / *beta;
z = *beta - 1.f;
y = 0.f;
i__1 = *p;
for (i = 1; i <= *p; ++i) {
z *= recbas;
if (y < 1.f) {
oldy = y;
}
y = slamc3_(&y, &z);
/* L20: */
}
if (y >= 1.f) {
y = oldy;
}
/* Now multiply by BETA**EMAX to get RMAX. */
i__1 = *emax;
for (i = 1; i <= *emax; ++i) {
r__1 = y * *beta;
y = slamc3_(&r__1, &c_b5);
/* L30: */
}
*rmax = y;
return 0;
/* End of SLAMC5 */
} /* slamc5_ */
double pow_ri(float *ap, int *bp)
{
double pow, x;
int n;
pow = 1;
x = *ap;
n = *bp;
if(n != 0)
{
if(n < 0)
{
n = -n;
x = 1/x;
}
for( ; ; )
{
if(n & 01)
pow *= x;
if(n >>= 1)
x *= x;
else
break;
}
}
return(pow);
}