/************************************************************************ * * * N O T I C E * * * * Copyright Abandoned, 1987, Fred Fish * * * * This previously copyrighted work has been placed into the * * public domain by the author (Fred Fish) and may be freely used * * for any purpose, private or commercial. I would appreciate * * it, as a courtesy, if this notice is left in all copies and * * derivative works. Thank you, and enjoy... * * * * The author makes no warranty of any kind with respect to this * * product and explicitly disclaims any implied warranties of * * merchantability or fitness for any particular purpose. * * * ************************************************************************ */ /* * FUNCTION * * sqrt double precision square root * * KEY WORDS * * sqrt * machine independent routines * math libraries * * DESCRIPTION * * Returns double precision square root of double precision * floating point argument. * * USAGE * * double sqrt (x) * double x; * * REFERENCES * * Fortran IV-PLUS user's guide, Digital Equipment Corp. pp B-7. * * Computer Approximations, J.F. Hart et al, John Wiley & Sons, * 1968, pp. 89-96. * * RESTRICTIONS * * The relative error is 10**(-30.1) after three applications * of Heron's iteration for the square root. * * However, this assumes exact arithmetic in the iterations * and initial approximation. Additional errors may occur * due to truncation, rounding, or machine precision limits. * * PROGRAMMER * * Fred Fish * * INTERNALS * * Computes square root by: * * (1) Range reduction of argument to [0.5,1.0] * by application of identity: * * sqrt(x) = 2**(k/2) * sqrt(x * 2**(-k)) * * k is the exponent when x is written as * a mantissa times a power of 2 (m * 2**k). * It is assumed that the mantissa is * already normalized (0.5 =< m < 1.0). * * (2) An approximation to sqrt(m) is obtained * from: * * u = sqrt(m) = (P0 + P1*m) / (Q0 + Q1*m) * * P0 = 0.594604482 * P1 = 2.54164041 * Q0 = 2.13725758 * Q1 = 1.0 * * (coefficients from HART table #350 pg 193) * * (3) Three applications of Heron's iteration are * performed using: * * y[n+1] = 0.5 * (y[n] + (m/y[n])) * * where y[0] = u = approx sqrt(m) * * (4) If the value of k was odd then y is either * multiplied by the square root of two or * divided by the square root of two for k positive * or negative respectively. This rescales y * by multiplying by 2**frac(k/2). * * (5) Finally, y is rescaled by int(k/2) which * is equivalent to multiplication by 2**int(k/2). * * The result of steps 4 and 5 is that the value * of y between 0.5 and 1.0 has been rescaled by * 2**(k/2) which removes the original rescaling * done prior to finding the mantissa square root. * * NOTES * * The Convergent Technologies compiler optimizes division * by powers of two to "arithmetic shift right" instructions. * This is ok for positive integers but gives different * results than other C compilers for negative integers. * For example, (-1)/2 is -1 on the CT box, 0 on every other * machine I tried. * */ /* * MODIFICATIONS * * Function sqrt(x) is initially approximated on an * interval [0.5..2.0] by a quadratic polynomial with * the following coefficients * * P0 0.3608580479718948e+00 * P1 0.7477707028388739e+00 * P2 -0.1105464728191369e+00 * * These coefficients were computed with a help of Maple * symbolic algebra system. Longer approximation interval * is used in order to avoid multiplication by SQRT2 * constant in case of odd exponents (this idea comes from * Olaf Flebbe, flebbe@tat.physik.uni-tuebingen.de). With * 64 bit IEEE representation three Heron's iterations are * good enough to satisfy essential requirements of Paranoia * test suite. An absolute error after three iterations is * below 2e-19 over the whole range (below 5e-10 after two). * Multiplications by powers of 2 are performed by explicit * calls to ldexp. * * Michal Jaegermann, 21 October 1992 */ #if !defined (__M68881__) && !defined (sfp004) #include #include #include "pml.h" #define P0 0.3608580479718948e+00 #define P1 0.7477707028388739e+00 #define P2 -0.1105464728191369e+00 static char funcname[] = "sqrt"; double sqrt (x) double x; { int k; double m; double u; struct exception xcpt; register double (*dfunc)(double, int) = ldexp; if (x == 0.0) { xcpt.retval = 0.0; } else if (x < 0.0) { xcpt.type = DOMAIN; xcpt.name = funcname; xcpt.arg1 = x; if (!matherr (&xcpt)) { fprintf (stderr, "%s: DOMAIN error\n", funcname); errno = EDOM; xcpt.retval = 0.0; } } else { m = frexp (x, &k); if (k & 1) { m = dfunc(m, 1); /* * multiply by 2 if our exponent was odd; * odd k is now too big by 1 but (k >> 1) will take * care of that */ } u = (P2 * m + P1) * m + P0; /* the first guess */ u = dfunc((u + (m / u)), -1); /* Heron's iteration */ #ifndef __SOZOBON__ /* * with current floating point representation used * by Sozobon C we are already below achievable precision */ u = dfunc((u + (m / u)), -1); /* one more */ #endif /* __SOZOBON__ */ /* * here we rely on the fact that -3/2 and (-3 >> 1) * do give different results; * the last iteration and adjust final exponent properly */ xcpt.retval = dfunc (u + (m / u), (k >> 1) - 1); } return (xcpt.retval); } double hypot(double x, double y) { return sqrt(x*x + y*y); } #else /* __M68881__, sfp004 */ #ifdef sfp004 __asm(" comm = -6 resp = -16 zahl = 0 "); /* end asm */ #endif sfp004 #if defined (__M68881__) || defined (sfp004) __asm(".text; .even"); # ifdef ERROR_CHECK __asm(" _Overflow: .ascii \"OVERFLOW\\0\" _Domain: .ascii \"DOMAIN\\0\" _Error_String: .ascii \"sqrt: %s error\\n\\0\" .even | m.ritzert 7.12.1991 | ritzert@dfg.dbp.de | | /* NAN = {7fffffff,ffffffff} */ | /* +Inf = {7ff00000,00000000} */ | /* -Inf = {fff00000,00000000} */ | /* MAX_D= {7fee42d1,30773b76} */ | /* MIN_D= {ffee42d1,30773b76} */ .even double_max: .long 0x7fee42d1 .long 0x30273b76 double_min: .long 0xffee42d1 .long 0x30273b76 NaN: .long 0x7fffffff .long 0xffffffff p_Inf: .long 0x7ff00000 .long 0x00000000 m_Inf: .long 0xfff00000 .long 0x00000000 "); /* end asm */ # endif ERROR_CHECK __asm(".even .globl _hypot .globl _sqrt _sqrt: "); /* end asm */ #endif /* __M68881__ || sfp004 */ #ifdef __M68881__ __asm(" fsqrtd a7@(4), fp0 | sqrt fmoved fp0,a7@- | push result moveml a7@+,d0-d1 | return_value "); /* end asm */ #endif __M68881__ #ifdef sfp004 __asm(" lea 0xfffa50,a0 movew #0x5404,a0@(comm) | specify function cmpiw #0x8900,a0@(resp) | check movel a7@(4),a0@ | load arg_hi movel a7@(8),a0@ | load arg_low movew #0x7400,a0@(comm) | result to d0 .long 0x0c688900, 0xfff067f8 | wait movel a0@,d0 movel a0@,d1 "); /* end asm */ #endif sfp004 #if defined (__M68881__) || defined (sfp004) # ifdef ERROR_CHECK __asm(" err: lea double_max,a0 | swap d0 | exponent into lower word cmpw a0@(16),d0 | == NaN ? beq error_nan | cmpw a0@(24),d0 | == + Infinity ? beq error_plus | swap d0 | result ok, rts | restore d0 "); #ifndef __MSHORT__ __asm(" error_plus: swap d0 moveml d0-d1,a7@- movel #63,_errno | NAN => errno = EDOM pea _Overflow | for printf bra error_exit | error_nan: moveml a0@(24),d0-d1 | result = +inf moveml d0-d1,a7@- movel #62,_errno | NAN => errno = EDOM pea _Domain | for printf "); #else __MSHORT__ __asm(" error_plus: swap d0 moveml d0-d1,a7@- movew #63,_errno | NAN => errno = EDOM pea _Overflow | for printf bra error_exit | error_nan: moveml a0@(24),d0-d1 | result = +inf moveml d0-d1,a7@- movew #62,_errno | NAN => errno = EDOM pea _Domain | for printf "); #endif __MSHORT__ __asm(" error_exit: pea _Error_String | pea __iob+52 | jbsr _fprintf | addl #12,a7 | moveml a7@+,d0-d1 "); # endif ERROR_CHECK __asm(" rts .even _hypot: "); #endif /* __M68881__ || sfp004 */ #ifdef __M68881__ __asm(" fmoved a7@(4),fp0 | fmulx fp0,fp0 | x**2 fmoved a7@(12),fp1 | fmulx fp1,fp1 | y**2 faddx fp1,fp0 | fsqrtx fp0,fp0 | sqrt( x**2 + y**2 ) fmoved fp0,a7@- | moveml a7@+,d0-d1 | return arg "); #endif __M68881__ #ifdef sfp004 __asm(" lea 0xfffa50,a0 movew #0x5400,a0@(comm) | load fp0 .long 0x0c688900, 0xfff067f8 movel a7@(4),a0@ | load arg_hi movel a7@(8),a0@ | load arg_low movew #0x5480,a0@(comm) | load fp1 .long 0x0c688900, 0xfff067f8 movel a7@(12),a0@ | load arg_hi movel a7@(16),a0@ | load arg_low movew #0x0023,a0@(comm) .word 0x4a68,0xfff0,0x6bfa | test movew #0x04a3,a0@(comm) .word 0x4a68,0xfff0,0x6bfa | test movew #0x0422,a0@(comm) | fp0 = fp0 + fp1 .word 0x4a68,0xfff0,0x6bfa | test movew #0x0004,a0@(comm) | sqrt(fp0) .word 0x4a68,0xfff0,0x6bfa | test movew #0x7400,a0@(comm) | result to d0/d1 .long 0x0c688900, 0xfff067f8 movel a0@(zahl),d0 movel a0@(zahl),d1 "); #endif sfp004 #ifdef ERROR_CHECK __asm("bra err"); #else ERROR_CHECK __asm("rts"); #endif ERROR_CHECK #endif /* __M68881__, sfp004 */