Blame - fusl/src/math/sqrt.c - mojo

blob: ff184fa7f08787e47cf524d5dbf066c6103d321e [file] [log] [blame]

Viet-Trung Luu	96b05c1	2016-01-11 11:26:36 -0800	[diff] [blame]	1	/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */
				2	/*
				3	* ====================================================
				4	* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
				5	*
				6	* Developed at SunSoft, a Sun Microsystems, Inc. business.
				7	* Permission to use, copy, modify, and distribute this
				8	* software is freely granted, provided that this notice
				9	* is preserved.
				10	* ====================================================
				11	*/
				12	/* sqrt(x)
				13	* Return correctly rounded sqrt.
				14	* ------------------------------------------
				15	* \| Use the hardware sqrt if you have one \|
				16	* ------------------------------------------
				17	* Method:
				18	* Bit by bit method using integer arithmetic. (Slow, but portable)
				19	* 1. Normalization
				20	* Scale x to y in [1,4) with even powers of 2:
				21	* find an integer k such that 1 <= (y=x*2^(2k)) < 4, then
				22	* sqrt(x) = 2^k * sqrt(y)
				23	* 2. Bit by bit computation
				24	* Let q_i = sqrt(y) truncated to i bits after the binary point
				25	* (q_0 = 1), and define
				26	*
				27	*   s_i = 2*q_i, and y_i = 2^{i+1} * (y - q_i^2).   (1)
				28	*
				29	*
				30	* To compute q_{i+1} from q_i, one checks whether
				31	*
				32	*
				33	*
				34	*   (q_i + 2^{-(i+1)})^2 <= y.   (2)
				35	*
				36	*
				37	* If (2) is false, then q_{i+1} = q_i; otherwise q_{i+1} = q_i + 2^{-(i+1)}.
				38	*
				39	*
				40	* With some algebraic manipulation, it is not difficult to see
				41	* that (2) is equivalent to
				42	*
				43	*   s_i + 2^{-(i+1)} <= y_i   (3)
				44	*
				45	*
				46	* The advantage of (3) is that s_i and y_i can be
				47	* computed by
				48	* the following recurrence formula:
				49	* if (3) is false
				50	*
				51	*   s_{i+1} = s_i,  y_{i+1} = y_i;   (4)
				52	*
				53	*
				54	* otherwise,
				55	*
				56	*   s_{i+1} = s_i + 2^{-i},  y_{i+1} = y_i - s_i - 2^{-(i+1)}   (5)
				57	*
				58	*
				59	* One may easily use induction to prove (4) and (5).
				60	* Note. Since the left hand side of (3) contains only i+2 bits,
				61	* it is not necessary to do a full (53-bit) comparison
				62	* in (3).
				63	* 3. Final rounding
				64	* After generating the 53 bits result, we compute one more bit.
				65	* Together with the remainder, we can decide whether the
				66	* result is exact, bigger than 1/2ulp, or less than 1/2ulp
				67	* (it will never be equal to 1/2ulp).
				68	* The rounding mode can be detected by checking whether
				69	* huge + tiny is equal to huge, and whether huge - tiny is
				70	* equal to huge for some floating point number "huge" and "tiny".
				71	*
				72	* Special cases:
				73	* sqrt(+-0) = +-0 ... exact
				74	* sqrt(inf) = inf
				75	* sqrt(-ve) = NaN ... with invalid signal
				76	* sqrt(NaN) = NaN ... with invalid signal for signaling NaN
				77	*/
				78
				79	#include "libm.h"
				80
				81	static const double tiny = 1.0e-300; /* sqrt() computes 1.0 - tiny and 1.0 + tiny to raise the inexact flag and find out the rounding direction */
				82
George Kulakowski	17e3b04	2016-02-18 15:59:50 -0800	[diff] [blame]	83	double sqrt(double x) {
				84	double z;
				85	int32_t sign = (int)0x80000000;
				86	int32_t ix0, s0, q, m, t, i;
				87	uint32_t r, t1, s1, ix1, q1;
Viet-Trung Luu	96b05c1	2016-01-11 11:26:36 -0800	[diff] [blame]	88
George Kulakowski	17e3b04	2016-02-18 15:59:50 -0800	[diff] [blame]	89	EXTRACT_WORDS(ix0, ix1, x);
Viet-Trung Luu	96b05c1	2016-01-11 11:26:36 -0800	[diff] [blame]	90
George Kulakowski	17e3b04	2016-02-18 15:59:50 -0800	[diff] [blame]	91	/* take care of Inf and NaN */
				92	if ((ix0 & 0x7ff00000) == 0x7ff00000) {
				93	return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
				94	}
				95	/* take care of zero */
				96	if (ix0 <= 0) {
				97	if (((ix0 & ~sign) \| ix1) == 0)
				98	return x; /* sqrt(+-0) = +-0 */
				99	if (ix0 < 0)
				100	return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
				101	}
				102	/* normalize x */
				103	m = ix0 >> 20;
				104	if (m == 0) { /* subnormal x */
				105	while (ix0 == 0) {
				106	m -= 21;
				107	ix0 \|= (ix1 >> 11);
				108	ix1 <<= 21;
				109	}
				110	for (i = 0; (ix0 & 0x00100000) == 0; i++)
				111	ix0 <<= 1;
				112	m -= i - 1;
				113	ix0 \|= ix1 >> (32 - i);
				114	ix1 <<= i;
				115	}
				116	m -= 1023; /* unbias exponent */
				117	ix0 = (ix0 & 0x000fffff) \| 0x00100000;
				118	if (m & 1) { /* odd m, double x to make it even */
				119	ix0 += ix0 + ((ix1 & sign) >> 31);
				120	ix1 += ix1;
				121	}
				122	m >>= 1; /* m = [m/2] */
Viet-Trung Luu	96b05c1	2016-01-11 11:26:36 -0800	[diff] [blame]	123
George Kulakowski	17e3b04	2016-02-18 15:59:50 -0800	[diff] [blame]	124	/* generate sqrt(x) bit by bit */
				125	ix0 += ix0 + ((ix1 & sign) >> 31);
				126	ix1 += ix1;
				127	q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */
				128	r = 0x00200000; /* r = moving bit from right to left */
Viet-Trung Luu	96b05c1	2016-01-11 11:26:36 -0800	[diff] [blame]	129
George Kulakowski	17e3b04	2016-02-18 15:59:50 -0800	[diff] [blame]	130	while (r != 0) {
				131	t = s0 + r;
				132	if (t <= ix0) {
				133	s0 = t + r;
				134	ix0 -= t;
				135	q += r;
				136	}
				137	ix0 += ix0 + ((ix1 & sign) >> 31);
				138	ix1 += ix1;
				139	r >>= 1;
				140	}
Viet-Trung Luu	96b05c1	2016-01-11 11:26:36 -0800	[diff] [blame]	141
George Kulakowski	17e3b04	2016-02-18 15:59:50 -0800	[diff] [blame]	142	r = sign;
				143	while (r != 0) {
				144	t1 = s1 + r;
				145	t = s0;
				146	if (t < ix0 \|\| (t == ix0 && t1 <= ix1)) {
				147	s1 = t1 + r;
				148	if ((t1 & sign) == sign && (s1 & sign) == 0)
				149	s0++;
				150	ix0 -= t;
				151	if (ix1 < t1)
				152	ix0--;
				153	ix1 -= t1;
				154	q1 += r;
				155	}
				156	ix0 += ix0 + ((ix1 & sign) >> 31);
				157	ix1 += ix1;
				158	r >>= 1;
				159	}
Viet-Trung Luu	96b05c1	2016-01-11 11:26:36 -0800	[diff] [blame]	160
George Kulakowski	17e3b04	2016-02-18 15:59:50 -0800	[diff] [blame]	161	/* use floating add to find out rounding direction */
				162	if ((ix0 \| ix1) != 0) {
				163	z = 1.0 - tiny; /* raise inexact flag */
				164	if (z >= 1.0) {
				165	z = 1.0 + tiny;
				166	if (q1 == (uint32_t)0xffffffff) {
				167	q1 = 0;
				168	q++;
				169	} else if (z > 1.0) {
				170	if (q1 == (uint32_t)0xfffffffe)
				171	q++;
				172	q1 += 2;
				173	} else
				174	q1 += q1 & 1;
				175	}
				176	}
				177	ix0 = (q >> 1) + 0x3fe00000;
				178	ix1 = q1 >> 1;
				179	if (q & 1)
				180	ix1 \|= sign;
				181	ix0 += m << 20;
				182	INSERT_WORDS(z, ix0, ix1);
				183	return z;
Viet-Trung Luu	96b05c1	2016-01-11 11:26:36 -0800	[diff] [blame]	184	}