/* Some thoughts on integer square root subroutines, including golfed versions.
* The sqrt functions in this file are supposed to return floor(sqrt(n)).
XXX merge sqrt.c into here
*/
#include
#include
/* Return floor(log n / log 2), quickly. */
static inline int lg(unsigned n) {
int lg = 0;
while (n > 0xffff) { lg += 16; n >>= 16; }
if (n > 0xff) { lg += 8; n >>= 8; }
if (n > 0xf) { lg += 4; n >>= 4; }
if (n > 0x3) { lg += 2; n >>= 2; }
if (n > 0x1) { lg += 1; n >>= 1; }
return lg;
}
/* Newton’s method square root. */
static inline int sqt(unsigned n) {
int lgn = lg(n);
unsigned guess = n >> (lgn/2); /* approximate square root, save a few iters */
unsigned newguess = guess;
if (!n) return 0;
do {
guess = newguess;
newguess = (guess + n/guess)/2;
} while (guess - newguess > 1 && guess - newguess < -1);
if (newguess*newguess > n) return newguess-1;
return newguess;
}
/* obfuscated version without lg */
/*
g,h;q(n){h=g=n;while(g/=4)h/=2;if(n)do{g=h;h=(g+n/g)/2;}while(g-h>1||h-g>1);return(h*h>n)?h-1:h;}
*/
/* this version is slower but a lot shorter: */
/* g,h;q(n){h=n;if(n)do g=h,h=(g+n/g)/2;while(g-h+2&~3);return h-=h*h>n;} */
/* if you just init h to 1 globally and then let h stick around from
one call to the next, it gets a *lot* faster in this test, but
that’s kind of cheating... */
/* expanded out:
int sqrt(int n) {
int g=n, h=n;
if (n != 0) {
do {
g = h;
h = (g + n/g)/2;;
} while (abs(g-h) > 1);
if (h*h > n) {
return h-1;
} else {
return h;
}
}
}
A trivial Forth translation:
variable g variable h variable n
: q dup n ! dup g ! dup h !
if begin h @ g ! g @ n @ g @ / + 2/ h ! g @ h @ - 2 + 3 invert and while repeat then
h @ dup dup * n @ > if 1- then ;
A version without "g" or "h":
variable n : improve n @ over / + 2/ ; : far? - abs 1 > ;
: sqrt dup n ! dup if begin dup improve swap over far? while repeat then
dup dup * n @ > if 1- then ;
*/
/* you could imagine a golfscript that shortened that to, say
vgvh:q_nn!hn[{h!ggn+g/2/!hgh-2+3~&}]hhh*n>-;
with the following definitions:
v - declare global variable
:q - declare function q
_n - declare parameter p
n - fetch from variable n
!n - store to variable n
[] - conditional: if (from StoneKnifeForth)
{} - do-while loop (from StoneKnifeForth)
+ - add top two stack values
- - subtract top two stack values
* - multiply top two stack values
/ - divide top two stack values
2 - push literal 2 (and similarly for other digit sequences)
~ - bitwise NOT
& - bitwise AND
> - compare top two stack values
along with, say, suitable K-style array math.
In K you'd replace the :q_n ... ; with just {} and use “x” instead of “n”.
*/
/*
Here’s an abbreviated version of the version from the jasonwoof Wiki
at :
: -> 2dup / over - 2/ ; : sqrt 1 begin -> dup while + repeat drop nip ;
This uses a slightly different approach. The stack effect of -> is
( square guess -- square guess delta ), the delta is calculated as
(square / guess - guess)/2, and so the new guess would be
guess + (square / guess - guess)/2 =
guess + (square / guess) / 2 - guess / 2 =
guess / 2 + (square / guess) / 2 =
(guess + square / guess) / 2
so it’s equivalent if we disregard rounding and zero. It does,
however, give different answers due to rounding, like sqrt(3) -> 2,
and more damningly sqrt(6) -> 3, when the answer to two places is
2.45.
Here’s a version that uses many fewer divisions, like typically up to
about 5 instead of up to about 20:
: rough dup begin 2/ swap 2/ 2/ dup while swap repeat drop ;
: -> 2dup / over - 2/ ; : sqrt dup rough begin -> dup while + repeat drop nip ;
This can be inlined to:
: sqrt dup dup begin 2/ swap 2/ 2/ dup while swap repeat drop
begin dup . 2dup / over - 2/ dup while + repeat drop nip ;
This all apparently depends on (-1)/2 being 0 and not -1.
A C one-liner version of the Forth one-liner would be
g,d;q(n){for(g=1;d=(n/g-g)/2;g+=d);return g;}
It sometimes returns a number that’s too high by 1, which can be fixed
the same way as the other C version:
g,d;q(n){for(g=1;d=(n/g-g)/2;g+=d);return g-=g*g>n;}
And we can handle 0 with that same case, which makes it longer than the
really simplified version but still shorter than the other approach:
g,d;q(n){g=1;if(n)for(;d=(n/g-g)/2;g+=d);return g-=g*g>n;}
Now, one problem with the above is that it only works on signed
integers. (n/g-g) can be either positive or negative. It starts out
positive (except when the square root really is 1, the initial guess,
or 0) and thereafter it’s negative. We can solve the problem by
making it always positive, by starting the guess at n instead of 1,
and inverting its sign, without adding any extra characters --- except
that an “unsigned” declaration is necessary for this to matter, sigh. If
it weren’t for some tricky corners of the C type coercion rules, you’d
need more than one.
I don’t have a concise proof that g-n/g will never go negative here,
although I haven’t found a case where it does yet. I think you could
probably write such a proof.
This function runs into trouble at 4294901760 (i.e. 2³² - 2¹⁶). It
exits the loop with g == 65536 == 2¹⁶, which is too high; the desired
answer is 65535. The g*g>n test that works to correct this problem
for smaller numbers fails here because 65536² == 0 in 32-bit math. I
think solving that problem for unsigned requires more code, so for the
sake of golf, I’m not going to do it.
For most golf purposes, you probably really only need the innermost
for loop and the initialization g=n. After ensuring that n isn’t 0
and `for(g=n;d=(g-n/g)/2;g-=d);`, you know that g is one of the two
numbers bracketing the square root of n.
*/
unsigned
g,d;q(n){g=n;if(n)for(;d=(g-n/g)/2;g-=d);return g-=g*g>n;}
/* With a decent 1976-era C layout using tabs, that’s 100 bytes
instead of 58, excluding the “unsigned”:
unsigned g, d;
q(n)
{
g = n;
if (n)
for(; d = (g - n/g) / 2; g -= d)
;
return g -= g*g>n;
}
*/
/* Now, Newton’s method is pretty fast if you start with a reasonably
good guess, as sqt does, which is why it usually finishes in three
to five iterations. But the function q above can take a lot of
iterations. There are a couple of directions to go with this:
- You could start with a better guess, perhaps one taken or
interpolated from a lookup table, or generated with a polynomial.
- You could use a dumber approach than Newton’s Method to make the
iterations faster. Binary search, say.
Here’s a binary-search approach.
*/
int r(unsigned n) {
/* invariant: min^2 <= n. max^2 >= n. */
unsigned min = 0, max = 0xffff, mid;
while (min + 1 < max) {
mid = min + (max - min)/2;
if (mid*mid > n) max = mid;
else min = mid;
}
if (max*max == n) return max;
return min;
}
/* There are a couple of problems with that code.
Its initial max is 0xffff because that’s the biggest number we can
square in 32-bit C without an overflow. In assembly, this wouldn’t be
a problem, since 32-bit multiplies produce 64-bit results.
This actually results in wrong answers taking square roots of numbers
over 2^32 - 2^17.
It’s also a bit sloppy. When n is a perfect square, the invariant
allows us to exit the loop with min == n and max == n+1, or min == n-1
and max == n. This requires an extra test at the end to clean up.
*/
/* === Main program: exhaustive testing routines for the above === */
/* Test a square-root routine to make sure it works. */
void testsqrt(int (*sqrt)(unsigned), char *name, unsigned upto) {
unsigned ii;
long long result;
printf("Checking %s\n", name);
/* We do the squaring here in 64-bit “long long” to
* avoid overflow. */
ii = 0;
do {
result = sqrt(ii);
assert((result*result <= ii) || (fprintf(stderr, "%u\n", ii) && 0));
assert(((result+1)*(result+1) > ii) || (fprintf(stderr, "%u\n", ii) && 0));
if (ii % 16777216 == 16777215) printf("checked %s up to %u\n", name, ii);
ii++;
} while (ii != upto);
}
int main() {
/* first, basic sanity test for lg */
assert(lg(511) == 8);
assert(lg(512) == 9);
assert(lg(16777215) == 23);
assert(lg(16777216) == 24);
/* A really good square root routine would work for all
* representable values, of course. `sqt` does, but `q` and `r`
* fail for some numbers over 65535**2. */
testsqrt(r, "binary search r", 4294836225u);
testsqrt(sqt, "faster function sqt", 0);
testsqrt((int(*)(unsigned))q, "golfed function q", 4294901759u);
return 0;
}