/*
 * Someone, somewhere in a newsgroup, mentioned that this method won a
 * square-root routine contest. It works and is really fast.
 */

 

/*
 * STEP(k) - one step of a bit-by-bit integer square root, deciding bit k
 * of the result.
 *
 * The macro works on three unsigned integer variables that must already
 * exist in the enclosing scope:
 *   v - the remainder still to be consumed (starts as the input value),
 *   r - the root accumulated so far (starts at 0),
 *   s - scratch; receives the trial value for this step.
 *
 * Steps are meant to be applied from the highest k down to 0.
 *
 * k is parenthesized so that expression arguments such as STEP(i + 1)
 * shift by the intended amount, and the body is wrapped in
 * do { } while (0) so that STEP(k); is a single statement and stays
 * correct under an unbraced if/else.  k is evaluated twice, so it must
 * not have side effects.
 */
#define STEP(k) \
    do { \
        s = r + (1UL << ((k) * 2)); \
        r >>= 1; \
        if (s <= v) { \
            v -= s; \
            r |= (1UL << ((k) * 2)); \
        } \
    } while (0)

/*
 * sqrt_u_32 - integer square root of a 32-bit value.
 *
 * Returns the largest integer whose square does not exceed n, for n in
 * the range 0 .. 0xFFFFFFFF.  Where unsigned long is wider than 32 bits,
 * larger inputs are not rejected; only 16 result bits are ever produced,
 * so the result tops out at 65535.
 *
 * The root is built one bit at a time, from bit 15 down to bit 0.  `bit`
 * walks over the even bit positions 30, 28, ..., 0.  At each step the
 * trial value is subtracted from the remainder if it fits, and the
 * matching bit is then set in the root.  Only shifts, additions and
 * comparisons are used.
 */
unsigned long sqrt_u_32(unsigned long n)
{
    unsigned long root = 0;
    unsigned long remainder = n;
    unsigned long bit;

    for (bit = 1UL << 30; bit != 0; bit >>= 2) {
        unsigned long trial = root + bit;

        /* Realign the partial root for the next, lower bit pair. */
        root >>= 1;
        if (trial <= remainder) {
            remainder -= trial;
            root |= bit;
        }
    }
    return root;
}
/*
 * sqrt_u_16 - integer square root of a 16-bit value.
 *
 * Returns the largest integer whose square does not exceed n, for n in
 * the range 0 .. 0xFFFF; the result therefore lies in 0 .. 255.
 *
 * The root is built one bit at a time, from bit 7 down to bit 0.  `bit`
 * walks over the even bit positions 14, 12, ..., 0.  The working
 * variables are plain unsigned so no intermediate value is narrowed;
 * the only narrowing is the explicit cast on return, and the result
 * always fits.
 */
unsigned short sqrt_u_16(unsigned short n)
{
    unsigned root = 0;
    unsigned remainder = n;
    unsigned bit;

    for (bit = 1U << 14; bit != 0; bit >>= 2) {
        unsigned trial = root + bit;

        /* Realign the partial root for the next, lower bit pair. */
        root >>= 1;
        if (trial <= remainder) {
            remainder -= trial;
            root |= bit;
        }
    }
    return (unsigned short)root;
}

/* Andreas Schrattenecker */