qsqrt.h Source File

00001 /*
00002     Copyright (C) 2000 by Andrew Zabolotny (Intel version)
00003     Copyright (C) 2002 by Matthew Reda <reda@mac.com> (PowerPC version)
00004     Fast computation of sqrt(x) and 1/sqrt(x)
00005   
00006     This library is free software; you can redistribute it and/or
00007     modify it under the terms of the GNU Library General Public
00008     License as published by the Free Software Foundation; either
00009     version 2 of the License, or (at your option) any later version.
00010   
00011     This library is distributed in the hope that it will be useful,
00012     but WITHOUT ANY WARRANTY; without even the implied warranty of
00013     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014     Library General Public License for more details.
00015   
00016     You should have received a copy of the GNU Library General Public
00017     License along with this library; if not, write to the Free
00018     Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00019 */
00020 
00021 // Uncomment the following line to define CS_NO_QSQRT if you experience
00022 // mysterious problems with CS which you think are related to this
00023 // version of sqrt not behaving properly. If you find something like
00024 // that I'd like to be notified of this so we can make sure this really
00025 // is the problem.
00026 //#define CS_NO_QSQRT
00027 
00028 #ifndef __QSQRT_H__
00029 #define __QSQRT_H__
00030 
00031 #if (!defined (CS_NO_QSQRT)) && defined (PROC_X86) && defined (COMP_GCC)
00032 
00033 /*
00034   NB: Single-precision floating-point format (32 bits):
00035         SEEEEEEE.EMMMMMMM.MMMMMMMM.MMMMMMMM
00036         S: Sign (0 - positive, 1 - negative)
00037         E: Exponent (plus 127, 8 bits)
00038         M: Mantissa (23 bits)
00039 */
00040 
/**
 * Fast approximate square root (x86, GCC inline assembly, x87 FPU).
 *
 * Builds a first guess of 1/sqrt(x) by integer arithmetic on the IEEE-754
 * bit pattern of x, refines it with two Newton-Raphson steps and finally
 * multiplies by x, since sqrt(x) == x * (1/sqrt(x)).
 *
 * Equivalent C formulae:
 *   float tmp = x;
 *   *((unsigned *)&tmp) = (0xbe6f0000 - *((unsigned *)&tmp)) >> 1;
 *   double h = x * 0.5;
 *   double a = tmp;
 *   a *= 1.5 - a * a * h;
 *   a *= 1.5 - a * a * h;
 *   return a * x;
 *
 * The by-value copy of x is used as scratch memory for the initial guess
 * (hence the "+m" constraint); the caller's variable is not touched.
 *
 * \param x value to take the square root of.
 *          NOTE(review): the bit trick appears to assume a non-negative,
 *          finite x - confirm before feeding negative values or NaN.
 * \return an approximation of sqrt(x).
 */
static inline float qsqrt (float x)
{
  /* A memory-only ("m") input needs an addressable object; a bare literal
     such as 0.5F is not an lvalue, and some GCC releases refuse it. Static
     constants give the assembler real memory operands without any
     per-call store. */
  static const float half = 0.5F;
  static const float three_halves = 1.5F;
  float ret;

  /* The trailing comments show the x87 stack, bottom first, top last. */
  __asm__ (
                "flds   %1\n"                   // x
                "movl   $0xbe6f0000,%%eax\n"
                "subl   %1,%%eax\n"
                "shrl   $1,%%eax\n"
                "movl   %%eax,%1\n"
                "flds   %2\n"                   // x 0.5
                "fmul   %%st(1)\n"              // x h
                "flds   %3\n"                   // x h 1.5
                "flds   %1\n"                   // x h 1.5 a
                "fld    %%st\n"                 // x h 1.5 a a
                "fmul   %%st\n"                 // x h 1.5 a a*a
                "fmul   %%st(3)\n"              // x h 1.5 a a*a*h
                "fsubr  %%st(2)\n"              // x h 1.5 a 1.5-a*a*h
                "fmulp  %%st(1)\n"              // x h 1.5 a
                "fld    %%st\n"                 // x h 1.5 a a
                "fmul   %%st\n"                 // x h 1.5 a a*a
                "fmulp  %%st(3)\n"              // x a*a*h 1.5 a
                "fxch\n"                        // x a*a*h a 1.5
                "fsubp  %%st,%%st(2)\n"         // x 1.5-a*a*h a
                "fmulp  %%st(1)\n"              // x a
                "fmulp  %%st(1)\n"              // a
        : "=&t" (ret), "+m" (x) : "m" (half), "m" (three_halves)
          /* subl/shrl modify EFLAGS, so "cc" is listed explicitly. */
        : "eax", "cc", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
  );
  return ret;
}
00089 
/**
 * Fast approximate inverse square root, 1/sqrt(x) (x86, GCC inline
 * assembly, x87 FPU).
 *
 * Builds a first guess from the IEEE-754 bit pattern of x as
 * (0xbe6f0000 - bits) >> 1 and refines it with two Newton-Raphson steps
 * a = a * (1.5 - a * a * h), where h = x * 0.5.
 *
 * The by-value copy of x is used as scratch memory for the initial guess
 * (hence the "+m" constraint); the caller's variable is not touched.
 *
 * \param x value whose inverse square root is wanted.
 *          NOTE(review): zero, negative values and NaN are not
 *          special-cased here - confirm the behaviour callers expect.
 * \return an approximation of 1/sqrt(x).
 */
static inline float qisqrt (float x)
{
  /* A memory-only ("m") input needs an addressable object; a bare literal
     such as 0.5F is not an lvalue, and some GCC releases refuse it. Static
     constants give the assembler real memory operands without any
     per-call store. */
  static const float half = 0.5F;
  static const float three_halves = 1.5F;
  float ret;

  /* The trailing comments show the x87 stack, bottom first, top last. */
  __asm__ (
                "flds   %1\n"                   // x
                "movl   $0xbe6f0000,%%eax\n"
                "subl   %1,%%eax\n"
                "shrl   $1,%%eax\n"
                "movl   %%eax,%1\n"
                "flds   %2\n"                   // x 0.5
                "fmulp  %%st(1)\n"              // h
                "flds   %3\n"                   // h 1.5
                "flds   %1\n"                   // h 1.5 a
                "fld    %%st\n"                 // h 1.5 a a
                "fmul   %%st\n"                 // h 1.5 a a*a
                "fmul   %%st(3)\n"              // h 1.5 a a*a*h
                "fsubr  %%st(2)\n"              // h 1.5 a 1.5-a*a*h
                "fmulp  %%st(1)\n"              // h 1.5 a
                "fld    %%st\n"                 // h 1.5 a a
                "fmul   %%st\n"                 // h 1.5 a a*a
                "fmulp  %%st(3)\n"              // a*a*h 1.5 a
                "fxch\n"                        // a*a*h a 1.5
                "fsubp  %%st,%%st(2)\n"         // 1.5-a*a*h a
                "fmulp  %%st(1)\n"              // a
        : "=t" (ret), "+m" (x) : "m" (half), "m" (three_halves)
          /* subl/shrl modify EFLAGS, so "cc" is listed explicitly. */
        : "eax", "cc", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
  );
  return ret;
}
00126 
00127 #elif (!defined (CS_NO_QSQRT)) && defined (PROC_POWERPC) && defined (COMP_GCC)
00128 
00136 static inline float qsqrt(float x)
00137 {
00138   float y0 = 0.0;
00139 
00140   if (x != 0.0)
00141   {
00142     float x0 = x * 0.5f;
00143 
00144     asm ("frsqrte %0,%1" : "=f" (y0) : "f" (x));
00145     
00146     y0 = y0 * (1.5f - x0 * y0 * y0);
00147     y0 = (y0 * (1.5f - x0 * y0 * y0)) * x;
00148   };
00149     
00150   return y0;
00151 };
00152 
00157 static inline float qisqrt(float x)
00158 {
00159   float x0 = x * 0.5f;
00160   float y0;
00161   asm ("frsqrte %0,%1" : "=f" (y0) : "f" (x));
00162     
00163   y0 = y0 * (1.5f - x0 * y0 * y0);
00164   y0 = y0 * (1.5f - x0 * y0 * y0);
00165 
00166   return y0;
00167 };
00168 
00169 #elif (!defined (CS_NO_QSQRT)) && defined (PROC_X86) && defined (COMP_VC)
00170 
00171 #include <math.h>
00172 
/* x86 with Visual C++: both operations are forwarded to the C runtime's
   single-precision sqrtf(); the argument is evaluated exactly once. */
#define qsqrt(value) sqrtf (value)
#define qisqrt(value) (1.0f / sqrtf (value))
00175 
00176 #else
00177 
00178 #include <math.h>
/* Generic fallback (CS_NO_QSQRT defined, or no specialised branch matched):
   both operations go through the standard sqrt(), so the result has type
   double here. The argument is evaluated exactly once. */
#define qsqrt(value) sqrt (value)
#define qisqrt(value) (1.0 / sqrt (value))
00181 
00182 #endif
00183 
00184 #endif // __QSQRT_H__