|
qsqrt.h00001 /* 00002 Copyright (C) 2000 by Andrew Zabolotny (Intel version) 00003 Copyright (C) 2002 by Matthew Reda <reda@mac.com> (PowerPC version) 00004 Fast computation of sqrt(x) and 1/sqrt(x) 00005 00006 This library is free software; you can redistribute it and/or 00007 modify it under the terms of the GNU Library General Public 00008 License as published by the Free Software Foundation; either 00009 version 2 of the License, or (at your option) any later version. 00010 00011 This library is distributed in the hope that it will be useful, 00012 but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 Library General Public License for more details. 00015 00016 You should have received a copy of the GNU Library General Public 00017 License along with this library; if not, write to the Free 00018 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 00019 */ 00020 00021 // Uncomment the following line to define CS_NO_QSQRT if you experience 00022 // mysterious problems with CS which you think are related to this 00023 // version of sqrt not behaving properly. If you find something like 00024 // that I'd like to be notified of this so we can make sure this really 00025 // is the problem. 00026 //#define CS_NO_QSQRT 00027 00028 #ifndef __QSQRT_H__ 00029 #define __QSQRT_H__ 00030 00031 #if (!defined (CS_NO_QSQRT)) && defined (PROC_X86) && defined (COMP_GCC) 00032 00033 /* 00034 NB: Single-precision floating-point format (32 bits): 00035 SEEEEEEE.EMMMMMMM.MMMMMMMM.MMMMMMMM 00036 S: Sign (0 - positive, 1 - negative) 00037 E: Exponent (plus 127, 8 bits) 00038 M: Mantissa (23 bits) 00039 */ 00040 00049 static inline float qsqrt (float x) 00050 { 00051 float ret; 00052 00053 // Original C++ formulae: 00054 // float tmp = x; 00055 // *((unsigned *)&tmp) = (0xbe6f0000 - *((unsigned *)&tmp)) >> 1; 00056 // double h = x * 0.5; 00057 // double a = tmp; 00058 // a *= 1.5 - a * a * h; 00059 // a *= 1.5 - a * a * h; 00060 // return a * x; 00061 00062 __asm__ ( 00063 "flds %1\n" // x 00064 "movl $0xbe6f0000,%%eax\n" 00065 "subl %1,%%eax\n" 00066 "shrl $1,%%eax\n" 00067 "movl %%eax,%1\n" 00068 "flds %2\n" // x 0.5 00069 "fmul %%st(1)\n" // x h 00070 "flds %3\n" // x h 1.5 00071 "flds %1\n" // x h 1.5 a 00072 "fld %%st\n" // x h 1.5 a a 00073 "fmul %%st\n" // x h 1.5 a a*a 00074 "fmul %%st(3)\n" // x h 1.5 a a*a*h 00075 "fsubr %%st(2)\n" // x h 1.5 a 1.5-a*a*h 00076 "fmulp %%st(1)\n" // x h 1.5 a 00077 "fld %%st\n" // x h 1.5 a a 00078 "fmul %%st\n" // x h 1.5 a a*a 00079 "fmulp %%st(3)\n" // x a*a*h 1.5 a 00080 "fxch\n" // x a*a*h a 1.5 00081 "fsubp %%st,%%st(2)\n" // x 1.5-a*a*h a 00082 "fmulp %%st(1)\n" // x a 00083 "fmulp %%st(1)\n" // a 00084 : "=&t" (ret), "+m" (x) : "m" (0.5F), "m" (1.5F) 00085 : "eax", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" 00086 ); 00087 return ret; 00088 } 00089 00097 static inline float qisqrt (float x) 00098 { 00099 float ret; 00100 __asm__ ( 00101 "flds %1\n" // x 00102 "movl $0xbe6f0000,%%eax\n" 00103 "subl %1,%%eax\n" 00104 "shrl $1,%%eax\n" 00105 "movl %%eax,%1\n" 00106 "flds %2\n" // x 0.5 00107 "fmulp %%st(1)\n" // h 00108 "flds %3\n" // h 1.5 00109 "flds %1\n" // h 1.5 a 00110 "fld %%st\n" // h 1.5 a a 00111 "fmul %%st\n" // h 1.5 a a*a 00112 "fmul %%st(3)\n" // h 1.5 a a*a*h 00113 "fsubr %%st(2)\n" // h 1.5 a 1.5-a*a*h 00114 "fmulp %%st(1)\n" // h 1.5 a 00115 "fld %%st\n" // h 1.5 a a 00116 "fmul %%st\n" // h 1.5 a a*a 00117 "fmulp %%st(3)\n" // a*a*h 1.5 a 00118 "fxch\n" // a*a*h a 1.5 00119 "fsubp %%st,%%st(2)\n" // 1.5-a*a*h a 00120 "fmulp %%st(1)\n" // a 00121 : "=t" (ret), "+m" (x) : "m" (0.5F), "m" (1.5F) 00122 : "eax", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" 00123 ); 00124 return ret; 00125 } 00126 00127 #elif (!defined (CS_NO_QSQRT)) && defined (PROC_POWERPC) && defined (COMP_GCC) 00128 00136 static inline float qsqrt(float x) 00137 { 00138 float y0 = 0.0; 00139 00140 if (x != 0.0) 00141 { 00142 float x0 = x * 0.5f; 00143 00144 asm ("frsqrte %0,%1" : "=f" (y0) : "f" (x)); 00145 00146 y0 = y0 * (1.5f - x0 * y0 * y0); 00147 y0 = (y0 * (1.5f - x0 * y0 * y0)) * x; 00148 }; 00149 00150 return y0; 00151 }; 00152 00157 static inline float qisqrt(float x) 00158 { 00159 float x0 = x * 0.5f; 00160 float y0; 00161 asm ("frsqrte %0,%1" : "=f" (y0) : "f" (x)); 00162 00163 y0 = y0 * (1.5f - x0 * y0 * y0); 00164 y0 = y0 * (1.5f - x0 * y0 * y0); 00165 00166 return y0; 00167 }; 00168 00169 #elif (!defined (CS_NO_QSQRT)) && defined (PROC_X86) && defined (COMP_VC) 00170 00171 #include <math.h> 00172 00173 #define qsqrt(x) sqrtf(x) 00174 #define qisqrt(x) (1.0f / sqrtf(x)) 00175 00176 #else 00177 00178 #include <math.h> 00179 #define qsqrt(x) sqrt(x) 00180 #define qisqrt(x) (1.0/sqrt(x)) 00181 00182 #endif 00183 00184 #endif // __QSQRT_H__ Generated for Crystal Space by doxygen 1.2.5 written by Dimitri van Heesch, ©1997-2000 |