diff options
Diffstat (limited to 'src/win95/inline.h')
| -rw-r--r-- | src/win95/inline.h | 1276 |
1 files changed, 2 insertions, 1274 deletions
diff --git a/src/win95/inline.h b/src/win95/inline.h index da000e4..717881d 100644 --- a/src/win95/inline.h +++ b/src/win95/inline.h @@ -14,15 +14,6 @@ #include "mmx_math.h" #endif -/* - - - Watcom PC Inline Functions. - - Watcom Standard C does not support the C++ "inline" directive, so these - functions have been written as inline assembler instead. - -*/ #ifdef __cplusplus extern "C" { @@ -73,524 +64,7 @@ extern "C" { */ -#ifdef __WATCOMC__ /* inline assember for the Watcom compiler */ - -/* ADD */ - -void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c); -# pragma aux ADD_LL = \ -"mov eax,[esi]" \ -"mov edx,[esi+4]" \ -"add eax,[edi]" \ -"adc edx,[edi+4]" \ -"mov [ebx],eax" \ -"mov [ebx+4],edx" \ -parm[esi] [edi] [ebx] \ -modify[eax edx]; - - -/* ADD ++ */ - -void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a); -# pragma aux ADD_LL_PP = \ -"mov eax,[esi]" \ -"mov edx,[esi+4]" \ -"add [edi],eax" \ -"adc [edi+4],edx" \ -parm[edi] [esi] \ -modify[eax edx]; - - -/* SUB */ - -void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c); -# pragma aux SUB_LL = \ -"mov eax,[esi]" \ -"mov edx,[esi+4]" \ -"sub eax,[edi]" \ -"sbb edx,[edi+4]" \ -"mov [ebx],eax" \ -"mov [ebx+4],edx" \ -parm[esi] [edi] [ebx] \ -modify[eax edx]; - - - -/* SUB -- */ - -void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a); -# pragma aux SUB_LL_MM = \ -"mov eax,[esi]" \ -"mov edx,[esi+4]" \ -"sub [edi],eax" \ -"sbb [edi+4],edx" \ -parm[edi] [esi] \ -modify[eax edx]; - - -/* - - MUL - - This is the multiply we use, the 32 x 32 = 64 widening version - -*/ - -void MUL_I_WIDE(int a, int b, LONGLONGCH *c); -# pragma aux MUL_I_WIDE = \ -"imul edx"\ -"mov [ebx],eax" \ -"mov [ebx+4],edx" \ -parm[eax] [edx] [ebx] \ -modify[eax edx]; - - - -/* - - CMP - - This substitutes for ==, >, <, >=, <= - -*/ - -int CMP_LL(LONGLONGCH *a, LONGLONGCH *b); -# pragma aux CMP_LL = \ -"mov eax,[ebx]" \ -"mov edx,[ebx+4]" \ -"sub eax,[ecx]" \ -"sbb edx,[ecx+4]" \ -"and edx,edx" \ -"jne llnz" \ -"and eax,eax" \ -"jne llnz" \ -"xor eax,eax" \ -"jmp llgs" \ -"llnz:" \ -"mov eax,1" \ -"and edx,edx" \ -"jge llgs" \ -"neg eax" \ -"llgs:" \ -parm[ebx] [ecx] \ -value[eax] \ -modify[edx]; - - - - -/* EQUALS */ - -void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b); -# pragma aux EQUALS_LL = \ -"mov eax,[esi]" \ -"mov edx,[esi+4]" \ -"mov [edi],eax" \ -"mov [edi+4],edx" \ -parm[edi] [esi] \ -modify[eax edx]; - - -/* NEGATE */ - -void NEG_LL(LONGLONGCH *a); -# pragma aux NEG_LL = \ -"not dword ptr[esi]" \ -"not dword ptr[esi+4]" \ -"add dword ptr[esi],1" \ -"adc dword ptr[esi+4],0" \ -parm[esi]; - - -/* ASR */ - -void ASR_LL(LONGLONGCH *a, int shift); -# pragma aux ASR_LL = \ -"and eax,eax" \ -"jle asrdn" \ -"asrlp:" \ -"sar dword ptr[esi+4],1" \ -"rcr dword ptr[esi],1" \ -"dec eax" \ -"jne asrlp" \ -"asrdn:" \ -parm[esi] [eax]; - - -/* Convert int to LONGLONGCH */ - -void IntToLL(LONGLONGCH *a, int *b); -# pragma aux IntToLL = \ -"mov eax,[esi]" \ -"cdq" \ -"mov [edi],eax" \ -"mov [edi+4],edx" \ -parm[edi] [esi] \ -modify[eax edx]; - - - - - - - - - -/* - - Fixed Point Multiply. - - - 16.16 * 16.16 -> 16.16 - or - 16.16 * 0.32 -> 0.32 - - A proper version of this function ought to read - 16.16 * 16.16 -> 32.16 - but this would require a long long result - - Algorithm: - - Take the mid 32 bits of the 64 bit result - -*/ - -/* - These functions have been checked for suitability for - a Pentium and look as if they would work adequately. - Might be worth a more detailed look at optimising - them though. -*/ - -#if 0 - -int MUL_FIXED(int a, int b); -# pragma aux MUL_FIXED = \ -"imul edx" \ -"mov ax,dx" \ -"rol eax,16" \ -parm[eax] [edx] \ -value[eax] \ -modify[edx]; - -#else - -int MUL_FIXED(int a, int b); -# pragma aux MUL_FIXED = \ -"imul edx" \ -"shrd eax,edx,16" \ -parm[eax] [edx] \ -value[eax] \ -modify[edx]; - -#endif - - -/* - - Fixed Point Divide - returns a / b - -*/ - -int DIV_FIXED(int a, int b); -# pragma aux DIV_FIXED = \ -"cdq" \ -"rol eax,16" \ -"mov dx,ax" \ -"xor ax,ax" \ -"idiv ebx" \ -parm[eax] [ebx] \ -value[eax] \ -modify[edx]; - - - - -/* - - Multiply and Divide Functions. - -*/ - - -/* - - 32/32 division - - This macro is a function on some other platforms - -*/ - -#define DIV_INT(a, b) ((a) / (b)) - - - - -/* - - A Narrowing 64/32 Division - -*/ - -int NarrowDivide(LONGLONGCH *a, int b); -# pragma aux NarrowDivide = \ -"mov eax,[esi]" \ -"mov edx,[esi+4]" \ -"idiv ebx" \ -parm[esi] [ebx] \ -value[eax] \ -modify[edx]; - - - -/* - - This function performs a Widening Multiply followed by a Narrowing Divide. - - a = (a * b) / c - -*/ - -int WideMulNarrowDiv(int a, int b, int c); -# pragma aux WideMulNarrowDiv = \ -"imul edx"\ -"idiv ebx" \ -parm[eax] [edx] [ebx] \ -value[eax]; - - - -/* - - Function to rotate a VECTORCH using a MATRIXCH - - This is the C function - - x = MUL_FIXED(m->mat11, v->vx); - x += MUL_FIXED(m->mat21, v->vy); - x += MUL_FIXED(m->mat31, v->vz); - - y = MUL_FIXED(m->mat12, v->vx); - y += MUL_FIXED(m->mat22, v->vy); - y += MUL_FIXED(m->mat32, v->vz); - - z = MUL_FIXED(m->mat13, v->vx); - z += MUL_FIXED(m->mat23, v->vy); - z += MUL_FIXED(m->mat33, v->vz); - - v->vx = x; - v->vy = y; - v->vz = z; - - This is the MUL_FIXED inline assembler function - - imul edx - shrd eax,edx,16 - - -typedef struct matrixch { - - int mat11; 0 - int mat12; 4 - int mat13; 8 - - int mat21; 12 - int mat22; 16 - int mat23; 20 - - int mat31; 24 - int mat32; 28 - int mat33; 32 - -} MATRIXCH; - -*/ - -void RotateVector_ASM(VECTORCH *v, MATRIXCH *m); -# pragma aux RotateVector_ASM = \ -\ -"push eax" \ -"push ebx" \ -"push ecx" \ -"push edx" \ -"push ebp" \ -\ -"mov eax,[edi + 0]" \ -"imul DWORD PTR [esi + 0]" \ -"shrd eax,edx,16" \ -"mov ecx,eax"\ -"mov eax,[edi + 12]" \ -"imul DWORD PTR [esi + 4]" \ -"shrd eax,edx,16" \ -"add ecx,eax" \ -"mov eax,[edi + 24]" \ -"imul DWORD PTR [esi + 8]" \ -"shrd eax,edx,16" \ -"add ecx,eax" \ -\ -"mov eax,[edi + 4]" \ -"imul DWORD PTR [esi + 0]" \ -"shrd eax,edx,16" \ -"mov ebx,eax"\ -"mov eax,[edi + 16]" \ -"imul DWORD PTR [esi + 4]" \ -"shrd eax,edx,16" \ -"add ebx,eax" \ -"mov eax,[edi + 28]" \ -"imul DWORD PTR [esi + 8]" \ -"shrd eax,edx,16" \ -"add ebx,eax" \ -\ -"mov eax,[edi + 8]" \ -"imul DWORD PTR [esi + 0]" \ -"shrd eax,edx,16" \ -"mov ebp,eax"\ -"mov eax,[edi + 20]" \ -"imul DWORD PTR [esi + 4]" \ -"shrd eax,edx,16" \ -"add ebp,eax" \ -"mov eax,[edi + 32]" \ -"imul DWORD PTR [esi + 8]" \ -"shrd eax,edx,16" \ -"add ebp,eax" \ -\ -"mov [esi + 0],ecx" \ -"mov [esi + 4],ebx" \ -"mov [esi + 8],ebp" \ -\ -"pop ebp" \ -"pop edx" \ -"pop ecx" \ -"pop ebx" \ -"pop eax" \ -\ -parm[esi] [edi]; - - -/* - - Here is the same function, this time copying the result to a second vector - -*/ - -void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m); -# pragma aux RotateAndCopyVector_ASM = \ -\ -"push eax" \ -"push ebx" \ -"push ecx" \ -"push ebp" \ -\ -"push edx" \ -"mov eax,[edi + 0]" \ -"imul DWORD PTR [esi + 0]" \ -"shrd eax,edx,16" \ -"mov ecx,eax"\ -"mov eax,[edi + 12]" \ -"imul DWORD PTR [esi + 4]" \ -"shrd eax,edx,16" \ -"add ecx,eax" \ -"mov eax,[edi + 24]" \ -"imul DWORD PTR [esi + 8]" \ -"shrd eax,edx,16" \ -"add ecx,eax" \ -\ -"mov eax,[edi + 4]" \ -"imul DWORD PTR [esi + 0]" \ -"shrd eax,edx,16" \ -"mov ebx,eax"\ -"mov eax,[edi + 16]" \ -"imul DWORD PTR [esi + 4]" \ -"shrd eax,edx,16" \ -"add ebx,eax" \ -"mov eax,[edi + 28]" \ -"imul DWORD PTR [esi + 8]" \ -"shrd eax,edx,16" \ -"add ebx,eax" \ -\ -"mov eax,[edi + 8]" \ -"imul DWORD PTR [esi + 0]" \ -"shrd eax,edx,16" \ -"mov ebp,eax"\ -"mov eax,[edi + 20]" \ -"imul DWORD PTR [esi + 4]" \ -"shrd eax,edx,16" \ -"add ebp,eax" \ -"mov eax,[edi + 32]" \ -"imul DWORD PTR [esi + 8]" \ -"shrd eax,edx,16" \ -"add ebp,eax" \ -\ -"pop edx" \ -"mov [edx + 0],ecx" \ -"mov [edx + 4],ebx" \ -"mov [edx + 8],ebp" \ -\ -"pop ebp" \ -"pop ecx" \ -"pop ebx" \ -"pop eax" \ -\ -parm[esi] [edx] [edi]; - - - - -#if (SupportFPMathsFunctions || SupportFPSquareRoot) - -/* - - Square Root - - Returns the Square Root of a 32-bit number - -*/ - -static long temp; -static long temp2; - -int SqRoot32(int A); -# pragma aux SqRoot32 = \ -"finit" \ -"mov temp,eax" \ -"fild temp" \ -"fsqrt" \ -"fistp temp2" \ -"fwait" \ -"mov eax,temp2" \ -parm[eax] \ -value[eax]; - -#endif - - -/* - - This may look ugly (it is) but it is a MUCH faster way to convert "float" into "int" than - the function call "CHP" used by the WATCOM compiler. - -*/ - -static float fptmp; -static int itmp; - -void FloatToInt(void); -# pragma aux FloatToInt = \ -"fld fptmp" \ -"fistp itmp"; - -/* - - This macro makes usage of the above function easier and more elegant - -*/ - -#define f2i(a, b) { \ -fptmp = (b); \ -FloatToInt(); \ -a = itmp;} - -#elif defined(_MSC_VER) && 0 /* inline assember for the Microsoft compiler */ +#if defined(_MSC_VER) && 0 /* inline assember for the Microsoft compiler */ /* ADD */ @@ -1111,7 +585,7 @@ a = itmp;} #else -#if 1 /* GCC! */ +/* inline assembly has been moved to mathline.c */ void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c); void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a); void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c); @@ -1147,752 +621,6 @@ fti_fptmp = (b); \ FloatToInt(); \ a = fti_itmp;} -#else /* inline stuff */ - -/* ADD */ - - -static __inline__ void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c) -{ -/* - _asm - { - mov esi,a - mov edi,b - mov ebx,c - mov eax,[esi] - mov edx,[esi+4] - add eax,[edi] - adc edx,[edi+4] - mov [ebx],eax - mov [ebx+4],edx - } -*/ - -__asm__("movl 0(%%esi), %%eax \n\t" - "movl 4(%%esi), %%edx \n\t" - "addl 0(%%edi), %%eax \n\t" - "adcl 4(%%edi), %%edx \n\t" - "movl %%eax, 0(%%ebx) \n\t" - "movl %%edx, 4(%%ebx) \n\t" - : - : "S" (a), "D" (b), "b" (c) - : "%eax", "%edx", "memory", "cc" - ); - -/* -__asm__("movl 0(%%esi), %%eax \n\t" - "movl 4(%%esi), %%edx \n\t" - "addl 0(%%edi), %%eax \n\t" - "adcl 4(%%edi), %%edx \n\t" - : "=a" (c->lo32), "=d" (c->hi32) - : "S" (a), "D" (b) - ); -*/ -} - -/* ADD ++ */ - -static __inline__ void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a) -{ -/* - _asm - { - mov edi,c - mov esi,a - mov eax,[esi] - mov edx,[esi+4] - add [edi],eax - adc [edi+4],edx - } -*/ -__asm__("movl 0(%%esi), %%eax \n\t" - "movl 4(%%esi), %%edx \n\t" - "addl %%eax, 0(%%edi) \n\t" - "adcl %%edx, 4(%%edi) \n\t" - : - : "D" (c), "S" (a) - : "%eax", "%edx", "memory", "cc" - ); -} - -/* SUB */ - -static __inline__ void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c) -{ -/* - _asm - { - mov esi,a - mov edi,b - mov ebx,c - mov eax,[esi] - mov edx,[esi+4] - sub eax,[edi] - sbb edx,[edi+4] - mov [ebx],eax - mov [ebx+4],edx - } -*/ -__asm__("movl 0(%%esi), %%eax \n\t" - "movl 4(%%esi), %%edx \n\t" - "subl 0(%%edi), %%eax \n\t" - "sbbl 4(%%edi), %%edx \n\t" - "movl %%eax, 0(%%ebx) \n\t" - "movl %%edx, 4(%%ebx) \n\t" - : - : "S" (a), "D" (b), "b" (c) - : "%eax", "%edx", "memory", "cc" - ); -} - -/* SUB -- */ - -static __inline__ void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a) -{ -/* - _asm - { - mov edi,c - mov esi,a - mov eax,[esi] - mov edx,[esi+4] - sub [edi],eax - sbb [edi+4],edx - } -*/ -__asm__("movl 0(%%esi), %%eax \n\t" - "movl 4(%%esi), %%edx \n\t" - "subl %%eax, 0(%%edi) \n\t" - "sbbl %%edx, 4(%%edi) \n\t" - : - : "D" (c), "S" (a) - : "%eax", "%edx", "memory", "cc" - ); -} - -/* - - MUL - - This is the multiply we use, the 32 x 32 = 64 widening version - -*/ - -static __inline__ void MUL_I_WIDE(int a, int b, LONGLONGCH *c) -{ -/* - _asm - { - mov eax,a - mov ebx,c - imul b - mov [ebx],eax - mov [ebx+4],edx - } -*/ -__asm__("imull %2 \n\t" - "movl %%eax, 0(%%ebx) \n\t" - "movl %%edx, 4(%%ebx) \n\t" - : - : "a" (a), "b" (c), "q" (b) - : "%edx", "memory", "cc" - ); -} - -/* - - CMP - - This substitutes for ==, >, <, >=, <= - -*/ - -static __inline__ int CMP_LL(LONGLONGCH *a, LONGLONGCH *b) -{ - int retval; -/* - _asm - { - mov ebx,a - mov ecx,b - mov eax,[ebx] - mov edx,[ebx+4] - sub eax,[ecx] - sbb edx,[ecx+4] - and edx,edx - jne llnz - and eax,eax - je llgs - llnz: - mov retval,1 - and edx,edx - jge llgs - neg retval - llgs: - } -*/ -/* TODO */ -__asm__("movl 0(%%ebx), %%eax \n\t" - "movl 4(%%ebx), %%edx \n\t" - "subl 0(%%ecx), %%eax \n\t" - "sbbl 4(%%ecx), %%edx \n\t" - "xorl %0, %0 \n\t" /* hopefully it doesn't pick %eax or %edx */ - "andl %%edx, %%edx \n\t" - "jne 0 \n\t" /* llnz */ - "andl %%eax, %%eax \n\t" - "je 1 \n" /* llgs */ -"0: \n\t" /* llnz */ - "movl $1, %0 \n\t" - "andl %%edx, %%edx \n\t" - "jge 1 \n\t" /* llgs */ - "negl %0 \n" -"1: \n\t" /* llgs */ - : "=r" (retval) - : "b" (a), "c" (b) - : "%eax", "%edx", "memory", "cc" - ); - - return retval; -} - -/* EQUALS */ - -static __inline__ void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b) -{ -/* - _asm - { - mov edi,a - mov esi,b - mov eax,[esi] - mov edx,[esi+4] - mov [edi],eax - mov [edi+4],edx - } -*/ -__asm__("movl 0(%%esi), %%eax \n\t" - "movl 4(%%esi), %%edx \n\t" - "movl %%eax, 0(%%edi) \n\t" - "movl %%edx, 4(%%edi) \n\t" - : - : "D" (a), "S" (b) - : "%eax", "%edx", "memory" - ); -} - -/* NEGATE */ - -static __inline__ void NEG_LL(LONGLONGCH *a) -{ -/* - _asm - { - mov esi,a - not dword ptr[esi] - not dword ptr[esi+4] - add dword ptr[esi],1 - adc dword ptr[esi+4],0 - } -*/ -__asm__("notl 0(%%esi) \n\t" - "notl 4(%%esi) \n\t" - "addl $1, 0(%%esi) \n\t" - "adcl $0, 4(%%esi) \n\t" - : - : "S" (a) - : "memory", "cc" - ); -} - -/* ASR */ - -static __inline__ void ASR_LL(LONGLONGCH *a, int shift) -{ -/* - _asm - { - mov esi,a - mov eax,shift - and eax,eax - jle asrdn - asrlp: - sar dword ptr[esi+4],1 - rcr dword ptr[esi],1 - dec eax - jne asrlp - asrdn: - } -*/ -__asm__("andl %%eax, %%eax \n\t" - "jle 0 \n" /* asrdn */ -"1: \n\t" /* asrlp */ - "sarl $1, 4(%%esi) \n\t" - "rcrl $1, 0(%%esi) \n\t" - "decl %%eax \n\t" - "jne 1 \n" -"0: \n\t" - : - : "S" (a), "a" (shift) - : "memory", "cc" - ); - -} - -/* Convert int to LONGLONGCH */ - -static __inline__ void IntToLL(LONGLONGCH *a, int *b) -{ -/* - _asm - { - mov esi,b - mov edi,a - mov eax,[esi] - cdq - mov [edi],eax - mov [edi+4],edx - } -*/ -__asm__("movl 0(%%esi), %%eax \n\t" - "cdq \n\t" - "movl %%eax, 0(%%edi) \n\t" - "movl %%edx, 4(%%edi) \n\t" - : - : "S" (b), "D" (a) - : "%eax", "%edx", "memory", "cc" - ); - -} - -/* - - Fixed Point Multiply. - - - 16.16 * 16.16 -> 16.16 - or - 16.16 * 0.32 -> 0.32 - - A proper version of this function ought to read - 16.16 * 16.16 -> 32.16 - but this would require a long long result - - Algorithm: - - Take the mid 32 bits of the 64 bit result - -*/ - -/* - These functions have been checked for suitability for - a Pentium and look as if they would work adequately. - Might be worth a more detailed look at optimising - them though. -*/ - -static __inline__ int MUL_FIXED(int a, int b) -{ - int retval; -/* - _asm - { - mov eax,a - imul b - shrd eax,edx,16 - mov retval,eax - } -*/ -/* TODO */ -__asm__("imull %2 \n\t" - "shrdl $16, %%edx, %%eax \n\t" - : "=a" (retval) - : "a" (a), "q" (b) - : "%edx", "cc" - ); - return retval; -} - -/* - - Fixed Point Divide - returns a / b - -*/ - -static __inline__ int DIV_FIXED(int a, int b) -{ - int retval; -/* - _asm - { - mov eax,a - cdq - rol eax,16 - mov dx,ax - xor ax,ax - idiv b - mov retval,eax - } -*/ -/* TODO */ -__asm__("cdq \n\t" - "roll $16, %%eax \n\t" - "mov %%ax, %%dx \n\t" - "xor %%ax, %%ax \n\t" - "idivl %2 \n\t" - : "=a" (retval) - : "a" (a), "q" (b) - : "%edx", "cc" - ); - return retval; -} - -/* - - Multiply and Divide Functions. - -*/ - - -/* - - 32/32 division - - This macro is a function on some other platforms - -*/ - -#define DIV_INT(a, b) ((a) / (b)) - -/* - - A Narrowing 64/32 Division - -*/ - -static __inline__ int NarrowDivide(LONGLONGCH *a, int b) -{ - int retval; -/* - _asm - { - mov esi,a - mov eax,[esi] - mov edx,[esi+4] - idiv b - mov retval,eax - } -*/ -__asm__("movl 0(%%esi), %%eax \n\t" - "movl 4(%%esi), %%edx \n\t" - "idivl %2 \n\t" - : "=a" (retval) - : "S" (a), "q" (b) - : "%edx", "cc" - ); - return retval; -} - -/* - - This function performs a Widening Multiply followed by a Narrowing Divide. - - a = (a * b) / c - -*/ - -static __inline__ int WideMulNarrowDiv(int a, int b, int c) -{ -#if 0 /* TODO: broken? */ - int retval; -/* - _asm - { - mov eax,a - imul b - idiv c - mov retval,eax - } -*/ -/* TODO */ -__asm__("imull %2 \n\t" - "idivl %3 \n\t" - : "=a" (retval) - : "a" (a), "q" (b), "q" (c) - : "cc" - ); - return retval; -#endif - return (a * b) / c; -} - -/* - - Function to rotate a VECTORCH using a MATRIXCH - - This is the C function - - x = MUL_FIXED(m->mat11, v->vx); - x += MUL_FIXED(m->mat21, v->vy); - x += MUL_FIXED(m->mat31, v->vz); - - y = MUL_FIXED(m->mat12, v->vx); - y += MUL_FIXED(m->mat22, v->vy); - y += MUL_FIXED(m->mat32, v->vz); - - z = MUL_FIXED(m->mat13, v->vx); - z += MUL_FIXED(m->mat23, v->vy); - z += MUL_FIXED(m->mat33, v->vz); - - v->vx = x; - v->vy = y; - v->vz = z; - - This is the MUL_FIXED inline assembler function - - imul edx - shrd eax,edx,16 - - -typedef struct matrixch { - - int mat11; 0 - int mat12; 4 - int mat13; 8 - - int mat21; 12 - int mat22; 16 - int mat23; 20 - - int mat31; 24 - int mat32; 28 - int mat33; 32 - -} MATRIXCH; - -*/ - -#if 0 /* TODO if these are needed */ -static void RotateVector_ASM(VECTORCH *v, MATRIXCH *m) -{ - _asm - { - mov esi,v - mov edi,m - - mov eax,[edi + 0] - imul DWORD PTR [esi + 0] - shrd eax,edx,16 - mov ecx,eax - mov eax,[edi + 12] - imul DWORD PTR [esi + 4] - shrd eax,edx,16 - add ecx,eax - mov eax,[edi + 24] - imul DWORD PTR [esi + 8] - shrd eax,edx,16 - add ecx,eax - - mov eax,[edi + 4] - imul DWORD PTR [esi + 0] - shrd eax,edx,16 - mov ebx,eax - mov eax,[edi + 16] - imul DWORD PTR [esi + 4] - shrd eax,edx,16 - add ebx,eax - mov eax,[edi + 28] - imul DWORD PTR [esi + 8] - shrd eax,edx,16 - add ebx,eax - - mov eax,[edi + 8] - imul DWORD PTR [esi + 0] - shrd eax,edx,16 - mov ebp,eax - mov eax,[edi + 20] - imul DWORD PTR [esi + 4] - shrd eax,edx,16 - add ebp,eax - mov eax,[edi + 32] - imul DWORD PTR [esi + 8] - shrd eax,edx,16 - add ebp,eax - - mov [esi + 0],ecx - mov [esi + 4],ebx - mov [esi + 8],ebp - } -} - -/* - - Here is the same function, this time copying the result to a second vector - -*/ - -static void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m) -{ - _asm - { - mov esi,v1 - mov edi,m - - mov eax,[edi + 0] - imul DWORD PTR [esi + 0] - shrd eax,edx,16 - mov ecx,eax - mov eax,[edi + 12] - imul DWORD PTR [esi + 4] - shrd eax,edx,16 - add ecx,eax - mov eax,[edi + 24] - imul DWORD PTR [esi + 8] - shrd eax,edx,16 - add ecx,eax - - mov eax,[edi + 4] - imul DWORD PTR [esi + 0] - shrd eax,edx,16 - mov ebx,eax - mov eax,[edi + 16] - imul DWORD PTR [esi + 4] - shrd eax,edx,16 - add ebx,eax - mov eax,[edi + 28] - imul DWORD PTR [esi + 8] - shrd eax,edx,16 - add ebx,eax - - mov eax,[edi + 8] - imul DWORD PTR [esi + 0] - shrd eax,edx,16 - mov ebp,eax - mov eax,[edi + 20] - imul DWORD PTR [esi + 4] - shrd eax,edx,16 - add ebp,eax - mov eax,[edi + 32] - imul DWORD PTR [esi + 8] - shrd eax,edx,16 - add ebp,eax - - mov edx,v2 - mov [edx + 0],ecx - mov [edx + 4],ebx - mov [edx + 8],ebp - } -} -#endif - -#if (SupportFPMathsFunctions || SupportFPSquareRoot) - -/* - - Square Root - - Returns the Square Root of a 32-bit number - -*/ - -extern int sqrt_temp1; -extern int sqrt_temp2; - -#include <math.h> -static __inline__ int SqRoot32(int A) -{ -#if 0 - sqrt_temp1 = A; -/* - _asm - { - finit - fild A - fsqrt - fistp temp2 - fwait - } -*/ - -__asm__("finit \n\t" - "fild sqrt_temp1 \n\t" - "fsqrt \n\t" - "fistp sqrt_temp2 \n\t" - "fwait \n\t" - : - : - : "memory", "cc" - ); - - return sqrt_temp2; -#endif -{ /* TODO: clean this please */ - double x = A; - double retvald = sqrt(x); - int retval = retvald; - return retval; -} -} - -#endif - - -/* - - This may look ugly (it is) but it is a MUCH faster way to convert "float" into "int" than - the function call "CHP" used by the WATCOM compiler. - -*/ - -extern float fti_fptmp; -extern int fti_itmp; - -static __inline__ int FloatToInt(float fptmp) -{ -#if 0 - fti_fptmp = fptmp; -/* - _asm - { - fld fptmp - fistp itmp - } -*/ -__asm__("fld fti_fptmp \n\t" - "fistp fti_itmp \n\t" - : - : - : "memory", "cc" - ); - - return fti_itmp; -#endif - - return fptmp; -} - -/* - - This macro makes usage of the above function easier and more elegant - -*/ - -#define f2i(a, b) { \ -a = FloatToInt(b); \ -} - - -#if 0 -int SqRoot32(int A); -void FloatToInt(); -#define f2i(a, b) { \ -fti_fptmp = (b); \ -FloatToInt(); \ -a = fti_itmp;} -#endif - -#endif - #endif int WideMul2NarrowDiv(int a, int b, int c, int d, int e); |
