From c948a593912fef46e2cea6da9b0dd0c7c36054c7 Mon Sep 17 00:00:00 2001 From: Steven Fuller Date: Sun, 18 May 2008 21:32:34 -0700 Subject: Removed Watcom Compiler support. --- src/win95/inline.h | 1450 ++++------------------------------------------------ 1 file changed, 89 insertions(+), 1361 deletions(-) (limited to 'src/win95/inline.h') diff --git a/src/win95/inline.h b/src/win95/inline.h index da000e4..717881d 100644 --- a/src/win95/inline.h +++ b/src/win95/inline.h @@ -14,1147 +14,62 @@ #include "mmx_math.h" #endif -/* - - - Watcom PC Inline Functions. - - Watcom Standard C does not support the C++ "inline" directive, so these - functions have been written as inline assembler instead. - -*/ - -#ifdef __cplusplus -extern "C" { -#endif - -/* - Standard macros. Note that FIXED_TO_INT - and INT_TO_FIXED are very suboptimal in - this version!!! - Also, MUL_INT and ISR are ONLY intended - to be used in Win95 so that Saturn versions - of the same code can be compiled using calls - to hand optimised assembler functions, i.e. - for code that is never intended to be run on - a Saturn they are unnecessary. -*/ - -#define OUR_ABS(x) (((x) < 0) ? -(x) : (x)) -#define OUR_SIGN(x) (((x) < 0) ? -1 : +1) -#define OUR_INT_TO_FIXED(x) (int) ((x) * (65536)) -#define OUR_FIXED_TO_INT(x) (int) ((x) / (65536)) -#define OUR_MUL_INT(a, b) ((a) * (b)) -#define OUR_ISR(a, shift) ((a) >> (shift)) - -/* - - Platform Specific 64-Bit Operator Functions - - Not all compilers support 64-bit operations, and some platforms may not - even support 64-bit numbers. Support for 64-bit operations is therefore - provided in the platform specific fucntions below. - - For C++ a mew class could be defined. However the current system is not - compiled as C++ and the Cygnus GNU C++ is not currently working. - -*/ - - -/* - These functions have been checked for suitability for - a Pentium and look as if they would pair up okay. - Might be worth a more detailed look at optimising - them though. 
- Obviously there is a problem with values not being - loaded into registers for these functions, but this - may be unavoidable for 64 bit values on a Watcom - platform. -*/ - - -#ifdef __WATCOMC__ /* inline assember for the Watcom compiler */ - -/* ADD */ - -void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c); -# pragma aux ADD_LL = \ -"mov eax,[esi]" \ -"mov edx,[esi+4]" \ -"add eax,[edi]" \ -"adc edx,[edi+4]" \ -"mov [ebx],eax" \ -"mov [ebx+4],edx" \ -parm[esi] [edi] [ebx] \ -modify[eax edx]; - - -/* ADD ++ */ - -void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a); -# pragma aux ADD_LL_PP = \ -"mov eax,[esi]" \ -"mov edx,[esi+4]" \ -"add [edi],eax" \ -"adc [edi+4],edx" \ -parm[edi] [esi] \ -modify[eax edx]; - - -/* SUB */ - -void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c); -# pragma aux SUB_LL = \ -"mov eax,[esi]" \ -"mov edx,[esi+4]" \ -"sub eax,[edi]" \ -"sbb edx,[edi+4]" \ -"mov [ebx],eax" \ -"mov [ebx+4],edx" \ -parm[esi] [edi] [ebx] \ -modify[eax edx]; - - - -/* SUB -- */ - -void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a); -# pragma aux SUB_LL_MM = \ -"mov eax,[esi]" \ -"mov edx,[esi+4]" \ -"sub [edi],eax" \ -"sbb [edi+4],edx" \ -parm[edi] [esi] \ -modify[eax edx]; - - -/* - - MUL - - This is the multiply we use, the 32 x 32 = 64 widening version - -*/ - -void MUL_I_WIDE(int a, int b, LONGLONGCH *c); -# pragma aux MUL_I_WIDE = \ -"imul edx"\ -"mov [ebx],eax" \ -"mov [ebx+4],edx" \ -parm[eax] [edx] [ebx] \ -modify[eax edx]; - - - -/* - - CMP - - This substitutes for ==, >, <, >=, <= - -*/ - -int CMP_LL(LONGLONGCH *a, LONGLONGCH *b); -# pragma aux CMP_LL = \ -"mov eax,[ebx]" \ -"mov edx,[ebx+4]" \ -"sub eax,[ecx]" \ -"sbb edx,[ecx+4]" \ -"and edx,edx" \ -"jne llnz" \ -"and eax,eax" \ -"jne llnz" \ -"xor eax,eax" \ -"jmp llgs" \ -"llnz:" \ -"mov eax,1" \ -"and edx,edx" \ -"jge llgs" \ -"neg eax" \ -"llgs:" \ -parm[ebx] [ecx] \ -value[eax] \ -modify[edx]; - - - - -/* EQUALS */ - -void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b); -# pragma aux EQUALS_LL = \ 
-"mov eax,[esi]" \ -"mov edx,[esi+4]" \ -"mov [edi],eax" \ -"mov [edi+4],edx" \ -parm[edi] [esi] \ -modify[eax edx]; - - -/* NEGATE */ - -void NEG_LL(LONGLONGCH *a); -# pragma aux NEG_LL = \ -"not dword ptr[esi]" \ -"not dword ptr[esi+4]" \ -"add dword ptr[esi],1" \ -"adc dword ptr[esi+4],0" \ -parm[esi]; - - -/* ASR */ - -void ASR_LL(LONGLONGCH *a, int shift); -# pragma aux ASR_LL = \ -"and eax,eax" \ -"jle asrdn" \ -"asrlp:" \ -"sar dword ptr[esi+4],1" \ -"rcr dword ptr[esi],1" \ -"dec eax" \ -"jne asrlp" \ -"asrdn:" \ -parm[esi] [eax]; - - -/* Convert int to LONGLONGCH */ - -void IntToLL(LONGLONGCH *a, int *b); -# pragma aux IntToLL = \ -"mov eax,[esi]" \ -"cdq" \ -"mov [edi],eax" \ -"mov [edi+4],edx" \ -parm[edi] [esi] \ -modify[eax edx]; - - - - - - - - - -/* - - Fixed Point Multiply. - - - 16.16 * 16.16 -> 16.16 - or - 16.16 * 0.32 -> 0.32 - - A proper version of this function ought to read - 16.16 * 16.16 -> 32.16 - but this would require a long long result - - Algorithm: - - Take the mid 32 bits of the 64 bit result - -*/ - -/* - These functions have been checked for suitability for - a Pentium and look as if they would work adequately. - Might be worth a more detailed look at optimising - them though. -*/ - -#if 0 - -int MUL_FIXED(int a, int b); -# pragma aux MUL_FIXED = \ -"imul edx" \ -"mov ax,dx" \ -"rol eax,16" \ -parm[eax] [edx] \ -value[eax] \ -modify[edx]; - -#else - -int MUL_FIXED(int a, int b); -# pragma aux MUL_FIXED = \ -"imul edx" \ -"shrd eax,edx,16" \ -parm[eax] [edx] \ -value[eax] \ -modify[edx]; - -#endif - - -/* - - Fixed Point Divide - returns a / b - -*/ - -int DIV_FIXED(int a, int b); -# pragma aux DIV_FIXED = \ -"cdq" \ -"rol eax,16" \ -"mov dx,ax" \ -"xor ax,ax" \ -"idiv ebx" \ -parm[eax] [ebx] \ -value[eax] \ -modify[edx]; - - - - -/* - - Multiply and Divide Functions. 
- -*/ - - -/* - - 32/32 division - - This macro is a function on some other platforms - -*/ - -#define DIV_INT(a, b) ((a) / (b)) - - - - -/* - - A Narrowing 64/32 Division - -*/ - -int NarrowDivide(LONGLONGCH *a, int b); -# pragma aux NarrowDivide = \ -"mov eax,[esi]" \ -"mov edx,[esi+4]" \ -"idiv ebx" \ -parm[esi] [ebx] \ -value[eax] \ -modify[edx]; - - - -/* - - This function performs a Widening Multiply followed by a Narrowing Divide. - - a = (a * b) / c - -*/ - -int WideMulNarrowDiv(int a, int b, int c); -# pragma aux WideMulNarrowDiv = \ -"imul edx"\ -"idiv ebx" \ -parm[eax] [edx] [ebx] \ -value[eax]; - - - -/* - - Function to rotate a VECTORCH using a MATRIXCH - - This is the C function - - x = MUL_FIXED(m->mat11, v->vx); - x += MUL_FIXED(m->mat21, v->vy); - x += MUL_FIXED(m->mat31, v->vz); - - y = MUL_FIXED(m->mat12, v->vx); - y += MUL_FIXED(m->mat22, v->vy); - y += MUL_FIXED(m->mat32, v->vz); - - z = MUL_FIXED(m->mat13, v->vx); - z += MUL_FIXED(m->mat23, v->vy); - z += MUL_FIXED(m->mat33, v->vz); - - v->vx = x; - v->vy = y; - v->vz = z; - - This is the MUL_FIXED inline assembler function - - imul edx - shrd eax,edx,16 - - -typedef struct matrixch { - - int mat11; 0 - int mat12; 4 - int mat13; 8 - - int mat21; 12 - int mat22; 16 - int mat23; 20 - - int mat31; 24 - int mat32; 28 - int mat33; 32 - -} MATRIXCH; - -*/ - -void RotateVector_ASM(VECTORCH *v, MATRIXCH *m); -# pragma aux RotateVector_ASM = \ -\ -"push eax" \ -"push ebx" \ -"push ecx" \ -"push edx" \ -"push ebp" \ -\ -"mov eax,[edi + 0]" \ -"imul DWORD PTR [esi + 0]" \ -"shrd eax,edx,16" \ -"mov ecx,eax"\ -"mov eax,[edi + 12]" \ -"imul DWORD PTR [esi + 4]" \ -"shrd eax,edx,16" \ -"add ecx,eax" \ -"mov eax,[edi + 24]" \ -"imul DWORD PTR [esi + 8]" \ -"shrd eax,edx,16" \ -"add ecx,eax" \ -\ -"mov eax,[edi + 4]" \ -"imul DWORD PTR [esi + 0]" \ -"shrd eax,edx,16" \ -"mov ebx,eax"\ -"mov eax,[edi + 16]" \ -"imul DWORD PTR [esi + 4]" \ -"shrd eax,edx,16" \ -"add ebx,eax" \ -"mov eax,[edi + 28]" \ -"imul 
DWORD PTR [esi + 8]" \ -"shrd eax,edx,16" \ -"add ebx,eax" \ -\ -"mov eax,[edi + 8]" \ -"imul DWORD PTR [esi + 0]" \ -"shrd eax,edx,16" \ -"mov ebp,eax"\ -"mov eax,[edi + 20]" \ -"imul DWORD PTR [esi + 4]" \ -"shrd eax,edx,16" \ -"add ebp,eax" \ -"mov eax,[edi + 32]" \ -"imul DWORD PTR [esi + 8]" \ -"shrd eax,edx,16" \ -"add ebp,eax" \ -\ -"mov [esi + 0],ecx" \ -"mov [esi + 4],ebx" \ -"mov [esi + 8],ebp" \ -\ -"pop ebp" \ -"pop edx" \ -"pop ecx" \ -"pop ebx" \ -"pop eax" \ -\ -parm[esi] [edi]; - - -/* - - Here is the same function, this time copying the result to a second vector - -*/ - -void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m); -# pragma aux RotateAndCopyVector_ASM = \ -\ -"push eax" \ -"push ebx" \ -"push ecx" \ -"push ebp" \ -\ -"push edx" \ -"mov eax,[edi + 0]" \ -"imul DWORD PTR [esi + 0]" \ -"shrd eax,edx,16" \ -"mov ecx,eax"\ -"mov eax,[edi + 12]" \ -"imul DWORD PTR [esi + 4]" \ -"shrd eax,edx,16" \ -"add ecx,eax" \ -"mov eax,[edi + 24]" \ -"imul DWORD PTR [esi + 8]" \ -"shrd eax,edx,16" \ -"add ecx,eax" \ -\ -"mov eax,[edi + 4]" \ -"imul DWORD PTR [esi + 0]" \ -"shrd eax,edx,16" \ -"mov ebx,eax"\ -"mov eax,[edi + 16]" \ -"imul DWORD PTR [esi + 4]" \ -"shrd eax,edx,16" \ -"add ebx,eax" \ -"mov eax,[edi + 28]" \ -"imul DWORD PTR [esi + 8]" \ -"shrd eax,edx,16" \ -"add ebx,eax" \ -\ -"mov eax,[edi + 8]" \ -"imul DWORD PTR [esi + 0]" \ -"shrd eax,edx,16" \ -"mov ebp,eax"\ -"mov eax,[edi + 20]" \ -"imul DWORD PTR [esi + 4]" \ -"shrd eax,edx,16" \ -"add ebp,eax" \ -"mov eax,[edi + 32]" \ -"imul DWORD PTR [esi + 8]" \ -"shrd eax,edx,16" \ -"add ebp,eax" \ -\ -"pop edx" \ -"mov [edx + 0],ecx" \ -"mov [edx + 4],ebx" \ -"mov [edx + 8],ebp" \ -\ -"pop ebp" \ -"pop ecx" \ -"pop ebx" \ -"pop eax" \ -\ -parm[esi] [edx] [edi]; - - - - -#if (SupportFPMathsFunctions || SupportFPSquareRoot) - -/* - - Square Root - - Returns the Square Root of a 32-bit number - -*/ - -static long temp; -static long temp2; - -int SqRoot32(int A); -# pragma aux 
SqRoot32 = \ -"finit" \ -"mov temp,eax" \ -"fild temp" \ -"fsqrt" \ -"fistp temp2" \ -"fwait" \ -"mov eax,temp2" \ -parm[eax] \ -value[eax]; - -#endif - - -/* - - This may look ugly (it is) but it is a MUCH faster way to convert "float" into "int" than - the function call "CHP" used by the WATCOM compiler. - -*/ - -static float fptmp; -static int itmp; - -void FloatToInt(void); -# pragma aux FloatToInt = \ -"fld fptmp" \ -"fistp itmp"; - -/* - - This macro makes usage of the above function easier and more elegant - -*/ - -#define f2i(a, b) { \ -fptmp = (b); \ -FloatToInt(); \ -a = itmp;} - -#elif defined(_MSC_VER) && 0 /* inline assember for the Microsoft compiler */ - -/* ADD */ - -static void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c) -{ - _asm - { - mov esi,a - mov edi,b - mov ebx,c - mov eax,[esi] - mov edx,[esi+4] - add eax,[edi] - adc edx,[edi+4] - mov [ebx],eax - mov [ebx+4],edx - } -} - -/* ADD ++ */ - -static void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a) -{ - _asm - { - mov edi,c - mov esi,a - mov eax,[esi] - mov edx,[esi+4] - add [edi],eax - adc [edi+4],edx - } -} - -/* SUB */ - -static void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c) -{ - _asm - { - mov esi,a - mov edi,b - mov ebx,c - mov eax,[esi] - mov edx,[esi+4] - sub eax,[edi] - sbb edx,[edi+4] - mov [ebx],eax - mov [ebx+4],edx - } -} - -/* SUB -- */ - -static void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a) -{ - _asm - { - mov edi,c - mov esi,a - mov eax,[esi] - mov edx,[esi+4] - sub [edi],eax - sbb [edi+4],edx - } -} - -/* - - MUL - - This is the multiply we use, the 32 x 32 = 64 widening version - -*/ - -static void MUL_I_WIDE(int a, int b, LONGLONGCH *c) -{ - _asm - { - mov eax,a - mov ebx,c - imul b - mov [ebx],eax - mov [ebx+4],edx - } -} - -/* - - CMP - - This substitutes for ==, >, <, >=, <= - -*/ - -static int CMP_LL(LONGLONGCH *a, LONGLONGCH *b) -{ - int retval = 0; - _asm - { - mov ebx,a - mov ecx,b - mov eax,[ebx] - mov edx,[ebx+4] - sub eax,[ecx] - sbb edx,[ecx+4] - and 
edx,edx - jne llnz - and eax,eax - je llgs - llnz: - mov retval,1 - and edx,edx - jge llgs - neg retval - llgs: - } - return retval; -} - -/* EQUALS */ - -static void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b) -{ - _asm - { - mov edi,a - mov esi,b - mov eax,[esi] - mov edx,[esi+4] - mov [edi],eax - mov [edi+4],edx - } -} - -/* NEGATE */ - -static void NEG_LL(LONGLONGCH *a) -{ - _asm - { - mov esi,a - not dword ptr[esi] - not dword ptr[esi+4] - add dword ptr[esi],1 - adc dword ptr[esi+4],0 - } -} - -/* ASR */ - -static void ASR_LL(LONGLONGCH *a, int shift) -{ - _asm - { - mov esi,a - mov eax,shift - and eax,eax - jle asrdn - asrlp: - sar dword ptr[esi+4],1 - rcr dword ptr[esi],1 - dec eax - jne asrlp - asrdn: - } -} - -/* Convert int to LONGLONGCH */ - -static void IntToLL(LONGLONGCH *a, int *b) -{ - _asm - { - mov esi,b - mov edi,a - mov eax,[esi] - cdq - mov [edi],eax - mov [edi+4],edx - } -} - -/* - - Fixed Point Multiply. - - - 16.16 * 16.16 -> 16.16 - or - 16.16 * 0.32 -> 0.32 - - A proper version of this function ought to read - 16.16 * 16.16 -> 32.16 - but this would require a long long result - - Algorithm: - - Take the mid 32 bits of the 64 bit result - -*/ - -/* - These functions have been checked for suitability for - a Pentium and look as if they would work adequately. - Might be worth a more detailed look at optimising - them though. -*/ - -static int MUL_FIXED(int a, int b) -{ - int retval; - _asm - { - mov eax,a - imul b - shrd eax,edx,16 - mov retval,eax - } - return retval; -} - -/* - - Fixed Point Divide - returns a / b - -*/ - -static int DIV_FIXED(int a, int b) -{ - int retval; - _asm - { - mov eax,a - cdq - rol eax,16 - mov dx,ax - xor ax,ax - idiv b - mov retval,eax - } - return retval; -} - -/* - - Multiply and Divide Functions. 
- -*/ - - -/* - - 32/32 division - - This macro is a function on some other platforms - -*/ - -#define DIV_INT(a, b) ((a) / (b)) - -/* - - A Narrowing 64/32 Division - -*/ - -static int NarrowDivide(LONGLONGCH *a, int b) -{ - int retval; - _asm - { - mov esi,a - mov eax,[esi] - mov edx,[esi+4] - idiv b - mov retval,eax - } - return retval; -} - -/* - - This function performs a Widening Multiply followed by a Narrowing Divide. - - a = (a * b) / c - -*/ - -static int WideMulNarrowDiv(int a, int b, int c) -{ - int retval; - _asm - { - mov eax,a - imul b - idiv c - mov retval,eax - } - return retval; -} - -/* - - Function to rotate a VECTORCH using a MATRIXCH - - This is the C function - - x = MUL_FIXED(m->mat11, v->vx); - x += MUL_FIXED(m->mat21, v->vy); - x += MUL_FIXED(m->mat31, v->vz); - - y = MUL_FIXED(m->mat12, v->vx); - y += MUL_FIXED(m->mat22, v->vy); - y += MUL_FIXED(m->mat32, v->vz); - - z = MUL_FIXED(m->mat13, v->vx); - z += MUL_FIXED(m->mat23, v->vy); - z += MUL_FIXED(m->mat33, v->vz); - - v->vx = x; - v->vy = y; - v->vz = z; - - This is the MUL_FIXED inline assembler function - - imul edx - shrd eax,edx,16 - - -typedef struct matrixch { - - int mat11; 0 - int mat12; 4 - int mat13; 8 - - int mat21; 12 - int mat22; 16 - int mat23; 20 - - int mat31; 24 - int mat32; 28 - int mat33; 32 - -} MATRIXCH; - -*/ - -static void RotateVector_ASM(VECTORCH *v, MATRIXCH *m) -{ - _asm - { - mov esi,v - mov edi,m - - mov eax,[edi + 0] - imul DWORD PTR [esi + 0] - shrd eax,edx,16 - mov ecx,eax - mov eax,[edi + 12] - imul DWORD PTR [esi + 4] - shrd eax,edx,16 - add ecx,eax - mov eax,[edi + 24] - imul DWORD PTR [esi + 8] - shrd eax,edx,16 - add ecx,eax - - mov eax,[edi + 4] - imul DWORD PTR [esi + 0] - shrd eax,edx,16 - mov ebx,eax - mov eax,[edi + 16] - imul DWORD PTR [esi + 4] - shrd eax,edx,16 - add ebx,eax - mov eax,[edi + 28] - imul DWORD PTR [esi + 8] - shrd eax,edx,16 - add ebx,eax - - mov eax,[edi + 8] - imul DWORD PTR [esi + 0] - shrd eax,edx,16 - mov ebp,eax - mov 
eax,[edi + 20] - imul DWORD PTR [esi + 4] - shrd eax,edx,16 - add ebp,eax - mov eax,[edi + 32] - imul DWORD PTR [esi + 8] - shrd eax,edx,16 - add ebp,eax - - mov [esi + 0],ecx - mov [esi + 4],ebx - mov [esi + 8],ebp - } -} - -/* - - Here is the same function, this time copying the result to a second vector - -*/ - -static void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m) -{ - _asm - { - mov esi,v1 - mov edi,m - - mov eax,[edi + 0] - imul DWORD PTR [esi + 0] - shrd eax,edx,16 - mov ecx,eax - mov eax,[edi + 12] - imul DWORD PTR [esi + 4] - shrd eax,edx,16 - add ecx,eax - mov eax,[edi + 24] - imul DWORD PTR [esi + 8] - shrd eax,edx,16 - add ecx,eax - - mov eax,[edi + 4] - imul DWORD PTR [esi + 0] - shrd eax,edx,16 - mov ebx,eax - mov eax,[edi + 16] - imul DWORD PTR [esi + 4] - shrd eax,edx,16 - add ebx,eax - mov eax,[edi + 28] - imul DWORD PTR [esi + 8] - shrd eax,edx,16 - add ebx,eax - - mov eax,[edi + 8] - imul DWORD PTR [esi + 0] - shrd eax,edx,16 - mov ebp,eax - mov eax,[edi + 20] - imul DWORD PTR [esi + 4] - shrd eax,edx,16 - add ebp,eax - mov eax,[edi + 32] - imul DWORD PTR [esi + 8] - shrd eax,edx,16 - add ebp,eax - - mov edx,v2 - mov [edx + 0],ecx - mov [edx + 4],ebx - mov [edx + 8],ebp - } -} - -#if (SupportFPMathsFunctions || SupportFPSquareRoot) - -/* - - Square Root - - Returns the Square Root of a 32-bit number - -*/ - -static long temp; -static long temp2; - -static int SqRoot32(int A) -{ - _asm - { - finit - fild A - fsqrt - fistp temp2 - fwait - } - return (int)temp2; -} +#ifdef __cplusplus +extern "C" { #endif - -/* - - This may look ugly (it is) but it is a MUCH faster way to convert "float" into "int" than - the function call "CHP" used by the WATCOM compiler. - +/* + Standard macros. Note that FIXED_TO_INT + and INT_TO_FIXED are very suboptimal in + this version!!! 
+ Also, MUL_INT and ISR are ONLY intended + to be used in Win95 so that Saturn versions + of the same code can be compiled using calls + to hand optimised assembler functions, i.e. + for code that is never intended to be run on + a Saturn they are unnecessary. */ -static float fptmp; -static int itmp; - -static void FloatToInt(void) -{ - _asm - { - fld fptmp - fistp itmp - } -} +#define OUR_ABS(x) (((x) < 0) ? -(x) : (x)) +#define OUR_SIGN(x) (((x) < 0) ? -1 : +1) +#define OUR_INT_TO_FIXED(x) (int) ((x) * (65536)) +#define OUR_FIXED_TO_INT(x) (int) ((x) / (65536)) +#define OUR_MUL_INT(a, b) ((a) * (b)) +#define OUR_ISR(a, shift) ((a) >> (shift)) /* - This macro makes usage of the above function easier and more elegant - -*/ - -#define f2i(a, b) { \ -fptmp = (b); \ -FloatToInt(); \ -a = itmp;} + Platform Specific 64-Bit Operator Functions -#else + Not all compilers support 64-bit operations, and some platforms may not + even support 64-bit numbers. Support for 64-bit operations is therefore + provided in the platform specific functions below. -#if 1 /* GCC! */ -void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c); -void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a); -void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c); -void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a); -void MUL_I_WIDE(int a, int b, LONGLONGCH *c); -int CMP_LL(LONGLONGCH *a, LONGLONGCH *b); -void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b); -void NEG_LL(LONGLONGCH *a); -void ASR_LL(LONGLONGCH *a, int shift); -void IntToLL(LONGLONGCH *a, int *b); -int MUL_FIXED(int a, int b); -int DIV_FIXED(int a, int b); + For C++ a new class could be defined. However the current system is not + compiled as C++ and the Cygnus GNU C++ is not currently working. 
-#define DIV_INT(a, b) ((a) / (b)) +*/ -int NarrowDivide(LONGLONGCH *a, int b); -int WideMulNarrowDiv(int a, int b, int c); -void RotateVector_ASM(VECTORCH *v, MATRIXCH *m); -void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m); /* -int FloatToInt(float); -#define f2i(a, b) { a = FloatToInt(b); } + These functions have been checked for suitability for + a Pentium and look as if they would pair up okay. + Might be worth a more detailed look at optimising + them though. + Obviously there is a problem with values not being + loaded into registers for these functions, but this + may be unavoidable for 64 bit values on a Watcom + platform. */ -int SqRoot32(int A); -void FloatToInt(); -extern float fti_fptmp; -extern int fti_itmp; - -#define f2i(a, b) { \ -fti_fptmp = (b); \ -FloatToInt(); \ -a = fti_itmp;} -#else /* inline stuff */ +#if defined(_MSC_VER) && 0 /* inline assember for the Microsoft compiler */ /* ADD */ - -static __inline__ void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c) +static void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c) { -/* _asm { mov esi,a @@ -1167,35 +82,12 @@ static __inline__ void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c) mov [ebx],eax mov [ebx+4],edx } -*/ - -__asm__("movl 0(%%esi), %%eax \n\t" - "movl 4(%%esi), %%edx \n\t" - "addl 0(%%edi), %%eax \n\t" - "adcl 4(%%edi), %%edx \n\t" - "movl %%eax, 0(%%ebx) \n\t" - "movl %%edx, 4(%%ebx) \n\t" - : - : "S" (a), "D" (b), "b" (c) - : "%eax", "%edx", "memory", "cc" - ); - -/* -__asm__("movl 0(%%esi), %%eax \n\t" - "movl 4(%%esi), %%edx \n\t" - "addl 0(%%edi), %%eax \n\t" - "adcl 4(%%edi), %%edx \n\t" - : "=a" (c->lo32), "=d" (c->hi32) - : "S" (a), "D" (b) - ); -*/ } /* ADD ++ */ -static __inline__ void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a) +static void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a) { -/* _asm { mov edi,c @@ -1205,22 +97,12 @@ static __inline__ void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a) add [edi],eax adc [edi+4],edx } -*/ -__asm__("movl 
0(%%esi), %%eax \n\t" - "movl 4(%%esi), %%edx \n\t" - "addl %%eax, 0(%%edi) \n\t" - "adcl %%edx, 4(%%edi) \n\t" - : - : "D" (c), "S" (a) - : "%eax", "%edx", "memory", "cc" - ); } /* SUB */ -static __inline__ void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c) +static void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c) { -/* _asm { mov esi,a @@ -1233,24 +115,12 @@ static __inline__ void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c) mov [ebx],eax mov [ebx+4],edx } -*/ -__asm__("movl 0(%%esi), %%eax \n\t" - "movl 4(%%esi), %%edx \n\t" - "subl 0(%%edi), %%eax \n\t" - "sbbl 4(%%edi), %%edx \n\t" - "movl %%eax, 0(%%ebx) \n\t" - "movl %%edx, 4(%%ebx) \n\t" - : - : "S" (a), "D" (b), "b" (c) - : "%eax", "%edx", "memory", "cc" - ); } /* SUB -- */ -static __inline__ void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a) +static void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a) { -/* _asm { mov edi,c @@ -1260,15 +130,6 @@ static __inline__ void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a) sub [edi],eax sbb [edi+4],edx } -*/ -__asm__("movl 0(%%esi), %%eax \n\t" - "movl 4(%%esi), %%edx \n\t" - "subl %%eax, 0(%%edi) \n\t" - "sbbl %%edx, 4(%%edi) \n\t" - : - : "D" (c), "S" (a) - : "%eax", "%edx", "memory", "cc" - ); } /* @@ -1279,9 +140,8 @@ __asm__("movl 0(%%esi), %%eax \n\t" */ -static __inline__ void MUL_I_WIDE(int a, int b, LONGLONGCH *c) +static void MUL_I_WIDE(int a, int b, LONGLONGCH *c) { -/* _asm { mov eax,a @@ -1290,14 +150,6 @@ static __inline__ void MUL_I_WIDE(int a, int b, LONGLONGCH *c) mov [ebx],eax mov [ebx+4],edx } -*/ -__asm__("imull %2 \n\t" - "movl %%eax, 0(%%ebx) \n\t" - "movl %%edx, 4(%%ebx) \n\t" - : - : "a" (a), "b" (c), "q" (b) - : "%edx", "memory", "cc" - ); } /* @@ -1308,10 +160,9 @@ __asm__("imull %2 \n\t" */ -static __inline__ int CMP_LL(LONGLONGCH *a, LONGLONGCH *b) +static int CMP_LL(LONGLONGCH *a, LONGLONGCH *b) { - int retval; -/* + int retval = 0; _asm { mov ebx,a @@ -1331,36 +182,13 @@ static __inline__ int CMP_LL(LONGLONGCH *a, LONGLONGCH *b) neg 
retval llgs: } -*/ -/* TODO */ -__asm__("movl 0(%%ebx), %%eax \n\t" - "movl 4(%%ebx), %%edx \n\t" - "subl 0(%%ecx), %%eax \n\t" - "sbbl 4(%%ecx), %%edx \n\t" - "xorl %0, %0 \n\t" /* hopefully it doesn't pick %eax or %edx */ - "andl %%edx, %%edx \n\t" - "jne 0 \n\t" /* llnz */ - "andl %%eax, %%eax \n\t" - "je 1 \n" /* llgs */ -"0: \n\t" /* llnz */ - "movl $1, %0 \n\t" - "andl %%edx, %%edx \n\t" - "jge 1 \n\t" /* llgs */ - "negl %0 \n" -"1: \n\t" /* llgs */ - : "=r" (retval) - : "b" (a), "c" (b) - : "%eax", "%edx", "memory", "cc" - ); - return retval; } /* EQUALS */ -static __inline__ void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b) +static void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b) { -/* _asm { mov edi,a @@ -1370,22 +198,12 @@ static __inline__ void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b) mov [edi],eax mov [edi+4],edx } -*/ -__asm__("movl 0(%%esi), %%eax \n\t" - "movl 4(%%esi), %%edx \n\t" - "movl %%eax, 0(%%edi) \n\t" - "movl %%edx, 4(%%edi) \n\t" - : - : "D" (a), "S" (b) - : "%eax", "%edx", "memory" - ); } /* NEGATE */ -static __inline__ void NEG_LL(LONGLONGCH *a) +static void NEG_LL(LONGLONGCH *a) { -/* _asm { mov esi,a @@ -1394,22 +212,12 @@ static __inline__ void NEG_LL(LONGLONGCH *a) add dword ptr[esi],1 adc dword ptr[esi+4],0 } -*/ -__asm__("notl 0(%%esi) \n\t" - "notl 4(%%esi) \n\t" - "addl $1, 0(%%esi) \n\t" - "adcl $0, 4(%%esi) \n\t" - : - : "S" (a) - : "memory", "cc" - ); } /* ASR */ -static __inline__ void ASR_LL(LONGLONGCH *a, int shift) +static void ASR_LL(LONGLONGCH *a, int shift) { -/* _asm { mov esi,a @@ -1423,27 +231,12 @@ static __inline__ void ASR_LL(LONGLONGCH *a, int shift) jne asrlp asrdn: } -*/ -__asm__("andl %%eax, %%eax \n\t" - "jle 0 \n" /* asrdn */ -"1: \n\t" /* asrlp */ - "sarl $1, 4(%%esi) \n\t" - "rcrl $1, 0(%%esi) \n\t" - "decl %%eax \n\t" - "jne 1 \n" -"0: \n\t" - : - : "S" (a), "a" (shift) - : "memory", "cc" - ); - } /* Convert int to LONGLONGCH */ -static __inline__ void IntToLL(LONGLONGCH *a, int *b) +static void IntToLL(LONGLONGCH 
*a, int *b) { -/* _asm { mov esi,b @@ -1453,16 +246,6 @@ static __inline__ void IntToLL(LONGLONGCH *a, int *b) mov [edi],eax mov [edi+4],edx } -*/ -__asm__("movl 0(%%esi), %%eax \n\t" - "cdq \n\t" - "movl %%eax, 0(%%edi) \n\t" - "movl %%edx, 4(%%edi) \n\t" - : - : "S" (b), "D" (a) - : "%eax", "%edx", "memory", "cc" - ); - } /* @@ -1491,10 +274,9 @@ __asm__("movl 0(%%esi), %%eax \n\t" them though. */ -static __inline__ int MUL_FIXED(int a, int b) +static int MUL_FIXED(int a, int b) { int retval; -/* _asm { mov eax,a @@ -1502,14 +284,6 @@ static __inline__ int MUL_FIXED(int a, int b) shrd eax,edx,16 mov retval,eax } -*/ -/* TODO */ -__asm__("imull %2 \n\t" - "shrdl $16, %%edx, %%eax \n\t" - : "=a" (retval) - : "a" (a), "q" (b) - : "%edx", "cc" - ); return retval; } @@ -1519,10 +293,9 @@ __asm__("imull %2 \n\t" */ -static __inline__ int DIV_FIXED(int a, int b) +static int DIV_FIXED(int a, int b) { int retval; -/* _asm { mov eax,a @@ -1533,17 +306,6 @@ static __inline__ int DIV_FIXED(int a, int b) idiv b mov retval,eax } -*/ -/* TODO */ -__asm__("cdq \n\t" - "roll $16, %%eax \n\t" - "mov %%ax, %%dx \n\t" - "xor %%ax, %%ax \n\t" - "idivl %2 \n\t" - : "=a" (retval) - : "a" (a), "q" (b) - : "%edx", "cc" - ); return retval; } @@ -1570,10 +332,9 @@ __asm__("cdq \n\t" */ -static __inline__ int NarrowDivide(LONGLONGCH *a, int b) +static int NarrowDivide(LONGLONGCH *a, int b) { int retval; -/* _asm { mov esi,a @@ -1582,14 +343,6 @@ static __inline__ int NarrowDivide(LONGLONGCH *a, int b) idiv b mov retval,eax } -*/ -__asm__("movl 0(%%esi), %%eax \n\t" - "movl 4(%%esi), %%edx \n\t" - "idivl %2 \n\t" - : "=a" (retval) - : "S" (a), "q" (b) - : "%edx", "cc" - ); return retval; } @@ -1601,11 +354,9 @@ __asm__("movl 0(%%esi), %%eax \n\t" */ -static __inline__ int WideMulNarrowDiv(int a, int b, int c) +static int WideMulNarrowDiv(int a, int b, int c) { -#if 0 /* TODO: broken? 
*/ int retval; -/* _asm { mov eax,a @@ -1613,17 +364,7 @@ static __inline__ int WideMulNarrowDiv(int a, int b, int c) idiv c mov retval,eax } -*/ -/* TODO */ -__asm__("imull %2 \n\t" - "idivl %3 \n\t" - : "=a" (retval) - : "a" (a), "q" (b), "q" (c) - : "cc" - ); return retval; -#endif - return (a * b) / c; } /* @@ -1672,7 +413,6 @@ typedef struct matrixch { */ -#if 0 /* TODO if these are needed */ static void RotateVector_ASM(VECTORCH *v, MATRIXCH *m) { _asm @@ -1783,7 +523,6 @@ static void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m) mov [edx + 8],ebp } } -#endif #if (SupportFPMathsFunctions || SupportFPSquareRoot) @@ -1795,15 +534,11 @@ static void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m) */ -extern int sqrt_temp1; -extern int sqrt_temp2; +static long temp; +static long temp2; -#include -static __inline__ int SqRoot32(int A) +static int SqRoot32(int A) { -#if 0 - sqrt_temp1 = A; -/* _asm { finit @@ -1812,26 +547,7 @@ static __inline__ int SqRoot32(int A) fistp temp2 fwait } -*/ - -__asm__("finit \n\t" - "fild sqrt_temp1 \n\t" - "fsqrt \n\t" - "fistp sqrt_temp2 \n\t" - "fwait \n\t" - : - : - : "memory", "cc" - ); - - return sqrt_temp2; -#endif -{ /* TODO: clean this please */ - double x = A; - double retvald = sqrt(x); - int retval = retvald; - return retval; -} + return (int)temp2; } #endif @@ -1844,31 +560,16 @@ __asm__("finit \n\t" */ -extern float fti_fptmp; -extern int fti_itmp; +static float fptmp; +static int itmp; -static __inline__ int FloatToInt(float fptmp) +static void FloatToInt(void) { -#if 0 - fti_fptmp = fptmp; -/* _asm { fld fptmp fistp itmp } -*/ -__asm__("fld fti_fptmp \n\t" - "fistp fti_itmp \n\t" - : - : - : "memory", "cc" - ); - - return fti_itmp; -#endif - - return fptmp; } /* @@ -1878,20 +579,47 @@ __asm__("fld fti_fptmp \n\t" */ #define f2i(a, b) { \ -a = FloatToInt(b); \ -} +fptmp = (b); \ +FloatToInt(); \ +a = itmp;} + +#else + +/* inline assembly has been moved to mathline.c */ +void 
ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c); +void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a); +void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c); +void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a); +void MUL_I_WIDE(int a, int b, LONGLONGCH *c); +int CMP_LL(LONGLONGCH *a, LONGLONGCH *b); +void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b); +void NEG_LL(LONGLONGCH *a); +void ASR_LL(LONGLONGCH *a, int shift); +void IntToLL(LONGLONGCH *a, int *b); +int MUL_FIXED(int a, int b); +int DIV_FIXED(int a, int b); +#define DIV_INT(a, b) ((a) / (b)) + +int NarrowDivide(LONGLONGCH *a, int b); +int WideMulNarrowDiv(int a, int b, int c); +void RotateVector_ASM(VECTORCH *v, MATRIXCH *m); +void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m); + +/* +int FloatToInt(float); +#define f2i(a, b) { a = FloatToInt(b); } +*/ -#if 0 int SqRoot32(int A); void FloatToInt(); +extern float fti_fptmp; +extern int fti_itmp; + #define f2i(a, b) { \ fti_fptmp = (b); \ FloatToInt(); \ a = fti_itmp;} -#endif - -#endif #endif -- cgit v1.3