diff options
| author | Rebellion Developments <rebellion@nomail> | 2000-03-16 11:25:00 +0100 |
|---|---|---|
| committer | Patryk Obara <dreamer.tan@gmail.com> | 2019-08-19 05:45:17 +0200 |
| commit | 218ca90543758a20ac326e444ca0643174ca7384 (patch) | |
| tree | 16bfe3e5307f9f515489000f28728224291a0e3b /3dc/win95/INLINE.H | |
Import Aliens vs Predator - Gold (Build 116)
Source code release, imported from:
https://www.gamefront.com/games/aliens-vs-predator-3/file/avp-gold-complete-source-code
All text files were converted to Unix format.
Diffstat (limited to '3dc/win95/INLINE.H')
| -rw-r--r-- | 3dc/win95/INLINE.H | 1246 |
1 files changed, 1246 insertions, 0 deletions
diff --git a/3dc/win95/INLINE.H b/3dc/win95/INLINE.H new file mode 100644 index 0000000..fdc5c60 --- /dev/null +++ b/3dc/win95/INLINE.H @@ -0,0 +1,1246 @@ +#ifndef INLINE_INCLUDED + +#if SUPPORT_MMX +#include "mmx_math.h" +#endif + +/* + + + Watcom PC Inline Functions. + + Watcom Standard C does not support the C++ "inline" directive, so these + functions have been written as inline assembler instead. + +*/ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + Standard macros. Note that FIXED_TO_INT + and INT_TO_FIXED are very suboptimal in + this version!!! + Also, MUL_INT and ISR are ONLY intended + to be used in Win95 so that Saturn versions + of the same code can be compiled using calls + to hand optimised assembler functions, i.e. + for code that is never intended to be run on + a Saturn they are unnecessary. +*/ + +#define OUR_ABS(x) (((x) < 0) ? -(x) : (x)) +#define OUR_SIGN(x) (((x) < 0) ? -1 : +1) +#define OUR_INT_TO_FIXED(x) (int) ((x) * (65536)) +#define OUR_FIXED_TO_INT(x) (int) ((x) / (65536)) +#define OUR_MUL_INT(a, b) ((a) * (b)) +#define OUR_ISR(a, shift) ((a) >> (shift)) + + +/* + + win95\item.c functions + +*/ + +void InitialiseTriangleArrayData(void); +void* AllocateTriangleArrayData(int tasize); + + +/* + + General Triangle Array Handler Null Case / Error + +*/ + +void TriangleArrayNullOrError(TRIANGLEARRAY *tarr); + + +/* + + Item Polygon Triangle Array Functions + +*/ + +void Item_Polygon_PrepareTriangleArray_3(TRIANGLEARRAY *qarr); +void Item_Polygon_PrepareTriangleArray_4(TRIANGLEARRAY *qarr); +void Item_Polygon_PrepareTriangleArray_5(TRIANGLEARRAY *qarr); +void Item_Polygon_PrepareTriangleArray_6(TRIANGLEARRAY *qarr); +void Item_Polygon_PrepareTriangleArray_7(TRIANGLEARRAY *qarr); +void Item_Polygon_PrepareTriangleArray_8(TRIANGLEARRAY *qarr); +void Item_Polygon_PrepareTriangleArray_9(TRIANGLEARRAY *qarr); + + +/* + + Item Gouraud Polygon Triangle Array Functions + +*/ + +void Item_GouraudPolygon_PrepareTriangleArray_3(TRIANGLEARRAY *qarr); +void Item_GouraudPolygon_PrepareTriangleArray_4(TRIANGLEARRAY *qarr); +void Item_GouraudPolygon_PrepareTriangleArray_5(TRIANGLEARRAY *qarr); +void Item_GouraudPolygon_PrepareTriangleArray_6(TRIANGLEARRAY *qarr); +void Item_GouraudPolygon_PrepareTriangleArray_7(TRIANGLEARRAY *qarr); +void Item_GouraudPolygon_PrepareTriangleArray_8(TRIANGLEARRAY *qarr); +void Item_GouraudPolygon_PrepareTriangleArray_9(TRIANGLEARRAY *qarr); + +/* + + Item 2d Textured Polygon Triangle Array Functions + +*/ + +void Item_2dTexturedPolygon_PrepareTriangleArray_3(TRIANGLEARRAY *qarr); +void Item_2dTexturedPolygon_PrepareTriangleArray_4(TRIANGLEARRAY *qarr); +void Item_2dTexturedPolygon_PrepareTriangleArray_5(TRIANGLEARRAY *qarr); +void Item_2dTexturedPolygon_PrepareTriangleArray_6(TRIANGLEARRAY *qarr); +void Item_2dTexturedPolygon_PrepareTriangleArray_7(TRIANGLEARRAY *qarr); +void Item_2dTexturedPolygon_PrepareTriangleArray_8(TRIANGLEARRAY *qarr); +void Item_2dTexturedPolygon_PrepareTriangleArray_9(TRIANGLEARRAY *qarr); + +/* + + Item Gouraud 2d Textured Polygon Triangle Array Functions + +*/ + +void Item_Gouraud2dTexturedPolygon_PrepareTriangleArray_3(TRIANGLEARRAY *qarr); +void Item_Gouraud2dTexturedPolygon_PrepareTriangleArray_4(TRIANGLEARRAY *qarr); +void Item_Gouraud2dTexturedPolygon_PrepareTriangleArray_5(TRIANGLEARRAY *qarr); +void Item_Gouraud2dTexturedPolygon_PrepareTriangleArray_6(TRIANGLEARRAY *qarr); +void Item_Gouraud2dTexturedPolygon_PrepareTriangleArray_7(TRIANGLEARRAY *qarr); +void Item_Gouraud2dTexturedPolygon_PrepareTriangleArray_8(TRIANGLEARRAY *qarr); +void Item_Gouraud2dTexturedPolygon_PrepareTriangleArray_9(TRIANGLEARRAY *qarr); + + +/* + + Item 3d Textured Polygon Triangle Array Functions + +*/ + +void Item_3dTexturedPolygon_PrepareTriangleArray_3(TRIANGLEARRAY *qarr); +void Item_3dTexturedPolygon_PrepareTriangleArray_4(TRIANGLEARRAY *qarr); +void Item_3dTexturedPolygon_PrepareTriangleArray_5(TRIANGLEARRAY *qarr); +void Item_3dTexturedPolygon_PrepareTriangleArray_6(TRIANGLEARRAY *qarr); +void Item_3dTexturedPolygon_PrepareTriangleArray_7(TRIANGLEARRAY *qarr); +void Item_3dTexturedPolygon_PrepareTriangleArray_8(TRIANGLEARRAY *qarr); +void Item_3dTexturedPolygon_PrepareTriangleArray_9(TRIANGLEARRAY *qarr); + +/* + + Item Gouraud 3d Textured Polygon Triangle Array Functions + +*/ + +void Item_Gouraud3dTexturedPolygon_PrepareTriangleArray_3(TRIANGLEARRAY *qarr); +void Item_Gouraud3dTexturedPolygon_PrepareTriangleArray_4(TRIANGLEARRAY *qarr); +void Item_Gouraud3dTexturedPolygon_PrepareTriangleArray_5(TRIANGLEARRAY *qarr); +void Item_Gouraud3dTexturedPolygon_PrepareTriangleArray_6(TRIANGLEARRAY *qarr); +void Item_Gouraud3dTexturedPolygon_PrepareTriangleArray_7(TRIANGLEARRAY *qarr); +void Item_Gouraud3dTexturedPolygon_PrepareTriangleArray_8(TRIANGLEARRAY *qarr); +void Item_Gouraud3dTexturedPolygon_PrepareTriangleArray_9(TRIANGLEARRAY *qarr); + +/* + + Platform Specific 64-Bit Operator Functions + + Not all compilers support 64-bit operations, and some platforms may not + even support 64-bit numbers. Support for 64-bit operations is therefore + provided in the platform specific fucntions below. + + For C++ a mew class could be defined. However the current system is not + compiled as C++ and the Cygnus GNU C++ is not currently working. + +*/ + + +/* + These functions have been checked for suitability for + a Pentium and look as if they would pair up okay. + Might be worth a more detailed look at optimising + them though. + Obviously there is a problem with values not being + loaded into registers for these functions, but this + may be unavoidable for 64 bit values on a Watcom + platform. +*/ + + +#ifdef __WATCOMC__ /* inline assember for the Watcom compiler */ + +/* ADD */ + +void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c); +# pragma aux ADD_LL = \ +"mov eax,[esi]" \ +"mov edx,[esi+4]" \ +"add eax,[edi]" \ +"adc edx,[edi+4]" \ +"mov [ebx],eax" \ +"mov [ebx+4],edx" \ +parm[esi] [edi] [ebx] \ +modify[eax edx]; + + +/* ADD ++ */ + +void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a); +# pragma aux ADD_LL_PP = \ +"mov eax,[esi]" \ +"mov edx,[esi+4]" \ +"add [edi],eax" \ +"adc [edi+4],edx" \ +parm[edi] [esi] \ +modify[eax edx]; + + +/* SUB */ + +void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c); +# pragma aux SUB_LL = \ +"mov eax,[esi]" \ +"mov edx,[esi+4]" \ +"sub eax,[edi]" \ +"sbb edx,[edi+4]" \ +"mov [ebx],eax" \ +"mov [ebx+4],edx" \ +parm[esi] [edi] [ebx] \ +modify[eax edx]; + + + +/* SUB -- */ + +void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a); +# pragma aux SUB_LL_MM = \ +"mov eax,[esi]" \ +"mov edx,[esi+4]" \ +"sub [edi],eax" \ +"sbb [edi+4],edx" \ +parm[edi] [esi] \ +modify[eax edx]; + + +/* + + MUL + + This is the multiply we use, the 32 x 32 = 64 widening version + +*/ + +void MUL_I_WIDE(int a, int b, LONGLONGCH *c); +# pragma aux MUL_I_WIDE = \ +"imul edx"\ +"mov [ebx],eax" \ +"mov [ebx+4],edx" \ +parm[eax] [edx] [ebx] \ +modify[eax edx]; + + + +/* + + CMP + + This substitutes for ==, >, <, >=, <= + +*/ + +int CMP_LL(LONGLONGCH *a, LONGLONGCH *b); +# pragma aux CMP_LL = \ +"mov eax,[ebx]" \ +"mov edx,[ebx+4]" \ +"sub eax,[ecx]" \ +"sbb edx,[ecx+4]" \ +"and edx,edx" \ +"jne llnz" \ +"and eax,eax" \ +"jne llnz" \ +"xor eax,eax" \ +"jmp llgs" \ +"llnz:" \ +"mov eax,1" \ +"and edx,edx" \ +"jge llgs" \ +"neg eax" \ +"llgs:" \ +parm[ebx] [ecx] \ +value[eax] \ +modify[edx]; + + + + +/* EQUALS */ + +void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b); +# pragma aux EQUALS_LL = \ +"mov eax,[esi]" \ +"mov edx,[esi+4]" \ +"mov [edi],eax" \ +"mov [edi+4],edx" \ +parm[edi] [esi] \ +modify[eax edx]; + + +/* NEGATE */ + +void NEG_LL(LONGLONGCH *a); +# pragma aux NEG_LL = \ +"not dword ptr[esi]" \ +"not dword ptr[esi+4]" \ +"add dword ptr[esi],1" \ +"adc dword ptr[esi+4],0" \ +parm[esi]; + + +/* ASR */ + +void ASR_LL(LONGLONGCH *a, int shift); +# pragma aux ASR_LL = \ +"and eax,eax" \ +"jle asrdn" \ +"asrlp:" \ +"sar dword ptr[esi+4],1" \ +"rcr dword ptr[esi],1" \ +"dec eax" \ +"jne asrlp" \ +"asrdn:" \ +parm[esi] [eax]; + + +/* Convert int to LONGLONGCH */ + +void IntToLL(LONGLONGCH *a, int *b); +# pragma aux IntToLL = \ +"mov eax,[esi]" \ +"cdq" \ +"mov [edi],eax" \ +"mov [edi+4],edx" \ +parm[edi] [esi] \ +modify[eax edx]; + + + + + + + + + +/* + + Fixed Point Multiply. + + + 16.16 * 16.16 -> 16.16 + or + 16.16 * 0.32 -> 0.32 + + A proper version of this function ought to read + 16.16 * 16.16 -> 32.16 + but this would require a long long result + + Algorithm: + + Take the mid 32 bits of the 64 bit result + +*/ + +/* + These functions have been checked for suitability for + a Pentium and look as if they would work adequately. + Might be worth a more detailed look at optimising + them though. +*/ + +#if 0 + +int MUL_FIXED(int a, int b); +# pragma aux MUL_FIXED = \ +"imul edx" \ +"mov ax,dx" \ +"rol eax,16" \ +parm[eax] [edx] \ +value[eax] \ +modify[edx]; + +#else + +int MUL_FIXED(int a, int b); +# pragma aux MUL_FIXED = \ +"imul edx" \ +"shrd eax,edx,16" \ +parm[eax] [edx] \ +value[eax] \ +modify[edx]; + +#endif + + +/* + + Fixed Point Divide - returns a / b + +*/ + +int DIV_FIXED(int a, int b); +# pragma aux DIV_FIXED = \ +"cdq" \ +"rol eax,16" \ +"mov dx,ax" \ +"xor ax,ax" \ +"idiv ebx" \ +parm[eax] [ebx] \ +value[eax] \ +modify[edx]; + + + + +/* + + Multiply and Divide Functions. + +*/ + + +/* + + 32/32 division + + This macro is a function on some other platforms + +*/ + +#define DIV_INT(a, b) ((a) / (b)) + + + + +/* + + A Narrowing 64/32 Division + +*/ + +int NarrowDivide(LONGLONGCH *a, int b); +# pragma aux NarrowDivide = \ +"mov eax,[esi]" \ +"mov edx,[esi+4]" \ +"idiv ebx" \ +parm[esi] [ebx] \ +value[eax] \ +modify[edx]; + + + +/* + + This function performs a Widening Multiply followed by a Narrowing Divide. + + a = (a * b) / c + +*/ + +int WideMulNarrowDiv(int a, int b, int c); +# pragma aux WideMulNarrowDiv = \ +"imul edx"\ +"idiv ebx" \ +parm[eax] [edx] [ebx] \ +value[eax]; + + + +/* + + Function to rotate a VECTORCH using a MATRIXCH + + This is the C function + + x = MUL_FIXED(m->mat11, v->vx); + x += MUL_FIXED(m->mat21, v->vy); + x += MUL_FIXED(m->mat31, v->vz); + + y = MUL_FIXED(m->mat12, v->vx); + y += MUL_FIXED(m->mat22, v->vy); + y += MUL_FIXED(m->mat32, v->vz); + + z = MUL_FIXED(m->mat13, v->vx); + z += MUL_FIXED(m->mat23, v->vy); + z += MUL_FIXED(m->mat33, v->vz); + + v->vx = x; + v->vy = y; + v->vz = z; + + This is the MUL_FIXED inline assembler function + + imul edx + shrd eax,edx,16 + + +typedef struct matrixch { + + int mat11; 0 + int mat12; 4 + int mat13; 8 + + int mat21; 12 + int mat22; 16 + int mat23; 20 + + int mat31; 24 + int mat32; 28 + int mat33; 32 + +} MATRIXCH; + +*/ + +void RotateVector_ASM(VECTORCH *v, MATRIXCH *m); +# pragma aux RotateVector_ASM = \ +\ +"push eax" \ +"push ebx" \ +"push ecx" \ +"push edx" \ +"push ebp" \ +\ +"mov eax,[edi + 0]" \ +"imul DWORD PTR [esi + 0]" \ +"shrd eax,edx,16" \ +"mov ecx,eax"\ +"mov eax,[edi + 12]" \ +"imul DWORD PTR [esi + 4]" \ +"shrd eax,edx,16" \ +"add ecx,eax" \ +"mov eax,[edi + 24]" \ +"imul DWORD PTR [esi + 8]" \ +"shrd eax,edx,16" \ +"add ecx,eax" \ +\ +"mov eax,[edi + 4]" \ +"imul DWORD PTR [esi + 0]" \ +"shrd eax,edx,16" \ +"mov ebx,eax"\ +"mov eax,[edi + 16]" \ +"imul DWORD PTR [esi + 4]" \ +"shrd eax,edx,16" \ +"add ebx,eax" \ +"mov eax,[edi + 28]" \ +"imul DWORD PTR [esi + 8]" \ +"shrd eax,edx,16" \ +"add ebx,eax" \ +\ +"mov eax,[edi + 8]" \ +"imul DWORD PTR [esi + 0]" \ +"shrd eax,edx,16" \ +"mov ebp,eax"\ +"mov eax,[edi + 20]" \ +"imul DWORD PTR [esi + 4]" \ +"shrd eax,edx,16" \ +"add ebp,eax" \ +"mov eax,[edi + 32]" \ +"imul DWORD PTR [esi + 8]" \ +"shrd eax,edx,16" \ +"add ebp,eax" \ +\ +"mov [esi + 0],ecx" \ +"mov [esi + 4],ebx" \ +"mov [esi + 8],ebp" \ +\ +"pop ebp" \ +"pop edx" \ +"pop ecx" \ +"pop ebx" \ +"pop eax" \ +\ +parm[esi] [edi]; + + +/* + + Here is the same function, this time copying the result to a second vector + +*/ + +void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m); +# pragma aux RotateAndCopyVector_ASM = \ +\ +"push eax" \ +"push ebx" \ +"push ecx" \ +"push ebp" \ +\ +"push edx" \ +"mov eax,[edi + 0]" \ +"imul DWORD PTR [esi + 0]" \ +"shrd eax,edx,16" \ +"mov ecx,eax"\ +"mov eax,[edi + 12]" \ +"imul DWORD PTR [esi + 4]" \ +"shrd eax,edx,16" \ +"add ecx,eax" \ +"mov eax,[edi + 24]" \ +"imul DWORD PTR [esi + 8]" \ +"shrd eax,edx,16" \ +"add ecx,eax" \ +\ +"mov eax,[edi + 4]" \ +"imul DWORD PTR [esi + 0]" \ +"shrd eax,edx,16" \ +"mov ebx,eax"\ +"mov eax,[edi + 16]" \ +"imul DWORD PTR [esi + 4]" \ +"shrd eax,edx,16" \ +"add ebx,eax" \ +"mov eax,[edi + 28]" \ +"imul DWORD PTR [esi + 8]" \ +"shrd eax,edx,16" \ +"add ebx,eax" \ +\ +"mov eax,[edi + 8]" \ +"imul DWORD PTR [esi + 0]" \ +"shrd eax,edx,16" \ +"mov ebp,eax"\ +"mov eax,[edi + 20]" \ +"imul DWORD PTR [esi + 4]" \ +"shrd eax,edx,16" \ +"add ebp,eax" \ +"mov eax,[edi + 32]" \ +"imul DWORD PTR [esi + 8]" \ +"shrd eax,edx,16" \ +"add ebp,eax" \ +\ +"pop edx" \ +"mov [edx + 0],ecx" \ +"mov [edx + 4],ebx" \ +"mov [edx + 8],ebp" \ +\ +"pop ebp" \ +"pop ecx" \ +"pop ebx" \ +"pop eax" \ +\ +parm[esi] [edx] [edi]; + + + + +#if (SupportFPMathsFunctions || SupportFPSquareRoot) + +/* + + Square Root + + Returns the Square Root of a 32-bit number + +*/ + +static long temp; +static long temp2; + +int SqRoot32(int A); +# pragma aux SqRoot32 = \ +"finit" \ +"mov temp,eax" \ +"fild temp" \ +"fsqrt" \ +"fistp temp2" \ +"fwait" \ +"mov eax,temp2" \ +parm[eax] \ +value[eax]; + +#endif + + +/* + + This may look ugly (it is) but it is a MUCH faster way to convert "float" into "int" than + the function call "CHP" used by the WATCOM compiler. + +*/ + +static float fptmp; +static int itmp; + +void FloatToInt(void); +# pragma aux FloatToInt = \ +"fld fptmp" \ +"fistp itmp"; + +/* + + This macro makes usage of the above function easier and more elegant + +*/ + +#define f2i(a, b) { \ +fptmp = (b); \ +FloatToInt(); \ +a = itmp;} + +#elif defined(_MSC_VER) /* inline assember for the Microsoft compiler */ + +/* ADD */ + +static void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c) +{ + _asm + { + mov esi,a + mov edi,b + mov ebx,c + mov eax,[esi] + mov edx,[esi+4] + add eax,[edi] + adc edx,[edi+4] + mov [ebx],eax + mov [ebx+4],edx + } +} + +/* ADD ++ */ + +static void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a) +{ + _asm + { + mov edi,c + mov esi,a + mov eax,[esi] + mov edx,[esi+4] + add [edi],eax + adc [edi+4],edx + } +} + +/* SUB */ + +static void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c) +{ + _asm + { + mov esi,a + mov edi,b + mov ebx,c + mov eax,[esi] + mov edx,[esi+4] + sub eax,[edi] + sbb edx,[edi+4] + mov [ebx],eax + mov [ebx+4],edx + } +} + +/* SUB -- */ + +static void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a) +{ + _asm + { + mov edi,c + mov esi,a + mov eax,[esi] + mov edx,[esi+4] + sub [edi],eax + sbb [edi+4],edx + } +} + +/* + + MUL + + This is the multiply we use, the 32 x 32 = 64 widening version + +*/ + +static void MUL_I_WIDE(int a, int b, LONGLONGCH *c) +{ + _asm + { + mov eax,a + mov ebx,c + imul b + mov [ebx],eax + mov [ebx+4],edx + } +} + +/* + + CMP + + This substitutes for ==, >, <, >=, <= + +*/ + +static int CMP_LL(LONGLONGCH *a, LONGLONGCH *b) +{ + int retval = 0; + _asm + { + mov ebx,a + mov ecx,b + mov eax,[ebx] + mov edx,[ebx+4] + sub eax,[ecx] + sbb edx,[ecx+4] + and edx,edx + jne llnz + and eax,eax + je llgs + llnz: + mov retval,1 + and edx,edx + jge llgs + neg retval + llgs: + } + return retval; +} + +/* EQUALS */ + +static void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b) +{ + _asm + { + mov edi,a + mov esi,b + mov eax,[esi] + mov edx,[esi+4] + mov [edi],eax + mov [edi+4],edx + } +} + +/* NEGATE */ + +static void NEG_LL(LONGLONGCH *a) +{ + _asm + { + mov esi,a + not dword ptr[esi] + not dword ptr[esi+4] + add dword ptr[esi],1 + adc dword ptr[esi+4],0 + } +} + +/* ASR */ + +static void ASR_LL(LONGLONGCH *a, int shift) +{ + _asm + { + mov esi,a + mov eax,shift + and eax,eax + jle asrdn + asrlp: + sar dword ptr[esi+4],1 + rcr dword ptr[esi],1 + dec eax + jne asrlp + asrdn: + } +} + +/* Convert int to LONGLONGCH */ + +static void IntToLL(LONGLONGCH *a, int *b) +{ + _asm + { + mov esi,b + mov edi,a + mov eax,[esi] + cdq + mov [edi],eax + mov [edi+4],edx + } +} + +/* + + Fixed Point Multiply. + + + 16.16 * 16.16 -> 16.16 + or + 16.16 * 0.32 -> 0.32 + + A proper version of this function ought to read + 16.16 * 16.16 -> 32.16 + but this would require a long long result + + Algorithm: + + Take the mid 32 bits of the 64 bit result + +*/ + +/* + These functions have been checked for suitability for + a Pentium and look as if they would work adequately. + Might be worth a more detailed look at optimising + them though. +*/ + +static int MUL_FIXED(int a, int b) +{ + int retval; + _asm + { + mov eax,a + imul b + shrd eax,edx,16 + mov retval,eax + } + return retval; +} + +/* + + Fixed Point Divide - returns a / b + +*/ + +static int DIV_FIXED(int a, int b) +{ + int retval; + _asm + { + mov eax,a + cdq + rol eax,16 + mov dx,ax + xor ax,ax + idiv b + mov retval,eax + } + return retval; +} + +/* + + Multiply and Divide Functions. + +*/ + + +/* + + 32/32 division + + This macro is a function on some other platforms + +*/ + +#define DIV_INT(a, b) ((a) / (b)) + +/* + + A Narrowing 64/32 Division + +*/ + +static int NarrowDivide(LONGLONGCH *a, int b) +{ + int retval; + _asm + { + mov esi,a + mov eax,[esi] + mov edx,[esi+4] + idiv b + mov retval,eax + } + return retval; +} + +/* + + This function performs a Widening Multiply followed by a Narrowing Divide. + + a = (a * b) / c + +*/ + +static int WideMulNarrowDiv(int a, int b, int c) +{ + int retval; + _asm + { + mov eax,a + imul b + idiv c + mov retval,eax + } + return retval; +} + +/* + + Function to rotate a VECTORCH using a MATRIXCH + + This is the C function + + x = MUL_FIXED(m->mat11, v->vx); + x += MUL_FIXED(m->mat21, v->vy); + x += MUL_FIXED(m->mat31, v->vz); + + y = MUL_FIXED(m->mat12, v->vx); + y += MUL_FIXED(m->mat22, v->vy); + y += MUL_FIXED(m->mat32, v->vz); + + z = MUL_FIXED(m->mat13, v->vx); + z += MUL_FIXED(m->mat23, v->vy); + z += MUL_FIXED(m->mat33, v->vz); + + v->vx = x; + v->vy = y; + v->vz = z; + + This is the MUL_FIXED inline assembler function + + imul edx + shrd eax,edx,16 + + +typedef struct matrixch { + + int mat11; 0 + int mat12; 4 + int mat13; 8 + + int mat21; 12 + int mat22; 16 + int mat23; 20 + + int mat31; 24 + int mat32; 28 + int mat33; 32 + +} MATRIXCH; + +*/ + +static void RotateVector_ASM(VECTORCH *v, MATRIXCH *m) +{ + _asm + { + mov esi,v + mov edi,m + + mov eax,[edi + 0] + imul DWORD PTR [esi + 0] + shrd eax,edx,16 + mov ecx,eax + mov eax,[edi + 12] + imul DWORD PTR [esi + 4] + shrd eax,edx,16 + add ecx,eax + mov eax,[edi + 24] + imul DWORD PTR [esi + 8] + shrd eax,edx,16 + add ecx,eax + + mov eax,[edi + 4] + imul DWORD PTR [esi + 0] + shrd eax,edx,16 + mov ebx,eax + mov eax,[edi + 16] + imul DWORD PTR [esi + 4] + shrd eax,edx,16 + add ebx,eax + mov eax,[edi + 28] + imul DWORD PTR [esi + 8] + shrd eax,edx,16 + add ebx,eax + + mov eax,[edi + 8] + imul DWORD PTR [esi + 0] + shrd eax,edx,16 + mov ebp,eax + mov eax,[edi + 20] + imul DWORD PTR [esi + 4] + shrd eax,edx,16 + add ebp,eax + mov eax,[edi + 32] + imul DWORD PTR [esi + 8] + shrd eax,edx,16 + add ebp,eax + + mov [esi + 0],ecx + mov [esi + 4],ebx + mov [esi + 8],ebp + } +} + +/* + + Here is the same function, this time copying the result to a second vector + +*/ + +static void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m) +{ + _asm + { + mov esi,v1 + mov edi,m + + mov eax,[edi + 0] + imul DWORD PTR [esi + 0] + shrd eax,edx,16 + mov ecx,eax + mov eax,[edi + 12] + imul DWORD PTR [esi + 4] + shrd eax,edx,16 + add ecx,eax + mov eax,[edi + 24] + imul DWORD PTR [esi + 8] + shrd eax,edx,16 + add ecx,eax + + mov eax,[edi + 4] + imul DWORD PTR [esi + 0] + shrd eax,edx,16 + mov ebx,eax + mov eax,[edi + 16] + imul DWORD PTR [esi + 4] + shrd eax,edx,16 + add ebx,eax + mov eax,[edi + 28] + imul DWORD PTR [esi + 8] + shrd eax,edx,16 + add ebx,eax + + mov eax,[edi + 8] + imul DWORD PTR [esi + 0] + shrd eax,edx,16 + mov ebp,eax + mov eax,[edi + 20] + imul DWORD PTR [esi + 4] + shrd eax,edx,16 + add ebp,eax + mov eax,[edi + 32] + imul DWORD PTR [esi + 8] + shrd eax,edx,16 + add ebp,eax + + mov edx,v2 + mov [edx + 0],ecx + mov [edx + 4],ebx + mov [edx + 8],ebp + } +} + +#if (SupportFPMathsFunctions || SupportFPSquareRoot) + +/* + + Square Root + + Returns the Square Root of a 32-bit number + +*/ + +static long temp; +static long temp2; + +static int SqRoot32(int A) +{ + _asm + { + finit + fild A + fsqrt + fistp temp2 + fwait + } + return (int)temp2; +} + +#endif + + +/* + + This may look ugly (it is) but it is a MUCH faster way to convert "float" into "int" than + the function call "CHP" used by the WATCOM compiler. + +*/ + +static float fptmp; +static int itmp; + +static void FloatToInt(void) +{ + _asm + { + fld fptmp + fistp itmp + } +} + +/* + + This macro makes usage of the above function easier and more elegant + +*/ + +#define f2i(a, b) { \ +fptmp = (b); \ +FloatToInt(); \ +a = itmp;} + +#else /* other compiler ? */ + +#error "Unknown compiler" + +#endif + + +/* These functions are in plspecfn.c */ + +int WideMul2NarrowDiv(int a, int b, int c, int d, int e); +int _Dot(VECTORCH *vptr1, VECTORCH *vptr2); +void MakeV(VECTORCH *v1, VECTORCH *v2, VECTORCH *v3); +void AddV(VECTORCH *v1, VECTORCH *v2); +void RotVect(VECTORCH *v, MATRIXCH *m); +void CopyClipPoint(CLIP_POINT *cp1, CLIP_POINT *cp2); + +#if SUPPORT_MMX + +#define RotateVector(v,m) (use_mmx_math ? MMX_VectorTransform((v),(m)) : _RotateVector((v),(m))) +#define RotateAndCopyVector(v_in,v_out,m) (use_mmx_math ? MMX_VectorTransformed((v_out),(v_in),(m)) : _RotateAndCopyVector((v_in),(v_out),(m))) +#define Dot(v1,v2) (use_mmx_math ? MMXInline_VectorDot((v1),(v2)) : _Dot((v1),(v2))) +#define DotProduct(v1,v2) (use_mmx_math ? MMX_VectorDot((v1),(v2)) : _DotProduct((v1),(v2))) + +#else /* ! SUPPORT_MMX */ + +#define RotateVector(v,m) (_RotateVector((v),(m))) +#define RotateAndCopyVector(v_in,v_out,m) (_RotateAndCopyVector((v_in),(v_out),(m))) +#define Dot(v1,v2) (_Dot((v1),(v2))) +#define DotProduct(v1,v2) (_DotProduct((v1),(v2))) + +#endif /* ? SUPPORT_MMX */ + +#ifdef __cplusplus +} +#endif + +#define INLINE_INCLUDED +#endif + |
