summaryrefslogtreecommitdiff
path: root/src/win95/inline.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/win95/inline.h')
-rw-r--r--src/win95/inline.h1246
1 files changed, 1246 insertions, 0 deletions
diff --git a/src/win95/inline.h b/src/win95/inline.h
new file mode 100644
index 0000000..fdc5c60
--- /dev/null
+++ b/src/win95/inline.h
@@ -0,0 +1,1246 @@
+#ifndef INLINE_INCLUDED
+
+#if SUPPORT_MMX
+#include "mmx_math.h"
+#endif
+
+/*
+
+
+ Watcom PC Inline Functions.
+
+ Watcom Standard C does not support the C++ "inline" directive, so these
+ functions have been written as inline assembler instead.
+
+*/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ Standard macros. Note that OUR_FIXED_TO_INT
+ and OUR_INT_TO_FIXED are very suboptimal in
+ this version (they multiply / divide by 65536)!!!
+ Also, OUR_MUL_INT and OUR_ISR are ONLY intended
+ to be used in Win95 so that Saturn versions
+ of the same code can be compiled using calls
+ to hand optimised assembler functions, i.e.
+ for code that is never intended to be run on
+ a Saturn they are unnecessary.
+
+ OUR_ABS and OUR_SIGN evaluate their argument more
+ than once, so do not pass expressions with side
+ effects. OUR_SIGN(0) yields +1.
+
+ Every expansion is fully parenthesised so that the
+ macros can be used safely inside larger expressions.
+*/
+
+#define OUR_ABS(x) (((x) < 0) ? -(x) : (x))
+#define OUR_SIGN(x) (((x) < 0) ? -1 : +1)
+#define OUR_INT_TO_FIXED(x) ((int) ((x) * (65536)))
+#define OUR_FIXED_TO_INT(x) ((int) ((x) / (65536)))
+#define OUR_MUL_INT(a, b) ((a) * (b))
+#define OUR_ISR(a, shift) ((a) >> (shift))
+
+
+/*
+
+ win95\item.c functions
+
+ Prototypes only; according to the heading above, the definitions live in
+ win95\item.c.
+
+ AllocateTriangleArrayData() takes a size (tasize) and returns an untyped
+ pointer. NOTE(review): the units of tasize and the lifetime / ownership
+ of the returned memory are not visible from this header - confirm in
+ item.c before relying on them.
+
+*/
+
+void InitialiseTriangleArrayData(void);
+void* AllocateTriangleArrayData(int tasize);
+
+
+/*
+
+ General Triangle Array Handler Null Case / Error
+
+ Prototype only. Takes the same TRIANGLEARRAY pointer argument as the
+ Item_*_PrepareTriangleArray_N handlers declared below.
+ NOTE(review): presumably used as the handler for unsupported cases -
+ confirm against the dispatch code.
+
+*/
+
+void TriangleArrayNullOrError(TRIANGLEARRAY *tarr);
+
+
+/*
+
+ Item Polygon Triangle Array Functions
+
+ Prototypes only: one handler per numeric suffix, 3 to 9, each taking a
+ TRIANGLEARRAY pointer.
+ NOTE(review): the suffix is presumably the vertex count of the source
+ polygon - confirm against the definitions.
+
+*/
+
+void Item_Polygon_PrepareTriangleArray_3(TRIANGLEARRAY *qarr);
+void Item_Polygon_PrepareTriangleArray_4(TRIANGLEARRAY *qarr);
+void Item_Polygon_PrepareTriangleArray_5(TRIANGLEARRAY *qarr);
+void Item_Polygon_PrepareTriangleArray_6(TRIANGLEARRAY *qarr);
+void Item_Polygon_PrepareTriangleArray_7(TRIANGLEARRAY *qarr);
+void Item_Polygon_PrepareTriangleArray_8(TRIANGLEARRAY *qarr);
+void Item_Polygon_PrepareTriangleArray_9(TRIANGLEARRAY *qarr);
+
+
+/*
+
+ Item Gouraud Polygon Triangle Array Functions
+
+ Prototypes only: one handler per numeric suffix, 3 to 9, each taking a
+ TRIANGLEARRAY pointer.
+ NOTE(review): the suffix is presumably the vertex count of the source
+ polygon - confirm against the definitions.
+
+*/
+
+void Item_GouraudPolygon_PrepareTriangleArray_3(TRIANGLEARRAY *qarr);
+void Item_GouraudPolygon_PrepareTriangleArray_4(TRIANGLEARRAY *qarr);
+void Item_GouraudPolygon_PrepareTriangleArray_5(TRIANGLEARRAY *qarr);
+void Item_GouraudPolygon_PrepareTriangleArray_6(TRIANGLEARRAY *qarr);
+void Item_GouraudPolygon_PrepareTriangleArray_7(TRIANGLEARRAY *qarr);
+void Item_GouraudPolygon_PrepareTriangleArray_8(TRIANGLEARRAY *qarr);
+void Item_GouraudPolygon_PrepareTriangleArray_9(TRIANGLEARRAY *qarr);
+
+/*
+
+ Item 2d Textured Polygon Triangle Array Functions
+
+ Prototypes only: one handler per numeric suffix, 3 to 9, each taking a
+ TRIANGLEARRAY pointer.
+ NOTE(review): the suffix is presumably the vertex count of the source
+ polygon - confirm against the definitions.
+
+*/
+
+void Item_2dTexturedPolygon_PrepareTriangleArray_3(TRIANGLEARRAY *qarr);
+void Item_2dTexturedPolygon_PrepareTriangleArray_4(TRIANGLEARRAY *qarr);
+void Item_2dTexturedPolygon_PrepareTriangleArray_5(TRIANGLEARRAY *qarr);
+void Item_2dTexturedPolygon_PrepareTriangleArray_6(TRIANGLEARRAY *qarr);
+void Item_2dTexturedPolygon_PrepareTriangleArray_7(TRIANGLEARRAY *qarr);
+void Item_2dTexturedPolygon_PrepareTriangleArray_8(TRIANGLEARRAY *qarr);
+void Item_2dTexturedPolygon_PrepareTriangleArray_9(TRIANGLEARRAY *qarr);
+
+/*
+
+ Item Gouraud 2d Textured Polygon Triangle Array Functions
+
+ Prototypes only: one handler per numeric suffix, 3 to 9, each taking a
+ TRIANGLEARRAY pointer.
+ NOTE(review): the suffix is presumably the vertex count of the source
+ polygon - confirm against the definitions.
+
+*/
+
+void Item_Gouraud2dTexturedPolygon_PrepareTriangleArray_3(TRIANGLEARRAY *qarr);
+void Item_Gouraud2dTexturedPolygon_PrepareTriangleArray_4(TRIANGLEARRAY *qarr);
+void Item_Gouraud2dTexturedPolygon_PrepareTriangleArray_5(TRIANGLEARRAY *qarr);
+void Item_Gouraud2dTexturedPolygon_PrepareTriangleArray_6(TRIANGLEARRAY *qarr);
+void Item_Gouraud2dTexturedPolygon_PrepareTriangleArray_7(TRIANGLEARRAY *qarr);
+void Item_Gouraud2dTexturedPolygon_PrepareTriangleArray_8(TRIANGLEARRAY *qarr);
+void Item_Gouraud2dTexturedPolygon_PrepareTriangleArray_9(TRIANGLEARRAY *qarr);
+
+
+/*
+
+ Item 3d Textured Polygon Triangle Array Functions
+
+ Prototypes only: one handler per numeric suffix, 3 to 9, each taking a
+ TRIANGLEARRAY pointer.
+ NOTE(review): the suffix is presumably the vertex count of the source
+ polygon - confirm against the definitions.
+
+*/
+
+void Item_3dTexturedPolygon_PrepareTriangleArray_3(TRIANGLEARRAY *qarr);
+void Item_3dTexturedPolygon_PrepareTriangleArray_4(TRIANGLEARRAY *qarr);
+void Item_3dTexturedPolygon_PrepareTriangleArray_5(TRIANGLEARRAY *qarr);
+void Item_3dTexturedPolygon_PrepareTriangleArray_6(TRIANGLEARRAY *qarr);
+void Item_3dTexturedPolygon_PrepareTriangleArray_7(TRIANGLEARRAY *qarr);
+void Item_3dTexturedPolygon_PrepareTriangleArray_8(TRIANGLEARRAY *qarr);
+void Item_3dTexturedPolygon_PrepareTriangleArray_9(TRIANGLEARRAY *qarr);
+
+/*
+
+ Item Gouraud 3d Textured Polygon Triangle Array Functions
+
+ Prototypes only: one handler per numeric suffix, 3 to 9, each taking a
+ TRIANGLEARRAY pointer.
+ NOTE(review): the suffix is presumably the vertex count of the source
+ polygon - confirm against the definitions.
+
+*/
+
+void Item_Gouraud3dTexturedPolygon_PrepareTriangleArray_3(TRIANGLEARRAY *qarr);
+void Item_Gouraud3dTexturedPolygon_PrepareTriangleArray_4(TRIANGLEARRAY *qarr);
+void Item_Gouraud3dTexturedPolygon_PrepareTriangleArray_5(TRIANGLEARRAY *qarr);
+void Item_Gouraud3dTexturedPolygon_PrepareTriangleArray_6(TRIANGLEARRAY *qarr);
+void Item_Gouraud3dTexturedPolygon_PrepareTriangleArray_7(TRIANGLEARRAY *qarr);
+void Item_Gouraud3dTexturedPolygon_PrepareTriangleArray_8(TRIANGLEARRAY *qarr);
+void Item_Gouraud3dTexturedPolygon_PrepareTriangleArray_9(TRIANGLEARRAY *qarr);
+
+/*
+
+ Platform Specific 64-Bit Operator Functions
+
+ Not all compilers support 64-bit operations, and some platforms may not
+ even support 64-bit numbers. Support for 64-bit operations is therefore
+ provided in the platform specific functions below.
+
+ For C++ a new class could be defined. However the current system is not
+ compiled as C++ and the Cygnus GNU C++ is not currently working.
+
+*/
+
+
+/*
+ These functions have been checked for suitability for
+ a Pentium and look as if they would pair up okay.
+ Might be worth a more detailed look at optimising
+ them though.
+ Obviously there is a problem with values not being
+ loaded into registers for these functions, but this
+ may be unavoidable for 64 bit values on a Watcom
+ platform.
+*/
+
+
+#ifdef __WATCOMC__ /* inline assembler for the Watcom compiler */
+
+/*
+ ADD
+
+ *c = *a + *b, performed as a 64-bit add in two 32-bit halves: the dword
+ at offset 0 is added with "add" and the dword at offset 4 with "adc", so
+ the carry out of the low half propagates into the high half.
+
+ Registers: a in esi, b in edi, c in ebx; eax and edx are scratch.
+*/
+
+void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
+# pragma aux ADD_LL = \
+"mov eax,[esi]" \
+"mov edx,[esi+4]" \
+"add eax,[edi]" \
+"adc edx,[edi+4]" \
+"mov [ebx],eax" \
+"mov [ebx+4],edx" \
+parm[esi] [edi] [ebx] \
+modify[eax edx];
+
+
+/*
+ ADD ++
+
+ *c += *a. The 64-bit value at a is loaded into edx:eax and added in
+ place to the value at c (add on the low dword, adc on the high dword).
+
+ Registers: c in edi, a in esi; eax and edx are scratch.
+*/
+
+void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a);
+# pragma aux ADD_LL_PP = \
+"mov eax,[esi]" \
+"mov edx,[esi+4]" \
+"add [edi],eax" \
+"adc [edi+4],edx" \
+parm[edi] [esi] \
+modify[eax edx];
+
+
+/*
+ SUB
+
+ *c = *a - *b, performed as a 64-bit subtract in two 32-bit halves
+ (sub on the low dword, sbb on the high dword so the borrow propagates).
+
+ Registers: a in esi, b in edi, c in ebx; eax and edx are scratch.
+*/
+
+void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
+# pragma aux SUB_LL = \
+"mov eax,[esi]" \
+"mov edx,[esi+4]" \
+"sub eax,[edi]" \
+"sbb edx,[edi+4]" \
+"mov [ebx],eax" \
+"mov [ebx+4],edx" \
+parm[esi] [edi] [ebx] \
+modify[eax edx];
+
+
+
+/*
+ SUB --
+
+ *c -= *a. The 64-bit value at a is loaded into edx:eax and subtracted
+ in place from the value at c (sub on the low dword, sbb on the high).
+
+ Registers: c in edi, a in esi; eax and edx are scratch.
+*/
+
+void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a);
+# pragma aux SUB_LL_MM = \
+"mov eax,[esi]" \
+"mov edx,[esi+4]" \
+"sub [edi],eax" \
+"sbb [edi+4],edx" \
+parm[edi] [esi] \
+modify[eax edx];
+
+
+/*
+
+ MUL
+
+ This is the multiply we use, the 32 x 32 = 64 widening version
+
+ *c = a * b. The one-operand signed "imul edx" multiplies eax (a) by
+ edx (b) and leaves the 64-bit product in edx:eax, which is then stored
+ low dword first at c.
+
+ Registers: a in eax, b in edx, c in ebx.
+
+*/
+
+void MUL_I_WIDE(int a, int b, LONGLONGCH *c);
+# pragma aux MUL_I_WIDE = \
+"imul edx"\
+"mov [ebx],eax" \
+"mov [ebx+4],edx" \
+parm[eax] [edx] [ebx] \
+modify[eax edx];
+
+
+
+/*
+
+ CMP
+
+ This substitutes for ==, >, <, >=, <=
+
+ Returns -1 if *a < *b, 0 if *a == *b and +1 if *a > *b, comparing the
+ operands as signed 64-bit values.
+
+ The difference *a - *b is formed with sub / sbb. The "less than" test
+ is taken directly from the flags left by sbb (jl tests SF != OF), so
+ the result is still correct when the 64-bit subtraction overflows;
+ testing only the sign of the high dword of the difference would not be.
+ Equality is detected by or-ing the two halves of the difference.
+
+ Registers: a in ebx, b in ecx, result in eax; edx is scratch.
+
+*/
+
+int CMP_LL(LONGLONGCH *a, LONGLONGCH *b);
+# pragma aux CMP_LL = \
+"mov eax,[ebx]" \
+"mov edx,[ebx+4]" \
+"sub eax,[ecx]" \
+"sbb edx,[ecx+4]" \
+"jl llneg" \
+"or eax,edx" \
+"je llgs" \
+"mov eax,1" \
+"jmp llgs" \
+"llneg:" \
+"mov eax,-1" \
+"llgs:" \
+parm[ebx] [ecx] \
+value[eax] \
+modify[edx];
+
+
+
+
+/*
+ EQUALS
+
+ Assignment, not comparison: *a = *b. Both dwords of b are copied to a
+ through edx:eax.
+
+ Registers: a (destination) in edi, b (source) in esi.
+*/
+
+void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b);
+# pragma aux EQUALS_LL = \
+"mov eax,[esi]" \
+"mov edx,[esi+4]" \
+"mov [edi],eax" \
+"mov [edi+4],edx" \
+parm[edi] [esi] \
+modify[eax edx];
+
+
+/*
+ NEGATE
+
+ *a = -*a, in place. Two's complement negation: both dwords are
+ inverted with "not", then 1 is added to the low dword and the carry is
+ propagated into the high dword with "adc ...,0".
+
+ Registers: a in esi. Works directly on memory.
+*/
+
+void NEG_LL(LONGLONGCH *a);
+# pragma aux NEG_LL = \
+"not dword ptr[esi]" \
+"not dword ptr[esi+4]" \
+"add dword ptr[esi],1" \
+"adc dword ptr[esi+4],0" \
+parm[esi];
+
+
+/*
+ ASR
+
+ Arithmetic shift right of *a by "shift" bits, in place. If shift is
+ zero or negative nothing is done. Otherwise the value is shifted one
+ bit per loop iteration: "sar" on the high dword keeps the sign and puts
+ the bit shifted out into the carry flag, and "rcr" rotates that carry
+ into the top of the low dword. The cost is therefore proportional to
+ the shift count.
+
+ Registers: a in esi, shift in eax (used as the loop counter).
+*/
+
+void ASR_LL(LONGLONGCH *a, int shift);
+# pragma aux ASR_LL = \
+"and eax,eax" \
+"jle asrdn" \
+"asrlp:" \
+"sar dword ptr[esi+4],1" \
+"rcr dword ptr[esi],1" \
+"dec eax" \
+"jne asrlp" \
+"asrdn:" \
+parm[esi] [eax];
+
+
+/*
+ Convert int to LONGLONGCH
+
+ *a = (64-bit) *b. The int at b is loaded into eax, "cdq" sign-extends
+ it into edx, and edx:eax is stored at a. Note that b is a pointer to
+ the int, not the int itself.
+
+ Registers: a in edi, b in esi; eax and edx are scratch.
+*/
+
+void IntToLL(LONGLONGCH *a, int *b);
+# pragma aux IntToLL = \
+"mov eax,[esi]" \
+"cdq" \
+"mov [edi],eax" \
+"mov [edi+4],edx" \
+parm[edi] [esi] \
+modify[eax edx];
+
+
+
+
+
+
+
+
+
+/*
+
+ Fixed Point Multiply.
+
+
+ 16.16 * 16.16 -> 16.16
+ or
+ 16.16 * 0.32 -> 0.32
+
+ A proper version of this function ought to read
+ 16.16 * 16.16 -> 32.16
+ but this would require a long long result
+
+ Algorithm:
+
+ Take the mid 32 bits of the 64 bit result
+
+ "imul edx" leaves the signed 64-bit product of a (eax) and b (edx) in
+ edx:eax. "shrd eax,edx,16" then shifts eax right by 16 while feeding in
+ the low 16 bits of edx, so eax ends up holding bits 16..47 of the
+ product. Bits 48..63 are discarded, so results that do not fit in 32
+ bits are silently truncated.
+
+ The "#if 0" variant below is disabled; it extracts the same bits with
+ "mov ax,dx" followed by "rol eax,16".
+
+*/
+
+/*
+ These functions have been checked for suitability for
+ a Pentium and look as if they would work adequately.
+ Might be worth a more detailed look at optimising
+ them though.
+*/
+
+#if 0
+
+int MUL_FIXED(int a, int b);
+# pragma aux MUL_FIXED = \
+"imul edx" \
+"mov ax,dx" \
+"rol eax,16" \
+parm[eax] [edx] \
+value[eax] \
+modify[edx];
+
+#else
+
+int MUL_FIXED(int a, int b);
+# pragma aux MUL_FIXED = \
+"imul edx" \
+"shrd eax,edx,16" \
+parm[eax] [edx] \
+value[eax] \
+modify[edx];
+
+#endif
+
+
+/*
+
+ Fixed Point Divide - returns a / b
+
+ The dividend a is widened to 64 bits and shifted left by 16 before the
+ divide: "cdq" sign-extends eax into edx, "rol eax,16" swaps the halves
+ of eax, "mov dx,ax" moves the original high word of a into the low word
+ of edx and "xor ax,ax" clears the low word of eax. edx:eax then holds
+ a << 16, which "idiv ebx" divides by b; the quotient is returned in eax.
+
+ As with any idiv, the CPU raises a divide error if b is zero or the
+ quotient does not fit in 32 bits; no check is made here.
+
+*/
+
+int DIV_FIXED(int a, int b);
+# pragma aux DIV_FIXED = \
+"cdq" \
+"rol eax,16" \
+"mov dx,ax" \
+"xor ax,ax" \
+"idiv ebx" \
+parm[eax] [ebx] \
+value[eax] \
+modify[edx];
+
+
+
+
+/*
+
+ Multiply and Divide Functions.
+
+*/
+
+
+/*
+
+ 32/32 division
+
+ This macro is a function on some other platforms
+
+ Here it expands to the plain C "/" operator with both arguments
+ parenthesised; each argument is evaluated once.
+
+*/
+
+#define DIV_INT(a, b) ((a) / (b))
+
+
+
+
+/*
+
+ A Narrowing 64/32 Division
+
+ Returns *a / b. The 64-bit dividend is loaded into edx:eax and divided
+ by b with a signed "idiv"; the 32-bit quotient is returned in eax and
+ the remainder (left in edx) is discarded.
+
+ As with any idiv, the CPU raises a divide error if b is zero or the
+ quotient does not fit in 32 bits; no check is made here.
+
+ Registers: a in esi, b in ebx.
+
+*/
+
+int NarrowDivide(LONGLONGCH *a, int b);
+# pragma aux NarrowDivide = \
+"mov eax,[esi]" \
+"mov edx,[esi+4]" \
+"idiv ebx" \
+parm[esi] [ebx] \
+value[eax] \
+modify[edx];
+
+
+
+/*
+
+ This function performs a Widening Multiply followed by a Narrowing Divide.
+
+ a = (a * b) / c
+
+ The product is kept as a full 64-bit value in edx:eax between the
+ "imul" and the "idiv", so the intermediate a * b cannot overflow; only
+ the final quotient has to fit in 32 bits.
+
+ As with any idiv, the CPU raises a divide error if c is zero or the
+ quotient does not fit in 32 bits; no check is made here.
+
+ Registers: a in eax, b in edx, c in ebx; result in eax.
+
+*/
+
+int WideMulNarrowDiv(int a, int b, int c);
+# pragma aux WideMulNarrowDiv = \
+"imul edx"\
+"idiv ebx" \
+parm[eax] [edx] [ebx] \
+value[eax];
+
+
+
+/*
+
+ Function to rotate a VECTORCH using a MATRIXCH
+
+ This is the C function
+
+ x = MUL_FIXED(m->mat11, v->vx);
+ x += MUL_FIXED(m->mat21, v->vy);
+ x += MUL_FIXED(m->mat31, v->vz);
+
+ y = MUL_FIXED(m->mat12, v->vx);
+ y += MUL_FIXED(m->mat22, v->vy);
+ y += MUL_FIXED(m->mat32, v->vz);
+
+ z = MUL_FIXED(m->mat13, v->vx);
+ z += MUL_FIXED(m->mat23, v->vy);
+ z += MUL_FIXED(m->mat33, v->vz);
+
+ v->vx = x;
+ v->vy = y;
+ v->vz = z;
+
+ This is the MUL_FIXED inline assembler function
+
+ imul edx
+ shrd eax,edx,16
+
+
+typedef struct matrixch {
+
+ int mat11; 0
+ int mat12; 4
+ int mat13; 8
+
+ int mat21; 12
+ int mat22; 16
+ int mat23; 20
+
+ int mat31; 24
+ int mat32; 28
+ int mat33; 32
+
+} MATRIXCH;
+
+ Notes on the assembler below:
+
+ v arrives in esi and m in edi. The vector components are read at
+ offsets 0, 4 and 8 from esi and the matrix elements at the byte
+ offsets listed in the struct above. x is accumulated in ecx, y in ebx
+ and z in ebp; all three are written back to v only after every product
+ has been computed, so the rotation is done in place.
+
+ eax, ebx, ecx, edx and ebp are saved on entry and restored on exit with
+ push / pop instead of being named in a modify clause.
+
+*/
+
+void RotateVector_ASM(VECTORCH *v, MATRIXCH *m);
+# pragma aux RotateVector_ASM = \
+\
+"push eax" \
+"push ebx" \
+"push ecx" \
+"push edx" \
+"push ebp" \
+\
+"mov eax,[edi + 0]" \
+"imul DWORD PTR [esi + 0]" \
+"shrd eax,edx,16" \
+"mov ecx,eax"\
+"mov eax,[edi + 12]" \
+"imul DWORD PTR [esi + 4]" \
+"shrd eax,edx,16" \
+"add ecx,eax" \
+"mov eax,[edi + 24]" \
+"imul DWORD PTR [esi + 8]" \
+"shrd eax,edx,16" \
+"add ecx,eax" \
+\
+"mov eax,[edi + 4]" \
+"imul DWORD PTR [esi + 0]" \
+"shrd eax,edx,16" \
+"mov ebx,eax"\
+"mov eax,[edi + 16]" \
+"imul DWORD PTR [esi + 4]" \
+"shrd eax,edx,16" \
+"add ebx,eax" \
+"mov eax,[edi + 28]" \
+"imul DWORD PTR [esi + 8]" \
+"shrd eax,edx,16" \
+"add ebx,eax" \
+\
+"mov eax,[edi + 8]" \
+"imul DWORD PTR [esi + 0]" \
+"shrd eax,edx,16" \
+"mov ebp,eax"\
+"mov eax,[edi + 20]" \
+"imul DWORD PTR [esi + 4]" \
+"shrd eax,edx,16" \
+"add ebp,eax" \
+"mov eax,[edi + 32]" \
+"imul DWORD PTR [esi + 8]" \
+"shrd eax,edx,16" \
+"add ebp,eax" \
+\
+"mov [esi + 0],ecx" \
+"mov [esi + 4],ebx" \
+"mov [esi + 8],ebp" \
+\
+"pop ebp" \
+"pop edx" \
+"pop ecx" \
+"pop ebx" \
+"pop eax" \
+\
+parm[esi] [edi];
+
+
+/*
+
+ Here is the same function, this time copying the result to a second vector
+
+ v1 (esi) is the input vector, v2 (edx) receives the rotated result and
+ m (edi) is the matrix; v1 itself is not written.
+
+ edx holds the destination pointer on entry but is overwritten by every
+ "imul", so it is pushed before the arithmetic and popped again just
+ before the three stores. eax, ebx, ecx and ebp are saved and restored
+ around the whole sequence. x is accumulated in ecx, y in ebx and z in
+ ebp.
+
+*/
+
+void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m);
+# pragma aux RotateAndCopyVector_ASM = \
+\
+"push eax" \
+"push ebx" \
+"push ecx" \
+"push ebp" \
+\
+"push edx" \
+"mov eax,[edi + 0]" \
+"imul DWORD PTR [esi + 0]" \
+"shrd eax,edx,16" \
+"mov ecx,eax"\
+"mov eax,[edi + 12]" \
+"imul DWORD PTR [esi + 4]" \
+"shrd eax,edx,16" \
+"add ecx,eax" \
+"mov eax,[edi + 24]" \
+"imul DWORD PTR [esi + 8]" \
+"shrd eax,edx,16" \
+"add ecx,eax" \
+\
+"mov eax,[edi + 4]" \
+"imul DWORD PTR [esi + 0]" \
+"shrd eax,edx,16" \
+"mov ebx,eax"\
+"mov eax,[edi + 16]" \
+"imul DWORD PTR [esi + 4]" \
+"shrd eax,edx,16" \
+"add ebx,eax" \
+"mov eax,[edi + 28]" \
+"imul DWORD PTR [esi + 8]" \
+"shrd eax,edx,16" \
+"add ebx,eax" \
+\
+"mov eax,[edi + 8]" \
+"imul DWORD PTR [esi + 0]" \
+"shrd eax,edx,16" \
+"mov ebp,eax"\
+"mov eax,[edi + 20]" \
+"imul DWORD PTR [esi + 4]" \
+"shrd eax,edx,16" \
+"add ebp,eax" \
+"mov eax,[edi + 32]" \
+"imul DWORD PTR [esi + 8]" \
+"shrd eax,edx,16" \
+"add ebp,eax" \
+\
+"pop edx" \
+"mov [edx + 0],ecx" \
+"mov [edx + 4],ebx" \
+"mov [edx + 8],ebp" \
+\
+"pop ebp" \
+"pop ecx" \
+"pop ebx" \
+"pop eax" \
+\
+parm[esi] [edx] [edi];
+
+
+
+
+#if (SupportFPMathsFunctions || SupportFPSquareRoot)
+
+/*
+
+ Square Root
+
+ Returns the Square Root of a 32-bit number
+
+ The argument is passed through the file-scope variable "temp" so that
+ it can be loaded onto the FPU stack with "fild"; "fsqrt" takes the root
+ and "fistp" stores it, converted back to an integer, in "temp2", from
+ where it is returned in eax. "finit" re-initialises the FPU first, so
+ any FPU state the caller had is lost.
+
+ Because temp and temp2 are shared statics the function is not
+ re-entrant. No check is made for a negative argument.
+
+*/
+
+static long temp;
+static long temp2;
+
+int SqRoot32(int A);
+# pragma aux SqRoot32 = \
+"finit" \
+"mov temp,eax" \
+"fild temp" \
+"fsqrt" \
+"fistp temp2" \
+"fwait" \
+"mov eax,temp2" \
+parm[eax] \
+value[eax];
+
+#endif
+
+
+/*
+
+ This may look ugly (it is) but it is a MUCH faster way to convert "float" into "int" than
+ the function call "CHP" used by the WATCOM compiler.
+
+ FloatToInt() takes no arguments: it loads the file-scope float "fptmp"
+ onto the FPU stack and stores it, converted to an integer by "fistp",
+ in the file-scope int "itmp". Callers set fptmp beforehand and read
+ itmp afterwards (see f2i below). "fistp" converts using whatever
+ rounding mode the FPU is currently set to, which need not match a C
+ cast.
+
+*/
+
+static float fptmp;
+static int itmp;
+
+void FloatToInt(void);
+# pragma aux FloatToInt = \
+"fld fptmp" \
+"fistp itmp";
+
+/*
+
+ This macro makes usage of the above function easier and more elegant
+
+ f2i(a, b) assigns b to fptmp, calls FloatToInt() and then assigns itmp
+ to a. It expands to a brace-enclosed block of three statements, so it
+ can only be used where a statement is allowed, and "a" must be a plain
+ lvalue because it is not parenthesised in the expansion.
+
+*/
+
+#define f2i(a, b) { \
+fptmp = (b); \
+FloatToInt(); \
+a = itmp;}
+
+#elif defined(_MSC_VER) /* inline assembler for the Microsoft compiler */
+
+/*
+ ADD
+
+ *c = *a + *b, performed as a 64-bit add in two 32-bit halves: the dword
+ at offset 0 is added with "add" and the dword at offset 4 with "adc", so
+ the carry out of the low half propagates into the high half.
+*/
+
+static void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c)
+{
+ _asm
+ {
+ mov esi,a
+ mov edi,b
+ mov ebx,c
+ mov eax,[esi]
+ mov edx,[esi+4]
+ add eax,[edi]
+ adc edx,[edi+4]
+ mov [ebx],eax
+ mov [ebx+4],edx
+ }
+}
+
+/*
+ ADD ++
+
+ *c += *a. The 64-bit value at a is loaded into edx:eax and added in
+ place to the value at c (add on the low dword, adc on the high dword).
+*/
+
+static void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a)
+{
+ _asm
+ {
+ mov edi,c
+ mov esi,a
+ mov eax,[esi]
+ mov edx,[esi+4]
+ add [edi],eax
+ adc [edi+4],edx
+ }
+}
+
+/*
+ SUB
+
+ *c = *a - *b, performed as a 64-bit subtract in two 32-bit halves
+ (sub on the low dword, sbb on the high dword so the borrow propagates).
+*/
+
+static void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c)
+{
+ _asm
+ {
+ mov esi,a
+ mov edi,b
+ mov ebx,c
+ mov eax,[esi]
+ mov edx,[esi+4]
+ sub eax,[edi]
+ sbb edx,[edi+4]
+ mov [ebx],eax
+ mov [ebx+4],edx
+ }
+}
+
+/*
+ SUB --
+
+ *c -= *a. The 64-bit value at a is loaded into edx:eax and subtracted
+ in place from the value at c (sub on the low dword, sbb on the high).
+*/
+
+static void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a)
+{
+ _asm
+ {
+ mov edi,c
+ mov esi,a
+ mov eax,[esi]
+ mov edx,[esi+4]
+ sub [edi],eax
+ sbb [edi+4],edx
+ }
+}
+
+/*
+
+ MUL
+
+ This is the multiply we use, the 32 x 32 = 64 widening version
+
+ *c = a * b. The one-operand signed "imul b" multiplies eax (loaded
+ with a) by b and leaves the 64-bit product in edx:eax, which is then
+ stored low dword first at c.
+
+*/
+
+static void MUL_I_WIDE(int a, int b, LONGLONGCH *c)
+{
+ _asm
+ {
+ mov eax,a
+ mov ebx,c
+ imul b
+ mov [ebx],eax
+ mov [ebx+4],edx
+ }
+}
+
+/*
+
+ CMP
+
+ This substitutes for ==, >, <, >=, <=
+
+ Returns -1 if *a < *b, 0 if *a == *b and +1 if *a > *b, comparing the
+ operands as signed 64-bit values.
+
+ The difference *a - *b is formed with sub / sbb. The "less than" test
+ is taken directly from the flags left by sbb (jl tests SF != OF), so
+ the result is still correct when the 64-bit subtraction overflows;
+ testing only the sign of the high dword of the difference would not be.
+ Equality is detected by or-ing the two halves of the difference.
+
+*/
+
+static int CMP_LL(LONGLONGCH *a, LONGLONGCH *b)
+{
+ int retval = 0;
+ _asm
+ {
+ mov ebx,a
+ mov ecx,b
+ mov eax,[ebx]
+ mov edx,[ebx+4]
+ sub eax,[ecx]
+ sbb edx,[ecx+4]
+ jl llneg ; SF != OF after the 64-bit subtract: a is less than b
+ or eax,edx ; zero only if both halves of the difference are zero
+ je llgs ; equal: retval keeps its initial value 0
+ mov retval,1
+ jmp llgs
+ llneg:
+ mov retval,-1
+ llgs:
+ }
+ return retval;
+}
+
+/*
+ EQUALS
+
+ Assignment, not comparison: *a = *b. Both dwords of b are copied to a
+ through edx:eax.
+*/
+
+static void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b)
+{
+ _asm
+ {
+ mov edi,a
+ mov esi,b
+ mov eax,[esi]
+ mov edx,[esi+4]
+ mov [edi],eax
+ mov [edi+4],edx
+ }
+}
+
+/*
+ NEGATE
+
+ *a = -*a, in place. Two's complement negation: both dwords are
+ inverted with "not", then 1 is added to the low dword and the carry is
+ propagated into the high dword with "adc ...,0".
+*/
+
+static void NEG_LL(LONGLONGCH *a)
+{
+ _asm
+ {
+ mov esi,a
+ not dword ptr[esi]
+ not dword ptr[esi+4]
+ add dword ptr[esi],1
+ adc dword ptr[esi+4],0
+ }
+}
+
+/*
+ ASR
+
+ Arithmetic shift right of *a by "shift" bits, in place. If shift is
+ zero or negative nothing is done. Otherwise the value is shifted one
+ bit per loop iteration: "sar" on the high dword keeps the sign and puts
+ the bit shifted out into the carry flag, and "rcr" rotates that carry
+ into the top of the low dword. The cost is therefore proportional to
+ the shift count.
+*/
+
+static void ASR_LL(LONGLONGCH *a, int shift)
+{
+ _asm
+ {
+ mov esi,a
+ mov eax,shift
+ and eax,eax
+ jle asrdn
+ asrlp:
+ sar dword ptr[esi+4],1
+ rcr dword ptr[esi],1
+ dec eax
+ jne asrlp
+ asrdn:
+ }
+}
+
+/*
+ Convert int to LONGLONGCH
+
+ *a = (64-bit) *b. The int at b is loaded into eax, "cdq" sign-extends
+ it into edx, and edx:eax is stored at a. Note that b is a pointer to
+ the int, not the int itself.
+*/
+
+static void IntToLL(LONGLONGCH *a, int *b)
+{
+ _asm
+ {
+ mov esi,b
+ mov edi,a
+ mov eax,[esi]
+ cdq
+ mov [edi],eax
+ mov [edi+4],edx
+ }
+}
+
+/*
+
+ Fixed Point Multiply.
+
+
+ 16.16 * 16.16 -> 16.16
+ or
+ 16.16 * 0.32 -> 0.32
+
+ A proper version of this function ought to read
+ 16.16 * 16.16 -> 32.16
+ but this would require a long long result
+
+ Algorithm:
+
+ Take the mid 32 bits of the 64 bit result
+
+ "imul b" leaves the signed 64-bit product of a and b in edx:eax.
+ "shrd eax,edx,16" then shifts eax right by 16 while feeding in the low
+ 16 bits of edx, so eax ends up holding bits 16..47 of the product.
+ Bits 48..63 are discarded, so results that do not fit in 32 bits are
+ silently truncated.
+
+*/
+
+/*
+ These functions have been checked for suitability for
+ a Pentium and look as if they would work adequately.
+ Might be worth a more detailed look at optimising
+ them though.
+*/
+
+static int MUL_FIXED(int a, int b)
+{
+ int retval;
+ _asm
+ {
+ mov eax,a
+ imul b
+ shrd eax,edx,16
+ mov retval,eax
+ }
+ return retval;
+}
+
+/*
+
+ Fixed Point Divide - returns a / b
+
+ The dividend a is widened to 64 bits and shifted left by 16 before the
+ divide: "cdq" sign-extends eax into edx, "rol eax,16" swaps the halves
+ of eax, "mov dx,ax" moves the original high word of a into the low word
+ of edx and "xor ax,ax" clears the low word of eax. edx:eax then holds
+ a << 16, which "idiv b" divides by b; the quotient is returned.
+
+ As with any idiv, the CPU raises a divide error if b is zero or the
+ quotient does not fit in 32 bits; no check is made here.
+
+*/
+
+static int DIV_FIXED(int a, int b)
+{
+ int retval;
+ _asm
+ {
+ mov eax,a
+ cdq
+ rol eax,16
+ mov dx,ax
+ xor ax,ax
+ idiv b
+ mov retval,eax
+ }
+ return retval;
+}
+
+/*
+
+ Multiply and Divide Functions.
+
+*/
+
+
+/*
+
+ 32/32 division
+
+ This macro is a function on some other platforms
+
+ Here it expands to the plain C "/" operator with both arguments
+ parenthesised; each argument is evaluated once.
+
+*/
+
+#define DIV_INT(a, b) ((a) / (b))
+
+/*
+
+ A Narrowing 64/32 Division
+
+ Returns *a / b. The 64-bit dividend is loaded into edx:eax and divided
+ by b with a signed "idiv"; the 32-bit quotient is returned and the
+ remainder (left in edx) is discarded.
+
+ As with any idiv, the CPU raises a divide error if b is zero or the
+ quotient does not fit in 32 bits; no check is made here.
+
+*/
+
+static int NarrowDivide(LONGLONGCH *a, int b)
+{
+ int retval;
+ _asm
+ {
+ mov esi,a
+ mov eax,[esi]
+ mov edx,[esi+4]
+ idiv b
+ mov retval,eax
+ }
+ return retval;
+}
+
+/*
+
+ This function performs a Widening Multiply followed by a Narrowing Divide.
+
+ a = (a * b) / c
+
+ The product is kept as a full 64-bit value in edx:eax between the
+ "imul" and the "idiv", so the intermediate a * b cannot overflow; only
+ the final quotient has to fit in 32 bits.
+
+ As with any idiv, the CPU raises a divide error if c is zero or the
+ quotient does not fit in 32 bits; no check is made here.
+
+*/
+
+static int WideMulNarrowDiv(int a, int b, int c)
+{
+ int retval;
+ _asm
+ {
+ mov eax,a
+ imul b
+ idiv c
+ mov retval,eax
+ }
+ return retval;
+}
+
+/*
+
+ Function to rotate a VECTORCH using a MATRIXCH
+
+ This is the C function
+
+ x = MUL_FIXED(m->mat11, v->vx);
+ x += MUL_FIXED(m->mat21, v->vy);
+ x += MUL_FIXED(m->mat31, v->vz);
+
+ y = MUL_FIXED(m->mat12, v->vx);
+ y += MUL_FIXED(m->mat22, v->vy);
+ y += MUL_FIXED(m->mat32, v->vz);
+
+ z = MUL_FIXED(m->mat13, v->vx);
+ z += MUL_FIXED(m->mat23, v->vy);
+ z += MUL_FIXED(m->mat33, v->vz);
+
+ v->vx = x;
+ v->vy = y;
+ v->vz = z;
+
+ This is the MUL_FIXED inline assembler function
+
+ imul edx
+ shrd eax,edx,16
+
+
+typedef struct matrixch {
+
+ int mat11; 0
+ int mat12; 4
+ int mat13; 8
+
+ int mat21; 12
+ int mat22; 16
+ int mat23; 20
+
+ int mat31; 24
+ int mat32; 28
+ int mat33; 32
+
+} MATRIXCH;
+
+ Notes on the assembler below:
+
+ x is accumulated in ecx, y in ebx and z in ebp; all three are written
+ back to v only after every product has been computed, so the rotation
+ is done in place.
+
+ ebp is the frame pointer, which the Microsoft inline assembler does
+ not preserve automatically. It is therefore pushed before it is used
+ as the z accumulator and popped again afterwards. The parameters v
+ and m are loaded before the push, while the frame is still intact.
+
+*/
+
+static void RotateVector_ASM(VECTORCH *v, MATRIXCH *m)
+{
+ _asm
+ {
+ mov esi,v
+ mov edi,m
+ push ebp ; ebp is used as a scratch accumulator below
+
+ mov eax,[edi + 0]
+ imul DWORD PTR [esi + 0]
+ shrd eax,edx,16
+ mov ecx,eax
+ mov eax,[edi + 12]
+ imul DWORD PTR [esi + 4]
+ shrd eax,edx,16
+ add ecx,eax
+ mov eax,[edi + 24]
+ imul DWORD PTR [esi + 8]
+ shrd eax,edx,16
+ add ecx,eax
+
+ mov eax,[edi + 4]
+ imul DWORD PTR [esi + 0]
+ shrd eax,edx,16
+ mov ebx,eax
+ mov eax,[edi + 16]
+ imul DWORD PTR [esi + 4]
+ shrd eax,edx,16
+ add ebx,eax
+ mov eax,[edi + 28]
+ imul DWORD PTR [esi + 8]
+ shrd eax,edx,16
+ add ebx,eax
+
+ mov eax,[edi + 8]
+ imul DWORD PTR [esi + 0]
+ shrd eax,edx,16
+ mov ebp,eax
+ mov eax,[edi + 20]
+ imul DWORD PTR [esi + 4]
+ shrd eax,edx,16
+ add ebp,eax
+ mov eax,[edi + 32]
+ imul DWORD PTR [esi + 8]
+ shrd eax,edx,16
+ add ebp,eax
+
+ mov [esi + 0],ecx
+ mov [esi + 4],ebx
+ mov [esi + 8],ebp
+
+ pop ebp ; restore the frame pointer for the function epilogue
+ }
+}
+
+/*
+
+ Here is the same function, this time copying the result to a second vector
+
+ v1 is the input vector, v2 receives the rotated result and m is the
+ matrix; v1 itself is not written. x is accumulated in ecx, y in ebx
+ and z in ebp.
+
+ ebp is the frame pointer, which the Microsoft inline assembler does
+ not preserve automatically, and the parameter v2 may be addressed
+ through it. It is therefore pushed before being used as the z
+ accumulator; once z is complete it is moved to eax and ebp is popped
+ BEFORE v2 is read, so that v2 is fetched through a valid frame.
+
+*/
+
+static void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m)
+{
+ _asm
+ {
+ mov esi,v1
+ mov edi,m
+ push ebp ; ebp is used as a scratch accumulator below
+
+ mov eax,[edi + 0]
+ imul DWORD PTR [esi + 0]
+ shrd eax,edx,16
+ mov ecx,eax
+ mov eax,[edi + 12]
+ imul DWORD PTR [esi + 4]
+ shrd eax,edx,16
+ add ecx,eax
+ mov eax,[edi + 24]
+ imul DWORD PTR [esi + 8]
+ shrd eax,edx,16
+ add ecx,eax
+
+ mov eax,[edi + 4]
+ imul DWORD PTR [esi + 0]
+ shrd eax,edx,16
+ mov ebx,eax
+ mov eax,[edi + 16]
+ imul DWORD PTR [esi + 4]
+ shrd eax,edx,16
+ add ebx,eax
+ mov eax,[edi + 28]
+ imul DWORD PTR [esi + 8]
+ shrd eax,edx,16
+ add ebx,eax
+
+ mov eax,[edi + 8]
+ imul DWORD PTR [esi + 0]
+ shrd eax,edx,16
+ mov ebp,eax
+ mov eax,[edi + 20]
+ imul DWORD PTR [esi + 4]
+ shrd eax,edx,16
+ add ebp,eax
+ mov eax,[edi + 32]
+ imul DWORD PTR [esi + 8]
+ shrd eax,edx,16
+ add ebp,eax
+
+ mov eax,ebp ; keep z in eax so that ebp can be restored
+ pop ebp ; frame pointer valid again before v2 is read
+ mov edx,v2
+ mov [edx + 0],ecx
+ mov [edx + 4],ebx
+ mov [edx + 8],eax
+ }
+}
+
+#if (SupportFPMathsFunctions || SupportFPSquareRoot)
+
+/*
+
+ Square Root
+
+ Returns the Square Root of a 32-bit number
+
+ The argument A is loaded onto the FPU stack with "fild", "fsqrt" takes
+ the root and "fistp" stores it, converted back to an integer, in the
+ file-scope variable "temp2", which is then returned. "finit"
+ re-initialises the FPU first, so any FPU state the caller had is lost.
+
+ Because temp2 is a shared static the function is not re-entrant. No
+ check is made for a negative argument. "temp" is declared here but is
+ not used by this (Microsoft) variant.
+
+*/
+
+static long temp;
+static long temp2;
+
+static int SqRoot32(int A)
+{
+ _asm
+ {
+ finit
+ fild A
+ fsqrt
+ fistp temp2
+ fwait
+ }
+ return (int)temp2;
+}
+
+#endif
+
+
+/*
+
+ This may look ugly (it is) but it is a MUCH faster way to convert "float" into "int" than
+ the function call "CHP" used by the WATCOM compiler.
+
+ FloatToInt() takes no arguments: it loads the file-scope float "fptmp"
+ onto the FPU stack and stores it, converted to an integer by "fistp",
+ in the file-scope int "itmp". Callers set fptmp beforehand and read
+ itmp afterwards (see f2i below). "fistp" converts using whatever
+ rounding mode the FPU is currently set to, which need not match a C
+ cast.
+
+*/
+
+static float fptmp;
+static int itmp;
+
+static void FloatToInt(void)
+{
+ _asm
+ {
+ fld fptmp
+ fistp itmp
+ }
+}
+
+/*
+
+ This macro makes usage of the above function easier and more elegant
+
+ f2i(a, b) assigns b to fptmp, calls FloatToInt() and then assigns itmp
+ to a. It expands to a brace-enclosed block of three statements, so it
+ can only be used where a statement is allowed, and "a" must be a plain
+ lvalue because it is not parenthesised in the expansion.
+
+*/
+
+#define f2i(a, b) { \
+fptmp = (b); \
+FloatToInt(); \
+a = itmp;}
+
+#else /* other compiler ? */
+
+#error "Unknown compiler"
+
+#endif
+
+
+/*
+ These functions are in plspecfn.c
+
+ Prototypes only. _Dot is the routine that the Dot() macro below
+ resolves to on the non-MMX path.
+ NOTE(review): the exact semantics of each routine (for example the
+ operand order of MakeV and CopyClipPoint) are not visible from this
+ header - check plspecfn.c.
+*/
+
+int WideMul2NarrowDiv(int a, int b, int c, int d, int e);
+int _Dot(VECTORCH *vptr1, VECTORCH *vptr2);
+void MakeV(VECTORCH *v1, VECTORCH *v2, VECTORCH *v3);
+void AddV(VECTORCH *v1, VECTORCH *v2);
+void RotVect(VECTORCH *v, MATRIXCH *m);
+void CopyClipPoint(CLIP_POINT *cp1, CLIP_POINT *cp2);
+/*
+ Vector maths dispatch macros.
+
+ When SUPPORT_MMX is non-zero each macro tests use_mmx_math every time
+ it is evaluated and calls either the MMX routine or the underscore
+ prefixed routine; otherwise it always calls the underscore prefixed
+ routine. Note that MMX_VectorTransformed takes the output vector
+ first, so RotateAndCopyVector swaps v_in and v_out for that call.
+
+ NOTE(review): of the underscore prefixed routines only _Dot is
+ declared in the part of this header shown here; _RotateVector,
+ _RotateAndCopyVector and _DotProduct must be declared elsewhere -
+ confirm.
+*/
+#if SUPPORT_MMX
+
+#define RotateVector(v,m) (use_mmx_math ? MMX_VectorTransform((v),(m)) : _RotateVector((v),(m)))
+#define RotateAndCopyVector(v_in,v_out,m) (use_mmx_math ? MMX_VectorTransformed((v_out),(v_in),(m)) : _RotateAndCopyVector((v_in),(v_out),(m)))
+#define Dot(v1,v2) (use_mmx_math ? MMXInline_VectorDot((v1),(v2)) : _Dot((v1),(v2)))
+#define DotProduct(v1,v2) (use_mmx_math ? MMX_VectorDot((v1),(v2)) : _DotProduct((v1),(v2)))
+
+#else /* ! SUPPORT_MMX */
+
+#define RotateVector(v,m) (_RotateVector((v),(m)))
+#define RotateAndCopyVector(v_in,v_out,m) (_RotateAndCopyVector((v_in),(v_out),(m)))
+#define Dot(v1,v2) (_Dot((v1),(v2)))
+#define DotProduct(v1,v2) (_DotProduct((v1),(v2)))
+
+#endif /* ? SUPPORT_MMX */
+
+#ifdef __cplusplus
+}
+#endif
+
+#define INLINE_INCLUDED
+#endif
+