summary | refs | log | tree | commit | diff
path: root/src/win95/inline.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/win95/inline.h')
-rw-r--r-- src/win95/inline.h 1276
1 files changed, 2 insertions, 1274 deletions
diff --git a/src/win95/inline.h b/src/win95/inline.h
index da000e4..717881d 100644
--- a/src/win95/inline.h
+++ b/src/win95/inline.h
@@ -14,15 +14,6 @@
#include "mmx_math.h"
#endif
-/*
-
-
- Watcom PC Inline Functions.
-
- Watcom Standard C does not support the C++ "inline" directive, so these
- functions have been written as inline assembler instead.
-
-*/
#ifdef __cplusplus
extern "C" {
@@ -73,524 +64,7 @@ extern "C" {
*/
-#ifdef __WATCOMC__ /* inline assember for the Watcom compiler */
-
-/* ADD */
-
-void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
-# pragma aux ADD_LL = \
-"mov eax,[esi]" \
-"mov edx,[esi+4]" \
-"add eax,[edi]" \
-"adc edx,[edi+4]" \
-"mov [ebx],eax" \
-"mov [ebx+4],edx" \
-parm[esi] [edi] [ebx] \
-modify[eax edx];
-
-
-/* ADD ++ */
-
-void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a);
-# pragma aux ADD_LL_PP = \
-"mov eax,[esi]" \
-"mov edx,[esi+4]" \
-"add [edi],eax" \
-"adc [edi+4],edx" \
-parm[edi] [esi] \
-modify[eax edx];
-
-
-/* SUB */
-
-void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
-# pragma aux SUB_LL = \
-"mov eax,[esi]" \
-"mov edx,[esi+4]" \
-"sub eax,[edi]" \
-"sbb edx,[edi+4]" \
-"mov [ebx],eax" \
-"mov [ebx+4],edx" \
-parm[esi] [edi] [ebx] \
-modify[eax edx];
-
-
-
-/* SUB -- */
-
-void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a);
-# pragma aux SUB_LL_MM = \
-"mov eax,[esi]" \
-"mov edx,[esi+4]" \
-"sub [edi],eax" \
-"sbb [edi+4],edx" \
-parm[edi] [esi] \
-modify[eax edx];
-
-
-/*
-
- MUL
-
- This is the multiply we use, the 32 x 32 = 64 widening version
-
-*/
-
-void MUL_I_WIDE(int a, int b, LONGLONGCH *c);
-# pragma aux MUL_I_WIDE = \
-"imul edx"\
-"mov [ebx],eax" \
-"mov [ebx+4],edx" \
-parm[eax] [edx] [ebx] \
-modify[eax edx];
-
-
-
-/*
-
- CMP
-
- This substitutes for ==, >, <, >=, <=
-
-*/
-
-int CMP_LL(LONGLONGCH *a, LONGLONGCH *b);
-# pragma aux CMP_LL = \
-"mov eax,[ebx]" \
-"mov edx,[ebx+4]" \
-"sub eax,[ecx]" \
-"sbb edx,[ecx+4]" \
-"and edx,edx" \
-"jne llnz" \
-"and eax,eax" \
-"jne llnz" \
-"xor eax,eax" \
-"jmp llgs" \
-"llnz:" \
-"mov eax,1" \
-"and edx,edx" \
-"jge llgs" \
-"neg eax" \
-"llgs:" \
-parm[ebx] [ecx] \
-value[eax] \
-modify[edx];
-
-
-
-
-/* EQUALS */
-
-void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b);
-# pragma aux EQUALS_LL = \
-"mov eax,[esi]" \
-"mov edx,[esi+4]" \
-"mov [edi],eax" \
-"mov [edi+4],edx" \
-parm[edi] [esi] \
-modify[eax edx];
-
-
-/* NEGATE */
-
-void NEG_LL(LONGLONGCH *a);
-# pragma aux NEG_LL = \
-"not dword ptr[esi]" \
-"not dword ptr[esi+4]" \
-"add dword ptr[esi],1" \
-"adc dword ptr[esi+4],0" \
-parm[esi];
-
-
-/* ASR */
-
-void ASR_LL(LONGLONGCH *a, int shift);
-# pragma aux ASR_LL = \
-"and eax,eax" \
-"jle asrdn" \
-"asrlp:" \
-"sar dword ptr[esi+4],1" \
-"rcr dword ptr[esi],1" \
-"dec eax" \
-"jne asrlp" \
-"asrdn:" \
-parm[esi] [eax];
-
-
-/* Convert int to LONGLONGCH */
-
-void IntToLL(LONGLONGCH *a, int *b);
-# pragma aux IntToLL = \
-"mov eax,[esi]" \
-"cdq" \
-"mov [edi],eax" \
-"mov [edi+4],edx" \
-parm[edi] [esi] \
-modify[eax edx];
-
-
-
-
-
-
-
-
-
-/*
-
- Fixed Point Multiply.
-
-
- 16.16 * 16.16 -> 16.16
- or
- 16.16 * 0.32 -> 0.32
-
- A proper version of this function ought to read
- 16.16 * 16.16 -> 32.16
- but this would require a long long result
-
- Algorithm:
-
- Take the mid 32 bits of the 64 bit result
-
-*/
-
-/*
- These functions have been checked for suitability for
- a Pentium and look as if they would work adequately.
- Might be worth a more detailed look at optimising
- them though.
-*/
-
-#if 0
-
-int MUL_FIXED(int a, int b);
-# pragma aux MUL_FIXED = \
-"imul edx" \
-"mov ax,dx" \
-"rol eax,16" \
-parm[eax] [edx] \
-value[eax] \
-modify[edx];
-
-#else
-
-int MUL_FIXED(int a, int b);
-# pragma aux MUL_FIXED = \
-"imul edx" \
-"shrd eax,edx,16" \
-parm[eax] [edx] \
-value[eax] \
-modify[edx];
-
-#endif
-
-
-/*
-
- Fixed Point Divide - returns a / b
-
-*/
-
-int DIV_FIXED(int a, int b);
-# pragma aux DIV_FIXED = \
-"cdq" \
-"rol eax,16" \
-"mov dx,ax" \
-"xor ax,ax" \
-"idiv ebx" \
-parm[eax] [ebx] \
-value[eax] \
-modify[edx];
-
-
-
-
-/*
-
- Multiply and Divide Functions.
-
-*/
-
-
-/*
-
- 32/32 division
-
- This macro is a function on some other platforms
-
-*/
-
-#define DIV_INT(a, b) ((a) / (b))
-
-
-
-
-/*
-
- A Narrowing 64/32 Division
-
-*/
-
-int NarrowDivide(LONGLONGCH *a, int b);
-# pragma aux NarrowDivide = \
-"mov eax,[esi]" \
-"mov edx,[esi+4]" \
-"idiv ebx" \
-parm[esi] [ebx] \
-value[eax] \
-modify[edx];
-
-
-
-/*
-
- This function performs a Widening Multiply followed by a Narrowing Divide.
-
- a = (a * b) / c
-
-*/
-
-int WideMulNarrowDiv(int a, int b, int c);
-# pragma aux WideMulNarrowDiv = \
-"imul edx"\
-"idiv ebx" \
-parm[eax] [edx] [ebx] \
-value[eax];
-
-
-
-/*
-
- Function to rotate a VECTORCH using a MATRIXCH
-
- This is the C function
-
- x = MUL_FIXED(m->mat11, v->vx);
- x += MUL_FIXED(m->mat21, v->vy);
- x += MUL_FIXED(m->mat31, v->vz);
-
- y = MUL_FIXED(m->mat12, v->vx);
- y += MUL_FIXED(m->mat22, v->vy);
- y += MUL_FIXED(m->mat32, v->vz);
-
- z = MUL_FIXED(m->mat13, v->vx);
- z += MUL_FIXED(m->mat23, v->vy);
- z += MUL_FIXED(m->mat33, v->vz);
-
- v->vx = x;
- v->vy = y;
- v->vz = z;
-
- This is the MUL_FIXED inline assembler function
-
- imul edx
- shrd eax,edx,16
-
-
-typedef struct matrixch {
-
- int mat11; 0
- int mat12; 4
- int mat13; 8
-
- int mat21; 12
- int mat22; 16
- int mat23; 20
-
- int mat31; 24
- int mat32; 28
- int mat33; 32
-
-} MATRIXCH;
-
-*/
-
-void RotateVector_ASM(VECTORCH *v, MATRIXCH *m);
-# pragma aux RotateVector_ASM = \
-\
-"push eax" \
-"push ebx" \
-"push ecx" \
-"push edx" \
-"push ebp" \
-\
-"mov eax,[edi + 0]" \
-"imul DWORD PTR [esi + 0]" \
-"shrd eax,edx,16" \
-"mov ecx,eax"\
-"mov eax,[edi + 12]" \
-"imul DWORD PTR [esi + 4]" \
-"shrd eax,edx,16" \
-"add ecx,eax" \
-"mov eax,[edi + 24]" \
-"imul DWORD PTR [esi + 8]" \
-"shrd eax,edx,16" \
-"add ecx,eax" \
-\
-"mov eax,[edi + 4]" \
-"imul DWORD PTR [esi + 0]" \
-"shrd eax,edx,16" \
-"mov ebx,eax"\
-"mov eax,[edi + 16]" \
-"imul DWORD PTR [esi + 4]" \
-"shrd eax,edx,16" \
-"add ebx,eax" \
-"mov eax,[edi + 28]" \
-"imul DWORD PTR [esi + 8]" \
-"shrd eax,edx,16" \
-"add ebx,eax" \
-\
-"mov eax,[edi + 8]" \
-"imul DWORD PTR [esi + 0]" \
-"shrd eax,edx,16" \
-"mov ebp,eax"\
-"mov eax,[edi + 20]" \
-"imul DWORD PTR [esi + 4]" \
-"shrd eax,edx,16" \
-"add ebp,eax" \
-"mov eax,[edi + 32]" \
-"imul DWORD PTR [esi + 8]" \
-"shrd eax,edx,16" \
-"add ebp,eax" \
-\
-"mov [esi + 0],ecx" \
-"mov [esi + 4],ebx" \
-"mov [esi + 8],ebp" \
-\
-"pop ebp" \
-"pop edx" \
-"pop ecx" \
-"pop ebx" \
-"pop eax" \
-\
-parm[esi] [edi];
-
-
-/*
-
- Here is the same function, this time copying the result to a second vector
-
-*/
-
-void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m);
-# pragma aux RotateAndCopyVector_ASM = \
-\
-"push eax" \
-"push ebx" \
-"push ecx" \
-"push ebp" \
-\
-"push edx" \
-"mov eax,[edi + 0]" \
-"imul DWORD PTR [esi + 0]" \
-"shrd eax,edx,16" \
-"mov ecx,eax"\
-"mov eax,[edi + 12]" \
-"imul DWORD PTR [esi + 4]" \
-"shrd eax,edx,16" \
-"add ecx,eax" \
-"mov eax,[edi + 24]" \
-"imul DWORD PTR [esi + 8]" \
-"shrd eax,edx,16" \
-"add ecx,eax" \
-\
-"mov eax,[edi + 4]" \
-"imul DWORD PTR [esi + 0]" \
-"shrd eax,edx,16" \
-"mov ebx,eax"\
-"mov eax,[edi + 16]" \
-"imul DWORD PTR [esi + 4]" \
-"shrd eax,edx,16" \
-"add ebx,eax" \
-"mov eax,[edi + 28]" \
-"imul DWORD PTR [esi + 8]" \
-"shrd eax,edx,16" \
-"add ebx,eax" \
-\
-"mov eax,[edi + 8]" \
-"imul DWORD PTR [esi + 0]" \
-"shrd eax,edx,16" \
-"mov ebp,eax"\
-"mov eax,[edi + 20]" \
-"imul DWORD PTR [esi + 4]" \
-"shrd eax,edx,16" \
-"add ebp,eax" \
-"mov eax,[edi + 32]" \
-"imul DWORD PTR [esi + 8]" \
-"shrd eax,edx,16" \
-"add ebp,eax" \
-\
-"pop edx" \
-"mov [edx + 0],ecx" \
-"mov [edx + 4],ebx" \
-"mov [edx + 8],ebp" \
-\
-"pop ebp" \
-"pop ecx" \
-"pop ebx" \
-"pop eax" \
-\
-parm[esi] [edx] [edi];
-
-
-
-
-#if (SupportFPMathsFunctions || SupportFPSquareRoot)
-
-/*
-
- Square Root
-
- Returns the Square Root of a 32-bit number
-
-*/
-
-static long temp;
-static long temp2;
-
-int SqRoot32(int A);
-# pragma aux SqRoot32 = \
-"finit" \
-"mov temp,eax" \
-"fild temp" \
-"fsqrt" \
-"fistp temp2" \
-"fwait" \
-"mov eax,temp2" \
-parm[eax] \
-value[eax];
-
-#endif
-
-
-/*
-
- This may look ugly (it is) but it is a MUCH faster way to convert "float" into "int" than
- the function call "CHP" used by the WATCOM compiler.
-
-*/
-
-static float fptmp;
-static int itmp;
-
-void FloatToInt(void);
-# pragma aux FloatToInt = \
-"fld fptmp" \
-"fistp itmp";
-
-/*
-
- This macro makes usage of the above function easier and more elegant
-
-*/
-
-#define f2i(a, b) { \
-fptmp = (b); \
-FloatToInt(); \
-a = itmp;}
-
-#elif defined(_MSC_VER) && 0 /* inline assember for the Microsoft compiler */
+#if defined(_MSC_VER) && 0 /* inline assember for the Microsoft compiler */
/* ADD */
@@ -1111,7 +585,7 @@ a = itmp;}
#else
-#if 1 /* GCC! */
+/* inline assembly has been moved to mathline.c */
void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a);
void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
@@ -1147,752 +621,6 @@ fti_fptmp = (b); \
FloatToInt(); \
a = fti_itmp;}
-#else /* inline stuff */
-
-/* ADD */
-
-
-static __inline__ void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c)
-{
-/*
- _asm
- {
- mov esi,a
- mov edi,b
- mov ebx,c
- mov eax,[esi]
- mov edx,[esi+4]
- add eax,[edi]
- adc edx,[edi+4]
- mov [ebx],eax
- mov [ebx+4],edx
- }
-*/
-
-__asm__("movl 0(%%esi), %%eax \n\t"
- "movl 4(%%esi), %%edx \n\t"
- "addl 0(%%edi), %%eax \n\t"
- "adcl 4(%%edi), %%edx \n\t"
- "movl %%eax, 0(%%ebx) \n\t"
- "movl %%edx, 4(%%ebx) \n\t"
- :
- : "S" (a), "D" (b), "b" (c)
- : "%eax", "%edx", "memory", "cc"
- );
-
-/*
-__asm__("movl 0(%%esi), %%eax \n\t"
- "movl 4(%%esi), %%edx \n\t"
- "addl 0(%%edi), %%eax \n\t"
- "adcl 4(%%edi), %%edx \n\t"
- : "=a" (c->lo32), "=d" (c->hi32)
- : "S" (a), "D" (b)
- );
-*/
-}
-
-/* ADD ++ */
-
-static __inline__ void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a)
-{
-/*
- _asm
- {
- mov edi,c
- mov esi,a
- mov eax,[esi]
- mov edx,[esi+4]
- add [edi],eax
- adc [edi+4],edx
- }
-*/
-__asm__("movl 0(%%esi), %%eax \n\t"
- "movl 4(%%esi), %%edx \n\t"
- "addl %%eax, 0(%%edi) \n\t"
- "adcl %%edx, 4(%%edi) \n\t"
- :
- : "D" (c), "S" (a)
- : "%eax", "%edx", "memory", "cc"
- );
-}
-
-/* SUB */
-
-static __inline__ void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c)
-{
-/*
- _asm
- {
- mov esi,a
- mov edi,b
- mov ebx,c
- mov eax,[esi]
- mov edx,[esi+4]
- sub eax,[edi]
- sbb edx,[edi+4]
- mov [ebx],eax
- mov [ebx+4],edx
- }
-*/
-__asm__("movl 0(%%esi), %%eax \n\t"
- "movl 4(%%esi), %%edx \n\t"
- "subl 0(%%edi), %%eax \n\t"
- "sbbl 4(%%edi), %%edx \n\t"
- "movl %%eax, 0(%%ebx) \n\t"
- "movl %%edx, 4(%%ebx) \n\t"
- :
- : "S" (a), "D" (b), "b" (c)
- : "%eax", "%edx", "memory", "cc"
- );
-}
-
-/* SUB -- */
-
-static __inline__ void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a)
-{
-/*
- _asm
- {
- mov edi,c
- mov esi,a
- mov eax,[esi]
- mov edx,[esi+4]
- sub [edi],eax
- sbb [edi+4],edx
- }
-*/
-__asm__("movl 0(%%esi), %%eax \n\t"
- "movl 4(%%esi), %%edx \n\t"
- "subl %%eax, 0(%%edi) \n\t"
- "sbbl %%edx, 4(%%edi) \n\t"
- :
- : "D" (c), "S" (a)
- : "%eax", "%edx", "memory", "cc"
- );
-}
-
-/*
-
- MUL
-
- This is the multiply we use, the 32 x 32 = 64 widening version
-
-*/
-
-static __inline__ void MUL_I_WIDE(int a, int b, LONGLONGCH *c)
-{
-/*
- _asm
- {
- mov eax,a
- mov ebx,c
- imul b
- mov [ebx],eax
- mov [ebx+4],edx
- }
-*/
-__asm__("imull %2 \n\t"
- "movl %%eax, 0(%%ebx) \n\t"
- "movl %%edx, 4(%%ebx) \n\t"
- :
- : "a" (a), "b" (c), "q" (b)
- : "%edx", "memory", "cc"
- );
-}
-
-/*
-
- CMP
-
- This substitutes for ==, >, <, >=, <=
-
-*/
-
-static __inline__ int CMP_LL(LONGLONGCH *a, LONGLONGCH *b)
-{
- int retval;
-/*
- _asm
- {
- mov ebx,a
- mov ecx,b
- mov eax,[ebx]
- mov edx,[ebx+4]
- sub eax,[ecx]
- sbb edx,[ecx+4]
- and edx,edx
- jne llnz
- and eax,eax
- je llgs
- llnz:
- mov retval,1
- and edx,edx
- jge llgs
- neg retval
- llgs:
- }
-*/
-/* TODO */
-__asm__("movl 0(%%ebx), %%eax \n\t"
- "movl 4(%%ebx), %%edx \n\t"
- "subl 0(%%ecx), %%eax \n\t"
- "sbbl 4(%%ecx), %%edx \n\t"
- "xorl %0, %0 \n\t" /* hopefully it doesn't pick %eax or %edx */
- "andl %%edx, %%edx \n\t"
- "jne 0 \n\t" /* llnz */
- "andl %%eax, %%eax \n\t"
- "je 1 \n" /* llgs */
-"0: \n\t" /* llnz */
- "movl $1, %0 \n\t"
- "andl %%edx, %%edx \n\t"
- "jge 1 \n\t" /* llgs */
- "negl %0 \n"
-"1: \n\t" /* llgs */
- : "=r" (retval)
- : "b" (a), "c" (b)
- : "%eax", "%edx", "memory", "cc"
- );
-
- return retval;
-}
-
-/* EQUALS */
-
-static __inline__ void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b)
-{
-/*
- _asm
- {
- mov edi,a
- mov esi,b
- mov eax,[esi]
- mov edx,[esi+4]
- mov [edi],eax
- mov [edi+4],edx
- }
-*/
-__asm__("movl 0(%%esi), %%eax \n\t"
- "movl 4(%%esi), %%edx \n\t"
- "movl %%eax, 0(%%edi) \n\t"
- "movl %%edx, 4(%%edi) \n\t"
- :
- : "D" (a), "S" (b)
- : "%eax", "%edx", "memory"
- );
-}
-
-/* NEGATE */
-
-static __inline__ void NEG_LL(LONGLONGCH *a)
-{
-/*
- _asm
- {
- mov esi,a
- not dword ptr[esi]
- not dword ptr[esi+4]
- add dword ptr[esi],1
- adc dword ptr[esi+4],0
- }
-*/
-__asm__("notl 0(%%esi) \n\t"
- "notl 4(%%esi) \n\t"
- "addl $1, 0(%%esi) \n\t"
- "adcl $0, 4(%%esi) \n\t"
- :
- : "S" (a)
- : "memory", "cc"
- );
-}
-
-/* ASR */
-
-static __inline__ void ASR_LL(LONGLONGCH *a, int shift)
-{
-/*
- _asm
- {
- mov esi,a
- mov eax,shift
- and eax,eax
- jle asrdn
- asrlp:
- sar dword ptr[esi+4],1
- rcr dword ptr[esi],1
- dec eax
- jne asrlp
- asrdn:
- }
-*/
-__asm__("andl %%eax, %%eax \n\t"
- "jle 0 \n" /* asrdn */
-"1: \n\t" /* asrlp */
- "sarl $1, 4(%%esi) \n\t"
- "rcrl $1, 0(%%esi) \n\t"
- "decl %%eax \n\t"
- "jne 1 \n"
-"0: \n\t"
- :
- : "S" (a), "a" (shift)
- : "memory", "cc"
- );
-
-}
-
-/* Convert int to LONGLONGCH */
-
-static __inline__ void IntToLL(LONGLONGCH *a, int *b)
-{
-/*
- _asm
- {
- mov esi,b
- mov edi,a
- mov eax,[esi]
- cdq
- mov [edi],eax
- mov [edi+4],edx
- }
-*/
-__asm__("movl 0(%%esi), %%eax \n\t"
- "cdq \n\t"
- "movl %%eax, 0(%%edi) \n\t"
- "movl %%edx, 4(%%edi) \n\t"
- :
- : "S" (b), "D" (a)
- : "%eax", "%edx", "memory", "cc"
- );
-
-}
-
-/*
-
- Fixed Point Multiply.
-
-
- 16.16 * 16.16 -> 16.16
- or
- 16.16 * 0.32 -> 0.32
-
- A proper version of this function ought to read
- 16.16 * 16.16 -> 32.16
- but this would require a long long result
-
- Algorithm:
-
- Take the mid 32 bits of the 64 bit result
-
-*/
-
-/*
- These functions have been checked for suitability for
- a Pentium and look as if they would work adequately.
- Might be worth a more detailed look at optimising
- them though.
-*/
-
-static __inline__ int MUL_FIXED(int a, int b)
-{
- int retval;
-/*
- _asm
- {
- mov eax,a
- imul b
- shrd eax,edx,16
- mov retval,eax
- }
-*/
-/* TODO */
-__asm__("imull %2 \n\t"
- "shrdl $16, %%edx, %%eax \n\t"
- : "=a" (retval)
- : "a" (a), "q" (b)
- : "%edx", "cc"
- );
- return retval;
-}
-
-/*
-
- Fixed Point Divide - returns a / b
-
-*/
-
-static __inline__ int DIV_FIXED(int a, int b)
-{
- int retval;
-/*
- _asm
- {
- mov eax,a
- cdq
- rol eax,16
- mov dx,ax
- xor ax,ax
- idiv b
- mov retval,eax
- }
-*/
-/* TODO */
-__asm__("cdq \n\t"
- "roll $16, %%eax \n\t"
- "mov %%ax, %%dx \n\t"
- "xor %%ax, %%ax \n\t"
- "idivl %2 \n\t"
- : "=a" (retval)
- : "a" (a), "q" (b)
- : "%edx", "cc"
- );
- return retval;
-}
-
-/*
-
- Multiply and Divide Functions.
-
-*/
-
-
-/*
-
- 32/32 division
-
- This macro is a function on some other platforms
-
-*/
-
-#define DIV_INT(a, b) ((a) / (b))
-
-/*
-
- A Narrowing 64/32 Division
-
-*/
-
-static __inline__ int NarrowDivide(LONGLONGCH *a, int b)
-{
- int retval;
-/*
- _asm
- {
- mov esi,a
- mov eax,[esi]
- mov edx,[esi+4]
- idiv b
- mov retval,eax
- }
-*/
-__asm__("movl 0(%%esi), %%eax \n\t"
- "movl 4(%%esi), %%edx \n\t"
- "idivl %2 \n\t"
- : "=a" (retval)
- : "S" (a), "q" (b)
- : "%edx", "cc"
- );
- return retval;
-}
-
-/*
-
- This function performs a Widening Multiply followed by a Narrowing Divide.
-
- a = (a * b) / c
-
-*/
-
-static __inline__ int WideMulNarrowDiv(int a, int b, int c)
-{
-#if 0 /* TODO: broken? */
- int retval;
-/*
- _asm
- {
- mov eax,a
- imul b
- idiv c
- mov retval,eax
- }
-*/
-/* TODO */
-__asm__("imull %2 \n\t"
- "idivl %3 \n\t"
- : "=a" (retval)
- : "a" (a), "q" (b), "q" (c)
- : "cc"
- );
- return retval;
-#endif
- return (a * b) / c;
-}
-
-/*
-
- Function to rotate a VECTORCH using a MATRIXCH
-
- This is the C function
-
- x = MUL_FIXED(m->mat11, v->vx);
- x += MUL_FIXED(m->mat21, v->vy);
- x += MUL_FIXED(m->mat31, v->vz);
-
- y = MUL_FIXED(m->mat12, v->vx);
- y += MUL_FIXED(m->mat22, v->vy);
- y += MUL_FIXED(m->mat32, v->vz);
-
- z = MUL_FIXED(m->mat13, v->vx);
- z += MUL_FIXED(m->mat23, v->vy);
- z += MUL_FIXED(m->mat33, v->vz);
-
- v->vx = x;
- v->vy = y;
- v->vz = z;
-
- This is the MUL_FIXED inline assembler function
-
- imul edx
- shrd eax,edx,16
-
-
-typedef struct matrixch {
-
- int mat11; 0
- int mat12; 4
- int mat13; 8
-
- int mat21; 12
- int mat22; 16
- int mat23; 20
-
- int mat31; 24
- int mat32; 28
- int mat33; 32
-
-} MATRIXCH;
-
-*/
-
-#if 0 /* TODO if these are needed */
-static void RotateVector_ASM(VECTORCH *v, MATRIXCH *m)
-{
- _asm
- {
- mov esi,v
- mov edi,m
-
- mov eax,[edi + 0]
- imul DWORD PTR [esi + 0]
- shrd eax,edx,16
- mov ecx,eax
- mov eax,[edi + 12]
- imul DWORD PTR [esi + 4]
- shrd eax,edx,16
- add ecx,eax
- mov eax,[edi + 24]
- imul DWORD PTR [esi + 8]
- shrd eax,edx,16
- add ecx,eax
-
- mov eax,[edi + 4]
- imul DWORD PTR [esi + 0]
- shrd eax,edx,16
- mov ebx,eax
- mov eax,[edi + 16]
- imul DWORD PTR [esi + 4]
- shrd eax,edx,16
- add ebx,eax
- mov eax,[edi + 28]
- imul DWORD PTR [esi + 8]
- shrd eax,edx,16
- add ebx,eax
-
- mov eax,[edi + 8]
- imul DWORD PTR [esi + 0]
- shrd eax,edx,16
- mov ebp,eax
- mov eax,[edi + 20]
- imul DWORD PTR [esi + 4]
- shrd eax,edx,16
- add ebp,eax
- mov eax,[edi + 32]
- imul DWORD PTR [esi + 8]
- shrd eax,edx,16
- add ebp,eax
-
- mov [esi + 0],ecx
- mov [esi + 4],ebx
- mov [esi + 8],ebp
- }
-}
-
-/*
-
- Here is the same function, this time copying the result to a second vector
-
-*/
-
-static void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m)
-{
- _asm
- {
- mov esi,v1
- mov edi,m
-
- mov eax,[edi + 0]
- imul DWORD PTR [esi + 0]
- shrd eax,edx,16
- mov ecx,eax
- mov eax,[edi + 12]
- imul DWORD PTR [esi + 4]
- shrd eax,edx,16
- add ecx,eax
- mov eax,[edi + 24]
- imul DWORD PTR [esi + 8]
- shrd eax,edx,16
- add ecx,eax
-
- mov eax,[edi + 4]
- imul DWORD PTR [esi + 0]
- shrd eax,edx,16
- mov ebx,eax
- mov eax,[edi + 16]
- imul DWORD PTR [esi + 4]
- shrd eax,edx,16
- add ebx,eax
- mov eax,[edi + 28]
- imul DWORD PTR [esi + 8]
- shrd eax,edx,16
- add ebx,eax
-
- mov eax,[edi + 8]
- imul DWORD PTR [esi + 0]
- shrd eax,edx,16
- mov ebp,eax
- mov eax,[edi + 20]
- imul DWORD PTR [esi + 4]
- shrd eax,edx,16
- add ebp,eax
- mov eax,[edi + 32]
- imul DWORD PTR [esi + 8]
- shrd eax,edx,16
- add ebp,eax
-
- mov edx,v2
- mov [edx + 0],ecx
- mov [edx + 4],ebx
- mov [edx + 8],ebp
- }
-}
-#endif
-
-#if (SupportFPMathsFunctions || SupportFPSquareRoot)
-
-/*
-
- Square Root
-
- Returns the Square Root of a 32-bit number
-
-*/
-
-extern int sqrt_temp1;
-extern int sqrt_temp2;
-
-#include <math.h>
-static __inline__ int SqRoot32(int A)
-{
-#if 0
- sqrt_temp1 = A;
-/*
- _asm
- {
- finit
- fild A
- fsqrt
- fistp temp2
- fwait
- }
-*/
-
-__asm__("finit \n\t"
- "fild sqrt_temp1 \n\t"
- "fsqrt \n\t"
- "fistp sqrt_temp2 \n\t"
- "fwait \n\t"
- :
- :
- : "memory", "cc"
- );
-
- return sqrt_temp2;
-#endif
-{ /* TODO: clean this please */
- double x = A;
- double retvald = sqrt(x);
- int retval = retvald;
- return retval;
-}
-}
-
-#endif
-
-
-/*
-
- This may look ugly (it is) but it is a MUCH faster way to convert "float" into "int" than
- the function call "CHP" used by the WATCOM compiler.
-
-*/
-
-extern float fti_fptmp;
-extern int fti_itmp;
-
-static __inline__ int FloatToInt(float fptmp)
-{
-#if 0
- fti_fptmp = fptmp;
-/*
- _asm
- {
- fld fptmp
- fistp itmp
- }
-*/
-__asm__("fld fti_fptmp \n\t"
- "fistp fti_itmp \n\t"
- :
- :
- : "memory", "cc"
- );
-
- return fti_itmp;
-#endif
-
- return fptmp;
-}
-
-/*
-
- This macro makes usage of the above function easier and more elegant
-
-*/
-
-#define f2i(a, b) { \
-a = FloatToInt(b); \
-}
-
-
-#if 0
-int SqRoot32(int A);
-void FloatToInt();
-#define f2i(a, b) { \
-fti_fptmp = (b); \
-FloatToInt(); \
-a = fti_itmp;}
-#endif
-
-#endif
-
#endif
int WideMul2NarrowDiv(int a, int b, int c, int d, int e);