summaryrefslogtreecommitdiff
path: root/src/win95
diff options
context:
space:
mode:
Diffstat (limited to 'src/win95')
-rw-r--r--src/win95/inline.h720
-rw-r--r--src/win95/plspecfn.c180
2 files changed, 723 insertions, 177 deletions
diff --git a/src/win95/inline.h b/src/win95/inline.h
index f09e79a..6054f29 100644
--- a/src/win95/inline.h
+++ b/src/win95/inline.h
@@ -1215,9 +1215,9 @@ fptmp = (b); \
FloatToInt(); \
a = itmp;}
-#else /* other compiler ? */
+#else
-/* #error "Unknown compiler" */
+#if 0
void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a);
void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
@@ -1240,6 +1240,722 @@ void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m);
int FloatToInt(float);
#define f2i(a, b) { a = FloatToInt(b); }
+#endif
+
+/* ADD */
+
+static __inline__ void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c)
+{
+/*
+ _asm
+ {
+ mov esi,a
+ mov edi,b
+ mov ebx,c
+ mov eax,[esi]
+ mov edx,[esi+4]
+ add eax,[edi]
+ adc edx,[edi+4]
+ mov [ebx],eax
+ mov [ebx+4],edx
+ }
+*/
+
+__asm__("movl 0(%%esi), %%eax \n\t"
+ "movl 4(%%esi), %%edx \n\t"
+ "addl 0(%%edi), %%eax \n\t"
+ "adcl 4(%%edi), %%edx \n\t"
+ "movl %%eax, 0(%%ebx) \n\t"
+ "movl %%edx, 4(%%ebx) \n\t"
+ :
+ : "S" (a), "D" (b), "b" (c)
+ : "%eax", "%edx", "memory", "cc"
+ );
+
+/*
+__asm__("movl 0(%%esi), %%eax \n\t"
+ "movl 4(%%esi), %%edx \n\t"
+ "addl 0(%%edi), %%eax \n\t"
+ "adcl 4(%%edi), %%edx \n\t"
+ : "=a" (c->lo32), "=d" (c->hi32)
+ : "S" (a), "D" (b)
+ );
+*/
+}
+
+/* ADD ++ */
+
+static __inline__ void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a)
+{
+/*
+ _asm
+ {
+ mov edi,c
+ mov esi,a
+ mov eax,[esi]
+ mov edx,[esi+4]
+ add [edi],eax
+ adc [edi+4],edx
+ }
+*/
+__asm__("movl 0(%%esi), %%eax \n\t"
+ "movl 4(%%esi), %%edx \n\t"
+ "addl %%eax, 0(%%edi) \n\t"
+ "adcl %%edx, 4(%%edi) \n\t"
+ :
+ : "D" (c), "S" (a)
+ : "%eax", "%edx", "memory", "cc"
+ );
+}
+
+/* SUB */
+
+static __inline__ void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c)
+{
+/*
+ _asm
+ {
+ mov esi,a
+ mov edi,b
+ mov ebx,c
+ mov eax,[esi]
+ mov edx,[esi+4]
+ sub eax,[edi]
+ sbb edx,[edi+4]
+ mov [ebx],eax
+ mov [ebx+4],edx
+ }
+*/
+__asm__("movl 0(%%esi), %%eax \n\t"
+ "movl 4(%%esi), %%edx \n\t"
+ "subl 0(%%edi), %%eax \n\t"
+ "sbbl 4(%%edi), %%edx \n\t"
+ "movl %%eax, 0(%%ebx) \n\t"
+ "movl %%edx, 4(%%ebx) \n\t"
+ :
+ : "S" (a), "D" (b), "b" (c)
+ : "%eax", "%edx", "memory", "cc"
+ );
+}
+
+/* SUB -- */
+
+static __inline__ void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a)
+{
+/*
+ _asm
+ {
+ mov edi,c
+ mov esi,a
+ mov eax,[esi]
+ mov edx,[esi+4]
+ sub [edi],eax
+ sbb [edi+4],edx
+ }
+*/
+__asm__("movl 0(%%esi), %%eax \n\t"
+ "movl 4(%%esi), %%edx \n\t"
+ "subl %%eax, 0(%%edi) \n\t"
+ "sbbl %%edx, 4(%%edi) \n\t"
+ :
+ : "D" (c), "S" (a)
+ : "%eax", "%edx", "memory", "cc"
+ );
+}
+
+/*
+
+ MUL
+
+ This is the multiply we use, the 32 x 32 = 64 widening version
+
+*/
+
+static __inline__ void MUL_I_WIDE(int a, int b, LONGLONGCH *c)
+{
+/*
+ _asm
+ {
+ mov eax,a
+ mov ebx,c
+ imul b
+ mov [ebx],eax
+ mov [ebx+4],edx
+ }
+*/
+__asm__("imull %0 \n\t"
+ "movl %%eax, 0(%%ebx) \n\t"
+ "movl %%edx, 4(%%ebx) \n\t"
+ :
+ : "a" (a), "b" (c), "q" (b)
+ : "%edx", "memory", "cc"
+ );
+}
+
+/*
+
+ CMP
+
+ This substitutes for ==, >, <, >=, <=
+
+*/
+
+static __inline__ int CMP_LL(LONGLONGCH *a, LONGLONGCH *b)
+{
+ int retval;
+/*
+ _asm
+ {
+ mov ebx,a
+ mov ecx,b
+ mov eax,[ebx]
+ mov edx,[ebx+4]
+ sub eax,[ecx]
+ sbb edx,[ecx+4]
+ and edx,edx
+ jne llnz
+ and eax,eax
+ je llgs
+ llnz:
+ mov retval,1
+ and edx,edx
+ jge llgs
+ neg retval
+ llgs:
+ }
+*/
+/* TODO */
+__asm__("xorl %0, %0 \n\t"
+ "movl 0(%%ebx), %%eax \n\t"
+ "movl 4(%%ebx), %%edx \n\t"
+ "subl 0(%%ecx), %%eax \n\t"
+ "sbbl 4(%%ecx), %%edx \n\t"
+ "andl %%edx, %%edx \n\t"
+ "jne llnz \n\t"
+ "andl %%eax, %%eax \n\t"
+ "je llgs \n"
+"llnz: \n\t"
+ "movl $1, %0 \n\t"
+ "andl %%edx, %%edx \n\t"
+ "jge llgs \n\t"
+ "negl %0 \n"
+"llgs: \n\t"
+ : "=r" (retval)
+ : "b" (a), "c" (b)
+ : "%eax", "%edx", "memory", "cc"
+ );
+
+ return retval;
+}
+
+/* EQUALS */
+
+static __inline__ void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b)
+{
+/*
+ _asm
+ {
+ mov edi,a
+ mov esi,b
+ mov eax,[esi]
+ mov edx,[esi+4]
+ mov [edi],eax
+ mov [edi+4],edx
+ }
+*/
+__asm__("movl 0(%%esi), %%eax \n\t"
+ "movl 4(%%esi), %%edx \n\t"
+ "movl %%eax, 0(%%edi) \n\t"
+ "movl %%edx, 4(%%edi) \n\t"
+ :
+ : "D" (a), "S" (b)
+ : "%eax", "%edx", "memory"
+ );
+}
+
+/* NEGATE */
+
+static __inline__ void NEG_LL(LONGLONGCH *a)
+{
+/*
+ _asm
+ {
+ mov esi,a
+ not dword ptr[esi]
+ not dword ptr[esi+4]
+ add dword ptr[esi],1
+ adc dword ptr[esi+4],0
+ }
+*/
+__asm__("notl 0(%%esi) \n\t"
+ "notl 4(%%esi) \n\t"
+ "addl $1, 0(%%esi) \n\t"
+ "adcl $0, 4(%%esi) \n\t"
+ :
+ : "S" (a)
+ : "memory", "cc"
+ );
+}
+
+/* ASR */
+
+static __inline__ void ASR_LL(LONGLONGCH *a, int shift)
+{
+/*
+ _asm
+ {
+ mov esi,a
+ mov eax,shift
+ and eax,eax
+ jle asrdn
+ asrlp:
+ sar dword ptr[esi+4],1
+ rcr dword ptr[esi],1
+ dec eax
+ jne asrlp
+ asrdn:
+ }
+*/
+__asm__("andl %%eax, %%eax \n\t"
+ "jle asrdn \n"
+"asrlp: \n\t"
+ "sarl $1, 4(%%esi) \n\t"
+ "rcrl $1, 0(%%esi) \n\t"
+ "decl %%eax \n\t"
+ "jne asrlp \n"
+"asrdn: \n\t"
+ :
+ : "S" (a), "a" (shift)
+ : "memory", "cc"
+ );
+
+}
+
+/* Convert int to LONGLONGCH */
+
+static __inline__ void IntToLL(LONGLONGCH *a, int *b)
+{
+/*
+ _asm
+ {
+ mov esi,b
+ mov edi,a
+ mov eax,[esi]
+ cdq
+ mov [edi],eax
+ mov [edi+4],edx
+ }
+*/
+__asm__("movl 0(%%esi), %%eax \n\t"
+ "cdq \n\t"
+ "movl %%eax, 0(%%edi) \n\t"
+ "movl %%edx, 4(%%edi) \n\t"
+ :
+ : "S" (b), "D" (a)
+ : "%eax", "%edx", "memory", "cc"
+ );
+
+}
+
+/*
+
+ Fixed Point Multiply.
+
+
+ 16.16 * 16.16 -> 16.16
+ or
+ 16.16 * 0.32 -> 0.32
+
+ A proper version of this function ought to read
+ 16.16 * 16.16 -> 32.16
+ but this would require a long long result
+
+ Algorithm:
+
+ Take the mid 32 bits of the 64 bit result
+
+*/
+
+/*
+ These functions have been checked for suitability for
+ a Pentium and look as if they would work adequately.
+ Might be worth a more detailed look at optimising
+ them though.
+*/
+
/*
 * 16.16 fixed-point multiply: returns (a * b) >> 16.
 *
 * imull produces the full 64-bit product in edx:eax, so the
 * intermediate cannot overflow; shrd then extracts bits 16..47.
 *
 * BUG FIX: the previous template used "imull %0", but operand 0 is the
 * eax output/input, so it computed (a * a) >> 16 and ignored b
 * entirely.  Operand 2 is b.
 */
static __inline__ int MUL_FIXED(int a, int b)
{
	int retval;

	__asm__("imull %2                \n\t"   /* edx:eax = a * b        */
		"shrdl $16, %%edx, %%eax \n\t"   /* eax = product >> 16    */
		: "=a" (retval)
		: "a" (a), "rm" (b)
		: "%edx", "cc");

	return retval;
}
+
+/*
+
+ Fixed Point Divide - returns a / b
+
+*/
+
/*
 * 16.16 fixed-point divide: returns (a << 16) / b.
 *
 * The numerator is widened to the 64-bit edx:eax pair before idiv:
 * cdq supplies the sign, then the rotate/mov/xor sequence shifts a
 * left by 16 across the register pair.
 *
 * BUG FIX: the previous template used "idivl %0", but operand 0 is the
 * eax output, i.e. it divided by the numerator register instead of b.
 * Operand 2 is b.
 */
static __inline__ int DIV_FIXED(int a, int b)
{
	int retval;

	__asm__("cdq             \n\t"   /* edx = sign extension of a      */
		"roll $16, %%eax \n\t"   /* swap the halves of a           */
		"mov %%ax, %%dx  \n\t"   /* dx = high 16 bits of a         */
		"xor %%ax, %%ax  \n\t"   /* edx:eax = (64-bit)a << 16      */
		"idivl %2        \n\t"   /* eax = (a << 16) / b            */
		: "=a" (retval)
		: "a" (a), "rm" (b)
		: "%edx", "cc");

	return retval;
}
+
+/*
+
+ Multiply and Divide Functions.
+
+*/
+
+
+/*
+
+ 32/32 division
+
+ This macro is a function on some other platforms
+
+*/
+
+#define DIV_INT(a, b) ((a) / (b))
+
+/*
+
+ A Narrowing 64/32 Division
+
+*/
+
+static __inline__ int NarrowDivide(LONGLONGCH *a, int b)
+{
+ int retval;
+/*
+ _asm
+ {
+ mov esi,a
+ mov eax,[esi]
+ mov edx,[esi+4]
+ idiv b
+ mov retval,eax
+ }
+*/
+__asm__("movl 0(%%esi), %%eax \n\t"
+ "movl 4(%%esi), %%edx \n\t"
+ "idivl %0 \n\t"
+ : "=a" (retval)
+ : "S" (a), "q" (b)
+ : "%edx", "cc"
+ );
+ return retval;
+}
+
+/*
+
+ This function performs a Widening Multiply followed by a Narrowing Divide.
+
+ a = (a * b) / c
+
+*/
+
/*
 * Widening multiply followed by a narrowing divide: returns (a * b) / c
 * with the intermediate product held in 64 bits (edx:eax), so a * b may
 * exceed 32 bits as long as the final quotient fits.
 *
 * BUG FIXES vs the previous version:
 *  - "imull %0" / "idivl %1" both resolved to eax (the output and the
 *    a input), computing (a * a) / a; the correct operands are
 *    %2 (b) and %3 (c).
 *  - edx is written by imull and consumed/overwritten by idivl but was
 *    missing from the clobber list, so gcc could have placed b or c there.
 */
static __inline__ int WideMulNarrowDiv(int a, int b, int c)
{
	int retval;

	__asm__("imull %2 \n\t"   /* edx:eax = a * b (widening) */
		"idivl %3 \n\t"   /* eax = (a * b) / c          */
		: "=a" (retval)
		: "a" (a), "rm" (b), "rm" (c)
		: "%edx", "cc");

	return retval;
}
+
+/*
+
+ Function to rotate a VECTORCH using a MATRIXCH
+
+ This is the C function
+
+ x = MUL_FIXED(m->mat11, v->vx);
+ x += MUL_FIXED(m->mat21, v->vy);
+ x += MUL_FIXED(m->mat31, v->vz);
+
+ y = MUL_FIXED(m->mat12, v->vx);
+ y += MUL_FIXED(m->mat22, v->vy);
+ y += MUL_FIXED(m->mat32, v->vz);
+
+ z = MUL_FIXED(m->mat13, v->vx);
+ z += MUL_FIXED(m->mat23, v->vy);
+ z += MUL_FIXED(m->mat33, v->vz);
+
+ v->vx = x;
+ v->vy = y;
+ v->vz = z;
+
+ This is the MUL_FIXED inline assembler function
+
+ imul edx
+ shrd eax,edx,16
+
+
+typedef struct matrixch {
+
+ int mat11; 0
+ int mat12; 4
+ int mat13; 8
+
+ int mat21; 12
+ int mat22; 16
+ int mat23; 20
+
+ int mat31; 24
+ int mat32; 28
+ int mat33; 32
+
+} MATRIXCH;
+
+*/
+
#if 0 /* TODO if these are needed */
/*
 * NOTE(review): the two routines below are the original Watcom/MSVC
 * inline-assembler vector rotators, kept compiled-out as a reference
 * for a future GCC port.  The _asm { } syntax does not build under GCC,
 * so this whole region must stay inside #if 0.  Each routine computes
 * the three rows of m * v using the MUL_FIXED idiom (imul; shrd 16),
 * per the matrix field offsets documented in the comment block above.
 * NOTE(review): ebp is used as a scratch register, which assumes the
 * compiler emitted a frame pointer - verify before re-enabling.
 */
static void RotateVector_ASM(VECTORCH *v, MATRIXCH *m)
{
	_asm
	{
		mov esi,v
		mov edi,m

		/* x row: mat11*vx + mat21*vy + mat31*vz -> ecx */
		mov eax,[edi + 0]
		imul DWORD PTR [esi + 0]
		shrd eax,edx,16
		mov ecx,eax
		mov eax,[edi + 12]
		imul DWORD PTR [esi + 4]
		shrd eax,edx,16
		add ecx,eax
		mov eax,[edi + 24]
		imul DWORD PTR [esi + 8]
		shrd eax,edx,16
		add ecx,eax

		/* y row: mat12*vx + mat22*vy + mat32*vz -> ebx */
		mov eax,[edi + 4]
		imul DWORD PTR [esi + 0]
		shrd eax,edx,16
		mov ebx,eax
		mov eax,[edi + 16]
		imul DWORD PTR [esi + 4]
		shrd eax,edx,16
		add ebx,eax
		mov eax,[edi + 28]
		imul DWORD PTR [esi + 8]
		shrd eax,edx,16
		add ebx,eax

		/* z row: mat13*vx + mat23*vy + mat33*vz -> ebp */
		mov eax,[edi + 8]
		imul DWORD PTR [esi + 0]
		shrd eax,edx,16
		mov ebp,eax
		mov eax,[edi + 20]
		imul DWORD PTR [esi + 4]
		shrd eax,edx,16
		add ebp,eax
		mov eax,[edi + 32]
		imul DWORD PTR [esi + 8]
		shrd eax,edx,16
		add ebp,eax

		/* write the rotated vector back over *v */
		mov [esi + 0],ecx
		mov [esi + 4],ebx
		mov [esi + 8],ebp
	}
}

/*

 Here is the same function, this time copying the result to a second vector

*/

static void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m)
{
	_asm
	{
		mov esi,v1
		mov edi,m

		/* x row -> ecx */
		mov eax,[edi + 0]
		imul DWORD PTR [esi + 0]
		shrd eax,edx,16
		mov ecx,eax
		mov eax,[edi + 12]
		imul DWORD PTR [esi + 4]
		shrd eax,edx,16
		add ecx,eax
		mov eax,[edi + 24]
		imul DWORD PTR [esi + 8]
		shrd eax,edx,16
		add ecx,eax

		/* y row -> ebx */
		mov eax,[edi + 4]
		imul DWORD PTR [esi + 0]
		shrd eax,edx,16
		mov ebx,eax
		mov eax,[edi + 16]
		imul DWORD PTR [esi + 4]
		shrd eax,edx,16
		add ebx,eax
		mov eax,[edi + 28]
		imul DWORD PTR [esi + 8]
		shrd eax,edx,16
		add ebx,eax

		/* z row -> ebp */
		mov eax,[edi + 8]
		imul DWORD PTR [esi + 0]
		shrd eax,edx,16
		mov ebp,eax
		mov eax,[edi + 20]
		imul DWORD PTR [esi + 4]
		shrd eax,edx,16
		add ebp,eax
		mov eax,[edi + 32]
		imul DWORD PTR [esi + 8]
		shrd eax,edx,16
		add ebp,eax

		/* write the rotated vector into *v2; *v1 is left untouched */
		mov edx,v2
		mov [edx + 0],ecx
		mov [edx + 4],ebx
		mov [edx + 8],ebp
	}
}
#endif
+
+#if (SupportFPMathsFunctions || SupportFPSquareRoot)
+
+/*
+
+ Square Root
+
+ Returns the Square Root of a 32-bit number
+
+*/
+
+extern int sqrt_temp1;
+extern int sqrt_temp2;
+
+static __inline__ int SqRoot32(int A)
+{
+ sqrt_temp1 = A;
+/*
+ _asm
+ {
+ finit
+ fild A
+ fsqrt
+ fistp temp2
+ fwait
+ }
+*/
+
+__asm__("finit \n\t"
+ "fild sqrt_temp1 \n\t"
+ "fsqrt \n\t"
+ "fistp sqrt_temp2 \n\t"
+ "fwait \n\t"
+ :
+ :
+ : "memory", "cc"
+ );
+
+ return sqrt_temp2;
+}
+
+#endif
+
+
+/*
+
+ This may look ugly (it is) but it is a MUCH faster way to convert "float" into "int" than
+ the function call "CHP" used by the WATCOM compiler.
+
+*/
+
+extern float fti_fptmp;
+extern int fti_itmp;
+
+static __inline__ int FloatToInt(float fptmp)
+{
+ fti_fptmp = fptmp;
+/*
+ _asm
+ {
+ fld fptmp
+ fistp itmp
+ }
+*/
+__asm__("fld fti_fptmp \n\t"
+ "fistp fti_itmp \n\t"
+ :
+ :
+ : "memory", "cc"
+ );
+
+ return fti_itmp;
+}
+
+/*
+
+ This macro makes usage of the above function easier and more elegant
+
+*/
+
+#define f2i(a, b) { \
+a = FloatToInt(b); \
+}
#endif
diff --git a/src/win95/plspecfn.c b/src/win95/plspecfn.c
index 26c9527..0efb61e 100644
--- a/src/win95/plspecfn.c
+++ b/src/win95/plspecfn.c
@@ -18,6 +18,11 @@
#include "kshape.h"
#endif
+/* globals from inline.h */
+int sqrt_temp1;
+int sqrt_temp2;
+float fti_fptmp;
+int fti_itmp;
/*
@@ -513,88 +518,6 @@ int WideMul2NarrowDiv(int a, int b, int c, int d, int e)
}
-
-
-
-/*
-
- Square Root
-
- Returns the Square Root of a 32-bit number
-
-*/
-
-#if (SupportFPMathsFunctions || SupportFPSquareRoot)
-#else
-
-
-int SqRoot32(int A)
-
-{
-
- unsigned int edx = A;
- unsigned int ecx;
-
- unsigned int ax = 0;
- unsigned int bx = 0;
- unsigned int di = 0;
-
-
- for(ecx = 15; ecx!=0; ecx--) {
-
- bx <<= 1;
- if(edx & 0x80000000) bx |= 1;
- edx <<= 1;
-
- bx <<= 1;
- if(edx & 0x80000000) bx |= 1;
- edx <<= 1;
-
- ax += ax;
- di = ax;
- di += di;
-
- if(bx > di) {
-
- di++;
- ax++;
-
- bx -= di;
-
- }
-
- }
-
- bx <<= 1;
- if(edx & 0x80000000) bx |= 1;
- edx <<= 1;
-
- bx <<= 1;
- if(edx & 0x80000000) bx |= 1;
- edx <<= 1;
-
- ax += ax;
- di = ax;
- di += di;
-
- if(bx > di) {
-
- ax++;
-
- }
-
- return ((int)ax);
-
-}
-
-
-#endif /* SupportFPMathsFunctions */
-
-
-
-
-
-
/*
Calculate Plane Normal from three POP's
@@ -1115,99 +1038,6 @@ int Magnitude(VECTORCH *v)
}
-
-
-
-
-
-
-
-
-
-/*
-
- 64-bit Square Root returns 32-bit result
-
- All 64-bit operations are now done using the type LONGLONGCH whose format
- varies from platform to platform, although it is always 64-bits in size.
-
- NOTE:
-
- Function currently not available to Watcom C users
- A Floating point version is STRONGLY advised for the PC anyway
-
-*/
-
-#if 0
-int SqRoot64(LONGLONGCH *A)
-
-{
-
-#if 0
-
- unsigned long long edx = *A;
-
- unsigned int eax = 0;
- unsigned int ebx = 0;
- unsigned int edi = 0;
-
- unsigned int ecx;
-
-
- unsigned long long TopBit = 0x8000000000000000LL;
-
- for(ecx = 31; ecx != 0; ecx--) {
-
- ebx <<= 1;
- if(edx & TopBit) ebx |= 1;
- edx <<= 1;
-
- ebx <<= 1;
- if(edx & TopBit) ebx |= 1;
- edx <<= 1;
-
- eax += eax;
- edi = eax;
- edi += edi;
-
- if(ebx > edi) {
-
- edi++;
- eax++;
- ebx -= edi;
-
- }
-
- }
-
- ebx <<= 1;
- if(edx & TopBit) ebx |= 1;
- edx <<= 1;
-
- ebx <<= 1;
- if(edx & TopBit) ebx |= 1;
- edx <<= 1;
-
- eax += eax;
- edi = eax;
- edi += edi;
-
- if(ebx > edi) {
-
- eax++;
-
- }
-
- return eax;
-
-#endif
-
- return (0);
-
-}
-
-#endif /* for #if 0 */
-
/*
Shift the 64-bit value until is LTE the limit