1 files changed, 2 insertions, 1274 deletions
diff --git a/src/win95/inline.h b/src/win95/inline.h
index da000e4..717881d 100644
--- a/src/win95/inline.h
+++ b/src/win95/inline.h
@@ -14,15 +14,6 @@
 #include "mmx_math.h"
 #endif
 
-/*
-
-
- Watcom PC Inline Functions.
-
- Watcom Standard C does not support the C++ "inline" directive, so these
- functions have been written as inline assembler instead.
-
-*/
 
 #ifdef __cplusplus
 extern "C" {
@@ -73,524 +64,7 @@ extern "C" {
 */
 
 
-#ifdef __WATCOMC__ /* inline assember for the Watcom compiler */
-
-/* ADD */
-
-void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
-# pragma aux ADD_LL = \
-"mov	eax,[esi]" \
-"mov	edx,[esi+4]" \
-"add	eax,[edi]" \
-"adc	edx,[edi+4]" \
-"mov	[ebx],eax" \
-"mov	[ebx+4],edx" \
-parm[esi] [edi] [ebx] \
-modify[eax edx];
-
-
-/* ADD ++ */
-
-void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a);
-# pragma aux ADD_LL_PP = \
-"mov	eax,[esi]" \
-"mov	edx,[esi+4]" \
-"add	[edi],eax" \
-"adc	[edi+4],edx" \
-parm[edi] [esi] \
-modify[eax edx];
-
-
-/* SUB */
-
-void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
-# pragma aux SUB_LL = \
-"mov	eax,[esi]" \
-"mov	edx,[esi+4]" \
-"sub	eax,[edi]" \
-"sbb	edx,[edi+4]" \
-"mov	[ebx],eax" \
-"mov	[ebx+4],edx" \
-parm[esi] [edi] [ebx] \
-modify[eax edx];
-
-
-
-/* SUB -- */
-
-void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a);
-# pragma aux SUB_LL_MM = \
-"mov	eax,[esi]" \
-"mov	edx,[esi+4]" \
-"sub	[edi],eax" \
-"sbb	[edi+4],edx" \
-parm[edi] [esi] \
-modify[eax edx];
-
-
-/*
-
- MUL
-
- This is the multiply we use, the 32 x 32 = 64 widening version
-
-*/
-
-void MUL_I_WIDE(int a, int b, LONGLONGCH *c);
-# pragma aux MUL_I_WIDE = \
-"imul	edx"\
-"mov	[ebx],eax" \
-"mov	[ebx+4],edx" \
-parm[eax] [edx] [ebx] \
-modify[eax edx];
-
-
-
-/*
-
- CMP
-
- This substitutes for ==, >, <, >=, <=
-
-*/
-
-int CMP_LL(LONGLONGCH *a, LONGLONGCH *b);
-# pragma aux CMP_LL = \
-"mov	eax,[ebx]" \
-"mov	edx,[ebx+4]" \
-"sub	eax,[ecx]" \
-"sbb	edx,[ecx+4]" \
-"and	edx,edx" \
-"jne	llnz" \
-"and	eax,eax" \
-"jne	llnz" \
-"xor	eax,eax" \
-"jmp	llgs" \
-"llnz:" \
-"mov	eax,1" \
-"and	edx,edx" \
-"jge	llgs" \
-"neg	eax" \
-"llgs:" \
-parm[ebx] [ecx] \
-value[eax] \
-modify[edx];
-
-
-
-
-/* EQUALS */
-
-void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b);
-# pragma aux EQUALS_LL = \
-"mov	eax,[esi]" \
-"mov	edx,[esi+4]" \
-"mov	[edi],eax" \
-"mov	[edi+4],edx" \
-parm[edi] [esi] \
-modify[eax edx];
-
-
-/* NEGATE */
-
-void NEG_LL(LONGLONGCH *a);
-# pragma aux NEG_LL = \
-"not	dword ptr[esi]" \
-"not	dword ptr[esi+4]" \
-"add	dword ptr[esi],1" \
-"adc	dword ptr[esi+4],0" \
-parm[esi];
-
-
-/* ASR */
-
-void ASR_LL(LONGLONGCH *a, int shift);
-# pragma aux ASR_LL = \
-"and	eax,eax" \
-"jle	asrdn" \
-"asrlp:" \
-"sar	dword ptr[esi+4],1" \
-"rcr	dword ptr[esi],1" \
-"dec	eax" \
-"jne	asrlp" \
-"asrdn:" \
-parm[esi] [eax];
-
-
-/* Convert int to LONGLONGCH */
-
-void IntToLL(LONGLONGCH *a, int *b);
-# pragma aux IntToLL = \
-"mov	eax,[esi]" \
-"cdq" \
-"mov	[edi],eax" \
-"mov	[edi+4],edx" \
-parm[edi] [esi] \
-modify[eax edx];
-
-
-
-
-
-
-
-
-
-/*
-
- Fixed Point Multiply.
-
-
- 16.16 * 16.16 -> 16.16
- or
- 16.16 * 0.32 -> 0.32
-
- A proper version of this function ought to read
- 16.16 * 16.16 -> 32.16
- but this would require a long long result
-
- Algorithm:
-
- Take the mid 32 bits of the 64 bit result
-
-*/
-
-/*
-	These functions have been checked for suitability for 
-	a Pentium and look as if they would work adequately.
-	Might be worth a more detailed look at optimising
-	them though.
-*/
-
-#if 0
-
-int MUL_FIXED(int a, int b);
-# pragma aux MUL_FIXED = \
-"imul edx" \
-"mov ax,dx" \
-"rol eax,16" \
-parm[eax] [edx] \
-value[eax] \
-modify[edx];
-
-#else
-
-int MUL_FIXED(int a, int b);
-# pragma aux MUL_FIXED = \
-"imul edx" \
-"shrd	eax,edx,16" \
-parm[eax] [edx] \
-value[eax] \
-modify[edx];
-
-#endif
-
-
-/*
-
- Fixed Point Divide - returns a / b
-
-*/
-
-int DIV_FIXED(int a, int b);
-# pragma aux DIV_FIXED = \
-"cdq" \
-"rol eax,16" \
-"mov dx,ax" \
-"xor ax,ax" \
-"idiv ebx" \
-parm[eax] [ebx] \
-value[eax] \
-modify[edx];
-
-
-
-
-/*
-
- Multiply and Divide Functions.
-
-*/
-
-
-/*
-
- 32/32 division
-
- This macro is a function on some other platforms
-
-*/
-
-#define DIV_INT(a, b) ((a) / (b))
-
-
-
-
-/*
-
- A Narrowing 64/32 Division
-
-*/
-
-int NarrowDivide(LONGLONGCH *a, int b);
-# pragma aux NarrowDivide = \
-"mov	eax,[esi]" \
-"mov	edx,[esi+4]" \
-"idiv	ebx" \
-parm[esi] [ebx] \
-value[eax] \
-modify[edx];
-
-
-
-/*
-
- This function performs a Widening Multiply followed by a Narrowing Divide.
-
- a = (a * b) / c
-
-*/
-
-int WideMulNarrowDiv(int a, int b, int c);
-# pragma aux WideMulNarrowDiv = \
-"imul	edx"\
-"idiv	ebx" \
-parm[eax] [edx] [ebx] \
-value[eax];
-
-
-
-/*
-
- Function to rotate a VECTORCH using a MATRIXCH
-
- This is the C function
-
-	x =  MUL_FIXED(m->mat11, v->vx);
-	x += MUL_FIXED(m->mat21, v->vy);
-	x += MUL_FIXED(m->mat31, v->vz);
-
-	y  = MUL_FIXED(m->mat12, v->vx);
-	y += MUL_FIXED(m->mat22, v->vy);
-	y += MUL_FIXED(m->mat32, v->vz);
-
-	z  = MUL_FIXED(m->mat13, v->vx);
-	z += MUL_FIXED(m->mat23, v->vy);
-	z += MUL_FIXED(m->mat33, v->vz);
-
-	v->vx = x;
-	v->vy = y;
-	v->vz = z;
-
- This is the MUL_FIXED inline assembler function
-
-	imul edx
-	shrd eax,edx,16
-
-
-typedef struct matrixch {
-
-	int mat11;	0
-	int mat12;	4
-	int mat13;	8
-
-	int mat21;	12
-	int mat22;	16
-	int mat23;	20
-
-	int mat31;	24
-	int mat32;	28
-	int mat33;	32
-
-} MATRIXCH;
-
-*/
-
-void RotateVector_ASM(VECTORCH *v, MATRIXCH *m);
-# pragma aux RotateVector_ASM = \
-\
-"push	eax" \
-"push	ebx" \
-"push	ecx" \
-"push	edx" \
-"push	ebp" \
-\
-"mov	eax,[edi + 0]" \
-"imul	DWORD PTR [esi + 0]" \
-"shrd	eax,edx,16" \
-"mov	ecx,eax"\
-"mov	eax,[edi + 12]" \
-"imul	DWORD PTR [esi + 4]" \
-"shrd	eax,edx,16" \
-"add	ecx,eax" \
-"mov	eax,[edi + 24]" \
-"imul	DWORD PTR [esi + 8]" \
-"shrd	eax,edx,16" \
-"add	ecx,eax" \
-\
-"mov	eax,[edi + 4]" \
-"imul	DWORD PTR [esi + 0]" \
-"shrd	eax,edx,16" \
-"mov	ebx,eax"\
-"mov	eax,[edi + 16]" \
-"imul	DWORD PTR [esi + 4]" \
-"shrd	eax,edx,16" \
-"add	ebx,eax" \
-"mov	eax,[edi + 28]" \
-"imul	DWORD PTR [esi + 8]" \
-"shrd	eax,edx,16" \
-"add	ebx,eax" \
-\
-"mov	eax,[edi + 8]" \
-"imul	DWORD PTR [esi + 0]" \
-"shrd	eax,edx,16" \
-"mov	ebp,eax"\
-"mov	eax,[edi + 20]" \
-"imul	DWORD PTR [esi + 4]" \
-"shrd	eax,edx,16" \
-"add	ebp,eax" \
-"mov	eax,[edi + 32]" \
-"imul	DWORD PTR [esi + 8]" \
-"shrd	eax,edx,16" \
-"add	ebp,eax" \
-\
-"mov	[esi + 0],ecx" \
-"mov	[esi + 4],ebx" \
-"mov	[esi + 8],ebp" \
-\
-"pop	ebp" \
-"pop	edx" \
-"pop	ecx" \
-"pop	ebx" \
-"pop	eax" \
-\
-parm[esi] [edi];
-
-
-/*
-
- Here is the same function, this time copying the result to a second vector
-
-*/
-
-void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m);
-# pragma aux RotateAndCopyVector_ASM = \
-\
-"push	eax" \
-"push	ebx" \
-"push	ecx" \
-"push	ebp" \
-\
-"push	edx" \
-"mov	eax,[edi + 0]" \
-"imul	DWORD PTR [esi + 0]" \
-"shrd	eax,edx,16" \
-"mov	ecx,eax"\
-"mov	eax,[edi + 12]" \
-"imul	DWORD PTR [esi + 4]" \
-"shrd	eax,edx,16" \
-"add	ecx,eax" \
-"mov	eax,[edi + 24]" \
-"imul	DWORD PTR [esi + 8]" \
-"shrd	eax,edx,16" \
-"add	ecx,eax" \
-\
-"mov	eax,[edi + 4]" \
-"imul	DWORD PTR [esi + 0]" \
-"shrd	eax,edx,16" \
-"mov	ebx,eax"\
-"mov	eax,[edi + 16]" \
-"imul	DWORD PTR [esi + 4]" \
-"shrd	eax,edx,16" \
-"add	ebx,eax" \
-"mov	eax,[edi + 28]" \
-"imul	DWORD PTR [esi + 8]" \
-"shrd	eax,edx,16" \
-"add	ebx,eax" \
-\
-"mov	eax,[edi + 8]" \
-"imul	DWORD PTR [esi + 0]" \
-"shrd	eax,edx,16" \
-"mov	ebp,eax"\
-"mov	eax,[edi + 20]" \
-"imul	DWORD PTR [esi + 4]" \
-"shrd	eax,edx,16" \
-"add	ebp,eax" \
-"mov	eax,[edi + 32]" \
-"imul	DWORD PTR [esi + 8]" \
-"shrd	eax,edx,16" \
-"add	ebp,eax" \
-\
-"pop	edx" \
-"mov	[edx + 0],ecx" \
-"mov	[edx + 4],ebx" \
-"mov	[edx + 8],ebp" \
-\
-"pop	ebp" \
-"pop	ecx" \
-"pop	ebx" \
-"pop	eax" \
-\
-parm[esi] [edx] [edi];
-
-
-
-
-#if (SupportFPMathsFunctions || SupportFPSquareRoot)
-
-/*
-
- Square Root
-
- Returns the Square Root of a 32-bit number
-
-*/
-
-static long temp;
-static long temp2;
-
-int SqRoot32(int A);
-# pragma aux SqRoot32 = \
-"finit" \
-"mov	temp,eax" \
-"fild temp" \
-"fsqrt" \
-"fistp temp2" \
-"fwait" \
-"mov	eax,temp2" \
-parm[eax] \
-value[eax];
-
-#endif
-
-
-/*
-
- This may look ugly (it is) but it is a MUCH faster way to convert "float" into "int" than
- the function call "CHP" used by the WATCOM compiler.
-
-*/
-
-static float fptmp;
-static int itmp;
-
-void FloatToInt(void);
-# pragma aux FloatToInt = \
-"fld fptmp" \
-"fistp itmp";
-
-/*
-
- This macro makes usage of the above function easier and more elegant
-
-*/
-
-#define f2i(a, b) { \
-fptmp = (b); \
-FloatToInt(); \
-a = itmp;}
-
-#elif defined(_MSC_VER) && 0 /* inline assember for the Microsoft compiler */
+#if defined(_MSC_VER) && 0 /* inline assembler for the Microsoft compiler */
 
 /* ADD */
 
@@ -1111,7 +585,7 @@ a = itmp;}
 
 #else
 
-#if 1 /* GCC! */
+/* inline assembly has been moved to mathline.c */
 void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
 void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a);
 void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
@@ -1147,752 +621,6 @@ fti_fptmp = (b); \
 FloatToInt(); \
 a = fti_itmp;}
 
-#else /* inline stuff */
-
-/* ADD */
-
-
-static __inline__ void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c)
-{
-/*
-	_asm
-	{
-		mov esi,a
-		mov edi,b
-		mov ebx,c
-		mov	eax,[esi]
-		mov	edx,[esi+4]
-		add	eax,[edi]
-		adc	edx,[edi+4]
-		mov	[ebx],eax
-		mov	[ebx+4],edx
-	}
-*/
-
-__asm__("movl	0(%%esi), %%eax		\n\t"
-	"movl	4(%%esi), %%edx		\n\t"
-	"addl	0(%%edi), %%eax		\n\t"
-	"adcl	4(%%edi), %%edx		\n\t"
-	"movl	%%eax, 0(%%ebx)		\n\t"
-	"movl	%%edx, 4(%%ebx)		\n\t"
-	: 
-	: "S" (a), "D" (b), "b" (c)
-	: "%eax", "%edx", "memory", "cc"
-	);
-
-/*
-__asm__("movl	0(%%esi), %%eax		\n\t"
-	"movl	4(%%esi), %%edx		\n\t"
-	"addl	0(%%edi), %%eax		\n\t"
-	"adcl	4(%%edi), %%edx		\n\t"
-	: "=a" (c->lo32), "=d" (c->hi32)
-	: "S" (a), "D" (b)
-	);
-*/
-}
-
-/* ADD ++ */
-
-static __inline__ void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a)
-{
-/*
-	_asm
-	{
-		mov edi,c
-		mov esi,a
-		mov	eax,[esi]
-		mov	edx,[esi+4]
-		add	[edi],eax
-		adc	[edi+4],edx
-	}
-*/
-__asm__("movl	0(%%esi), %%eax		\n\t"
-	"movl	4(%%esi), %%edx		\n\t"
-	"addl	%%eax, 0(%%edi)		\n\t"
-	"adcl	%%edx, 4(%%edi)		\n\t"
-	:
-	: "D" (c), "S" (a)
-	: "%eax", "%edx", "memory", "cc"
-	);
-}
-
-/* SUB */
-
-static __inline__ void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c)
-{
-/*
-	_asm
-	{
-		mov esi,a
-		mov edi,b
-		mov ebx,c
-		mov	eax,[esi]
-		mov	edx,[esi+4]
-		sub	eax,[edi]
-		sbb	edx,[edi+4]
-		mov	[ebx],eax
-		mov	[ebx+4],edx
-	}
-*/
-__asm__("movl	0(%%esi), %%eax		\n\t"
-	"movl	4(%%esi), %%edx		\n\t"
-	"subl	0(%%edi), %%eax		\n\t"
-	"sbbl	4(%%edi), %%edx		\n\t"
-	"movl	%%eax, 0(%%ebx)		\n\t"
-	"movl	%%edx, 4(%%ebx)		\n\t"
-	:
-	: "S" (a), "D" (b), "b" (c)
-	: "%eax", "%edx", "memory", "cc"
-	);
-}
-
-/* SUB -- */
-
-static __inline__ void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a)
-{
-/*
-	_asm
-	{
-		mov edi,c
-		mov esi,a
-		mov	eax,[esi]
-		mov	edx,[esi+4]
-		sub	[edi],eax
-		sbb	[edi+4],edx
-	}
-*/
-__asm__("movl	0(%%esi), %%eax		\n\t"
-	"movl	4(%%esi), %%edx		\n\t"
-	"subl	%%eax, 0(%%edi)		\n\t"
-	"sbbl	%%edx, 4(%%edi)		\n\t"
-	:
-	: "D" (c), "S" (a)
-	: "%eax", "%edx", "memory", "cc"
-	);
-}
-
-/*
-
- MUL
-
- This is the multiply we use, the 32 x 32 = 64 widening version
-
-*/
-
-static __inline__ void MUL_I_WIDE(int a, int b, LONGLONGCH *c)
-{
-/*
-	_asm
-	{
-		mov eax,a
-		mov ebx,c
-		imul b
-		mov	[ebx],eax
-		mov	[ebx+4],edx
-	}
-*/
-__asm__("imull	%2			\n\t"
-	"movl	%%eax, 0(%%ebx)		\n\t"
-	"movl	%%edx, 4(%%ebx)		\n\t"
-	:
-	: "a" (a), "b" (c), "q" (b)
-	: "%edx", "memory", "cc"
-	);
-}
-
-/*
-
- CMP
-
- This substitutes for ==, >, <, >=, <=
-
-*/
-
-static __inline__ int CMP_LL(LONGLONGCH *a, LONGLONGCH *b)
-{
-	int retval;
-/*
-	_asm
-	{
-		mov ebx,a
-		mov ecx,b
-		mov	eax,[ebx]
-		mov	edx,[ebx+4]
-		sub	eax,[ecx]
-		sbb	edx,[ecx+4]
-		and	edx,edx
-		jne	llnz
-		and	eax,eax
-		je	llgs
-		llnz:
-		mov	retval,1
-		and	edx,edx
-		jge	llgs
-		neg	retval
-		llgs:
-	}
-*/
-/* TODO */
-__asm__("movl	0(%%ebx), %%eax		\n\t"
-	"movl	4(%%ebx), %%edx		\n\t"
-	"subl	0(%%ecx), %%eax		\n\t"
-	"sbbl	4(%%ecx), %%edx		\n\t"
-	"xorl	%0, %0                  \n\t" /* hopefully it doesn't pick %eax or %edx */
-	"andl	%%edx, %%edx		\n\t"
-	"jne	0			\n\t" /* llnz */
-	"andl	%%eax, %%eax		\n\t"
-	"je	1			\n"   /* llgs */
-"0:					\n\t" /* llnz */
-	"movl	$1, %0			\n\t"
-	"andl	%%edx, %%edx		\n\t"
-	"jge	1			\n\t" /* llgs */
-	"negl	%0			\n"
-"1:					\n\t" /* llgs */
-	: "=r" (retval)
-	: "b" (a), "c" (b)
-	: "%eax", "%edx", "memory", "cc"
-	);
-	
-	return retval;
-}
-
-/* EQUALS */
-
-static __inline__ void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b)
-{
-/*
-	_asm
-	{
-		mov edi,a
-		mov esi,b
-		mov	eax,[esi]
-		mov	edx,[esi+4]
-		mov	[edi],eax
-		mov	[edi+4],edx
-	}
-*/
-__asm__("movl	0(%%esi), %%eax		\n\t"
-	"movl	4(%%esi), %%edx		\n\t"
-	"movl	%%eax, 0(%%edi)		\n\t"
-	"movl	%%edx, 4(%%edi)		\n\t"
-	:
-	: "D" (a), "S" (b)
-	: "%eax", "%edx", "memory"
-	);
-}
-
-/* NEGATE */
-
-static __inline__ void NEG_LL(LONGLONGCH *a)
-{
-/*
-	_asm
-	{
-		mov esi,a
-		not	dword ptr[esi]
-		not	dword ptr[esi+4]
-		add	dword ptr[esi],1
-		adc	dword ptr[esi+4],0
-	}
-*/
-__asm__("notl	0(%%esi)		\n\t"
-	"notl	4(%%esi)		\n\t"
-	"addl	$1, 0(%%esi)		\n\t"
-	"adcl	$0, 4(%%esi)		\n\t"
-	:
-	: "S" (a)
-	: "memory", "cc"
-	);
-}
-
-/* ASR */
-
-static __inline__ void ASR_LL(LONGLONGCH *a, int shift)
-{
-/*
-	_asm
-	{
-		mov esi,a
-		mov eax,shift
-		and	eax,eax
-		jle	asrdn
-		asrlp:
-		sar	dword ptr[esi+4],1
-		rcr	dword ptr[esi],1
-		dec	eax
-		jne	asrlp
-		asrdn:
-	}
-*/
-__asm__("andl	%%eax, %%eax		\n\t"
-	"jle	0			\n" /* asrdn */
-"1:					\n\t" /* asrlp */
-	"sarl	$1, 4(%%esi)		\n\t"
-	"rcrl	$1, 0(%%esi)		\n\t"
-	"decl	%%eax			\n\t"
-	"jne	1			\n"
-"0:					\n\t"
-	:
-	: "S" (a), "a" (shift)
-	: "memory", "cc"
-	);
-	
-}
-
-/* Convert int to LONGLONGCH */
-
-static __inline__ void IntToLL(LONGLONGCH *a, int *b)
-{
-/*
-	_asm
-	{
-		mov esi,b
-		mov edi,a
-		mov	eax,[esi]
-		cdq
-		mov	[edi],eax
-		mov	[edi+4],edx
-	}
-*/
-__asm__("movl	0(%%esi), %%eax		\n\t"
-	"cdq				\n\t"
-	"movl	%%eax, 0(%%edi)		\n\t"
-	"movl	%%edx, 4(%%edi)		\n\t"
-	:
-	: "S" (b), "D" (a)
-	: "%eax", "%edx", "memory", "cc"
-	);
-
-}
-
-/*
-
- Fixed Point Multiply.
-
-
- 16.16 * 16.16 -> 16.16
- or
- 16.16 * 0.32 -> 0.32
-
- A proper version of this function ought to read
- 16.16 * 16.16 -> 32.16
- but this would require a long long result
-
- Algorithm:
-
- Take the mid 32 bits of the 64 bit result
-
-*/
-
-/*
-	These functions have been checked for suitability for 
-	a Pentium and look as if they would work adequately.
-	Might be worth a more detailed look at optimising
-	them though.
-*/
-
-static __inline__ int MUL_FIXED(int a, int b)
-{
-	int retval;
-/*
-	_asm
-	{
-		mov eax,a
-		imul b
-		shrd eax,edx,16
-		mov retval,eax
-	}
-*/
-/* TODO */
-__asm__("imull	%2			\n\t"
-	"shrdl	$16, %%edx, %%eax	\n\t"
-	: "=a" (retval)
-	: "a" (a), "q" (b)
-	: "%edx", "cc"
-	);
-	return retval;
-}
-
-/*
-
- Fixed Point Divide - returns a / b
-
-*/
-
-static __inline__ int DIV_FIXED(int a, int b)
-{
-	int retval;
-/*
-	_asm
-	{
-		mov eax,a
-		cdq
-		rol eax,16
-		mov dx,ax
-		xor ax,ax
-		idiv b
-		mov retval,eax
-	}
-*/
-/* TODO */
-__asm__("cdq				\n\t"
-	"roll	$16, %%eax		\n\t"
-	"mov	%%ax, %%dx		\n\t"
-	"xor	%%ax, %%ax		\n\t"
-	"idivl	%2			\n\t"
-	: "=a" (retval)
-	: "a" (a), "q" (b)
-	: "%edx", "cc"
-	);
-	return retval;
-}
-
-/*
-
- Multiply and Divide Functions.
-
-*/
-
-
-/*
-
- 32/32 division
-
- This macro is a function on some other platforms
-
-*/
-
-#define DIV_INT(a, b) ((a) / (b))
-
-/*
-
- A Narrowing 64/32 Division
-
-*/
-
-static __inline__ int NarrowDivide(LONGLONGCH *a, int b)
-{
-	int retval;
-/*
-	_asm
-	{
-		mov esi,a
-		mov	eax,[esi]
-		mov	edx,[esi+4]
-		idiv	b
-		mov retval,eax
-	}
-*/
-__asm__("movl	0(%%esi), %%eax		\n\t"
-	"movl	4(%%esi), %%edx		\n\t"
-	"idivl	%2			\n\t"
-	: "=a" (retval)
-	: "S" (a), "q" (b)
-	: "%edx", "cc"
-	);
-	return retval;
-}
-
-/*
-
- This function performs a Widening Multiply followed by a Narrowing Divide.
-
- a = (a * b) / c
-
-*/
-
-static __inline__ int WideMulNarrowDiv(int a, int b, int c)
-{
-#if 0 /* TODO: broken? */
-	int retval;
-/*
-	_asm
-	{
-		mov eax,a
-		imul b
-		idiv c
-		mov retval,eax
-	}
-*/
-/* TODO */
-__asm__("imull	%2			\n\t"
-	"idivl	%3			\n\t"
-	: "=a" (retval)
-	: "a" (a), "q" (b), "q" (c)
-	: "cc"
-	);	
-	return retval;
-#endif
-	return (a * b) / c;	
-}
-
-/*
-
- Function to rotate a VECTORCH using a MATRIXCH
-
- This is the C function
-
-	x =  MUL_FIXED(m->mat11, v->vx);
-	x += MUL_FIXED(m->mat21, v->vy);
-	x += MUL_FIXED(m->mat31, v->vz);
-
-	y  = MUL_FIXED(m->mat12, v->vx);
-	y += MUL_FIXED(m->mat22, v->vy);
-	y += MUL_FIXED(m->mat32, v->vz);
-
-	z  = MUL_FIXED(m->mat13, v->vx);
-	z += MUL_FIXED(m->mat23, v->vy);
-	z += MUL_FIXED(m->mat33, v->vz);
-
-	v->vx = x;
-	v->vy = y;
-	v->vz = z;
-
- This is the MUL_FIXED inline assembler function
-
-	imul edx
-	shrd eax,edx,16
-
-
-typedef struct matrixch {
-
-	int mat11;	0
-	int mat12;	4
-	int mat13;	8
-
-	int mat21;	12
-	int mat22;	16
-	int mat23;	20
-
-	int mat31;	24
-	int mat32;	28
-	int mat33;	32
-
-} MATRIXCH;
-
-*/
-
-#if 0 /* TODO if these are needed */
-static void RotateVector_ASM(VECTORCH *v, MATRIXCH *m)
-{
-	_asm
-	{
-		mov esi,v
-		mov edi,m
-
-		mov	eax,[edi + 0]
-		imul	DWORD PTR [esi + 0]
-		shrd	eax,edx,16
-		mov	ecx,eax
-		mov	eax,[edi + 12]
-		imul	DWORD PTR [esi + 4]
-		shrd	eax,edx,16
-		add	ecx,eax
-		mov	eax,[edi + 24]
-		imul	DWORD PTR [esi + 8]
-		shrd	eax,edx,16
-		add	ecx,eax
-
-		mov	eax,[edi + 4]
-		imul	DWORD PTR [esi + 0]
-		shrd	eax,edx,16
-		mov	ebx,eax
-		mov	eax,[edi + 16]
-		imul	DWORD PTR [esi + 4]
-		shrd	eax,edx,16
-		add	ebx,eax
-		mov	eax,[edi + 28]
-		imul	DWORD PTR [esi + 8]
-		shrd	eax,edx,16
-		add	ebx,eax
-
-		mov	eax,[edi + 8]
-		imul	DWORD PTR [esi + 0]
-		shrd	eax,edx,16
-		mov	ebp,eax
-		mov	eax,[edi + 20]
-		imul	DWORD PTR [esi + 4]
-		shrd	eax,edx,16
-		add	ebp,eax
-		mov	eax,[edi + 32]
-		imul	DWORD PTR [esi + 8]
-		shrd	eax,edx,16
-		add	ebp,eax
-
-		mov	[esi + 0],ecx
-		mov	[esi + 4],ebx
-		mov	[esi + 8],ebp
-	}
-}
-
-/*
-
- Here is the same function, this time copying the result to a second vector
-
-*/
-
-static void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m)
-{
-	_asm
-	{
-		mov esi,v1
-		mov edi,m
-
-		mov	eax,[edi + 0]
-		imul	DWORD PTR [esi + 0]
-		shrd	eax,edx,16
-		mov	ecx,eax
-		mov	eax,[edi + 12]
-		imul	DWORD PTR [esi + 4]
-		shrd	eax,edx,16
-		add	ecx,eax
-		mov	eax,[edi + 24]
-		imul	DWORD PTR [esi + 8]
-		shrd	eax,edx,16
-		add	ecx,eax
-
-		mov	eax,[edi + 4]
-		imul	DWORD PTR [esi + 0]
-		shrd	eax,edx,16
-		mov	ebx,eax
-		mov	eax,[edi + 16]
-		imul	DWORD PTR [esi + 4]
-		shrd	eax,edx,16
-		add	ebx,eax
-		mov	eax,[edi + 28]
-		imul	DWORD PTR [esi + 8]
-		shrd	eax,edx,16
-		add	ebx,eax
-
-		mov	eax,[edi + 8]
-		imul	DWORD PTR [esi + 0]
-		shrd	eax,edx,16
-		mov	ebp,eax
-		mov	eax,[edi + 20]
-		imul	DWORD PTR [esi + 4]
-		shrd	eax,edx,16
-		add	ebp,eax
-		mov	eax,[edi + 32]
-		imul	DWORD PTR [esi + 8]
-		shrd	eax,edx,16
-		add	ebp,eax
-
-		mov edx,v2
-		mov	[edx + 0],ecx
-		mov	[edx + 4],ebx
-		mov	[edx + 8],ebp
-	}
-}
-#endif
-
-#if (SupportFPMathsFunctions || SupportFPSquareRoot)
-
-/*
-
- Square Root
-
- Returns the Square Root of a 32-bit number
-
-*/
-
-extern int sqrt_temp1;
-extern int sqrt_temp2;
-
-#include <math.h>
-static __inline__ int SqRoot32(int A)
-{
-#if 0
-	sqrt_temp1 = A;
-/*
-	_asm
-	{
-		finit
-		fild A
-		fsqrt
-		fistp temp2
-		fwait
-	}
-*/
-
-__asm__("finit				\n\t"
-	"fild	sqrt_temp1		\n\t"
-	"fsqrt				\n\t"
-	"fistp	sqrt_temp2		\n\t"
-	"fwait				\n\t"
-	:
-	:
-	: "memory", "cc"
-	);
-	
-	return sqrt_temp2;
-#endif
-{ /* TODO: clean this please */
-	double x = A;
-	double retvald = sqrt(x);
-	int retval = retvald;
-	return retval;
-}
-}
-
-#endif
-
-
-/*
-
- This may look ugly (it is) but it is a MUCH faster way to convert "float" into "int" than
- the function call "CHP" used by the WATCOM compiler.
-
-*/
-
-extern float fti_fptmp;
-extern int fti_itmp;
-
-static __inline__ int FloatToInt(float fptmp)
-{
-#if 0
-	fti_fptmp = fptmp;
-/*
-	_asm
-	{
-		fld fptmp
-		fistp itmp
-	}
-*/
-__asm__("fld	fti_fptmp		\n\t"
-	"fistp	fti_itmp		\n\t"
-	:
-	:
-	: "memory", "cc"
-	);
-
-	return fti_itmp;
-#endif	
-
-	return fptmp;
-}
-
-/*
-
- This macro makes usage of the above function easier and more elegant
-
-*/
-
-#define f2i(a, b) { \
-a = FloatToInt(b); \
-}
-
-
-#if 0
-int SqRoot32(int A);
-void FloatToInt();
-#define f2i(a, b) { \
-fti_fptmp = (b); \
-FloatToInt(); \
-a = fti_itmp;}
-#endif
-
-#endif 
-
 #endif
 
 int WideMul2NarrowDiv(int a, int b, int c, int d, int e);