Initial revision

author: Steven Fuller <relnev@icculus.org> 2001-07-01 00:55:22 +0000
committer: Patryk Obara <dreamer.tan@gmail.com> 2019-08-20 02:09:04 +0200
commit: 2186d5f3f95cd74a070a490d899291648d58667a (patch)
tree: 55241a1afa3e1a22e0b6593a8dead0b703800f44 /3dc/win95/MMX_MATH.H
parent: 218ca90543758a20ac326e444ca0643174ca7384 (diff)
1 files changed, 0 insertions, 469 deletions
diff --git a/3dc/win95/MMX_MATH.H b/3dc/win95/MMX_MATH.H
deleted file mode 100644
index 594ac0e..0000000
--- a/3dc/win95/MMX_MATH.H
+++ /dev/null
@@ -1,469 +0,0 @@
-#ifndef _included_mmx_math_h_
-#define _included_mmx_math_h_
-
-#if SUPPORT_MMX
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*
-Calling-convention independent
-definitions of inline MMX assembler
-functions and declarations for non-
-inline MMX assembler functions
-*/
-
-/* SPECIFICATION */
-/*
-Dot Product and Vector Transform functions take
-arguments referencing matrices or vectors whose
-elements are 32 bit signed integers and arranged as
-follows. All integers (including the results) are
-in 16.16 fixed point form - ie. The 64-bit results
-are shifted down 16 bits (divided by 65536) before
-being written back as 32-bit values. Results are
-rounded down (towards negative infinity).
-
-the matrix structure looks like this (not ideal!)
-[ +00 +0c +18 ]
-[ +04 +10 +1c ]
-[ +08 +14 +20 ]
-
-and the vector structure looks like this
-[ +00 ]
-[ +04 ]
-[ +08 ]
-*/
-
-/* TYPICAL CHARACTERISTICS */
-/*
-Accuracy
-
-Internal rounding errors may be propogated, and
-the results may not be exact. For the Dot Product
-result and the Vector Transform results (x,y and z
-independently), the error distributions are all
-the same, as follows:
-
-Exact:	25%
--1:   	50%
--2:   	25%
-
-Better accuracy can be obtained by adding 1 to each integer result,
-but this will produce poor results in the case of nice simple round
-numbers, eg Dot({1.0,0.0,0.0},{0.0,1.0,0.0}) gives 1 not 0!
-
-Speed
-
-The DotProduct Takes 33 cycles (not including call instruction)
-The inline DotProduct takes 30+1 cycles (the last instruction is pairable)
-All Vector transforms take 63 cycles. These figures assume no
-stalls due to cache misses or misaligned data. A matrix multiply
-or cross product could be supplied if it is thought they would
-be necessary
-
-
-For optimal performance, it is recommended that vector and
-matrix structures should be aligned to EIGHT byte boundaries.
-To ensure this in arrays of vectors/matrices, the structure
-should contain a dummy padding 32-bit value (recommended).
-*/
-
-/* storage class specifier for assembler calls */
-
-#ifdef __WATCOMC__
-#define _asmcall
-#define _asminline
-#elif defined(_MSC_VER)
-#define _asmcall static __inline
-#define _asminline static __inline
-#else
-#error "Unknown compiler"
-#endif
-
-/* forward reference declared in global scope */
-struct vectorch;
-struct matrixch;
-
-/***********************/
-/* F-U-N-C-T-I-O-N     */
-/* P-R-O-T-O-T-Y-P-E-S */
-/* F-O-R   A-L-L       */
-/* P-U-B-L-I-C         */
-/* F-U-N-C-T-I-O-N-S   */
-/***********************/
-
-/* overwrites the input vector with the new vector */
-_asmcall void MMX_VectorTransform(struct vectorch * vector, struct matrixch const * matrix);
-/* fills a new vector with the result of the input vector transformed by the matrix */
-_asmcall void MMX_VectorTransformed(struct vectorch * v_result, struct vectorch const * v_parm, struct matrixch const * matrix);
-/* overwrites the input vector with the new vector, then adds another vector */
-_asmcall void MMX_VectorTransformAndAdd(struct vectorch * vector, struct matrixch const * matrix, struct vectorch const * v_add);
-/* fills a new vector with the result of the input vector transformed by the matrix then added to another vector */
-_asmcall void MMX_VectorTransformedAndAdd(struct vectorch * v_result, struct vectorch const * v_parm, struct matrixch const * matrix, struct vectorch const * v_add);
-/* compute dot product */
-_asmcall signed MMX_VectorDot(struct vectorch const * v1, struct vectorch const * v2);
-/* this one assumes all the input vector elements are in the range [-32768,32767] */
-_asmcall signed MMX_VectorDot16(struct vectorch const * v1, struct vectorch const * v2);
-
-/* inline versions */
-_asminline signed MMXInline_VectorDot(struct vectorch const * v1, struct vectorch const * v2);
-_asminline signed MMXInline_VectorDot16(struct vectorch const * v1, struct vectorch const * v2);
-
-/*****************/
-/* PRIVATE PARTS */
-/*****************/
-
-/* Assembler labels */
-extern void MMXAsm_VectorTransform(void);
-extern void MMXAsm_VectorTransformed(void);
-extern void MMXAsm_VectorTransformAndAdd(void);
-extern void MMXAsm_VectorTransformedAndAdd(void);
-extern void MMXAsm_VectorDot(void);
-extern void MMXAsm_VectorDot16(void);
-
-/* inline calls to MMX functions with correct parameters set */
-#ifdef __WATCOMC__
-
-#pragma aux MMX_VectorTransform = "call MMXAsm_VectorTransform" parm [eax] [edx];
-#pragma aux MMX_VectorTransformed = "call MMXAsm_VectorTransformed" parm [eax] [edx] [ecx];
-#pragma aux MMX_VectorTransformAndAdd = "call MMXAsm_VectorTransformAndAdd" parm [eax] [edx] [ecx];
-#pragma aux MMX_VectorTransformedAndAdd = "call MMXAsm_VectorTransformedAndAdd" parm [eax] [edx] [ecx] [ebx];
-#pragma aux MMX_VectorDot = "call MMXAsm_VectorDot" parm [eax] [edx] value [eax];
-#pragma aux MMX_VectorDot16 = "call MMXAsm_VectorDot16" parm [eax] [edx] value [eax];
-
-#elif defined(_MSC_VER)
-
-_asmcall void MMX_VectorTransform(struct vectorch * vector, struct matrixch const * matrix)
-{
-	_asm
-	{
-		mov eax,vector
-		mov edx,matrix
-		call MMXAsm_VectorTransform
-	}
-}
-_asmcall void MMX_VectorTransformed(struct vectorch * v_result, struct vectorch const * v_parm, struct matrixch const * matrix)
-{
-	_asm
-	{
-		mov eax,v_result
-		mov edx,v_parm
-		mov ecx,matrix
-		call MMXAsm_VectorTransformed
-	}
-}
-_asmcall void MMX_VectorTransformAndAdd(struct vectorch * vector, struct matrixch const * matrix, struct vectorch const * v_add)
-{
-	_asm
-	{
-		mov eax,vector
-		mov edx,matrix
-		mov ecx,v_add
-		call MMXAsm_VectorTransformAndAdd
-	}
-}
-_asmcall void MMX_VectorTransformedAndAdd(struct vectorch * v_result, struct vectorch const * v_parm, struct matrixch const * matrix, struct vectorch const * v_add)
-{
-	_asm
-	{
-		mov eax,v_result
-		mov edx,v_parm
-		mov ecx,matrix
-		mov ebx,v_add
-		call MMXAsm_VectorTransformedAndAdd
-	}
-}
-_asmcall signed MMX_VectorDot(struct vectorch const * v1, struct vectorch const * v2)
-{
-	signed retval;
-	_asm
-	{
-		mov eax,v1
-		mov edx,v2
-		call MMXAsm_VectorDot
-		mov retval,eax
-	}
-	return retval;
-}
-_asmcall signed MMX_VectorDot16(struct vectorch const * v1, struct vectorch const * v2)
-{
-	signed retval;
-	_asm
-	{
-		mov eax,v1
-		mov edx,v2
-		call MMXAsm_VectorDot16
-		mov retval,eax
-	}
-	return retval;
-}
-
-#else
-
-#error "Unknown compiler"
-
-#endif
-
-
-/* Cross product? Mod? MatrixMultiply? */
-
-/* globals */
-
-extern int use_mmx_math;
-
-/* inline functions - no call */
-
-extern __int64 const mmx_sign_mask;
-extern __int64 const mmx_one_fixed_h;
-
-#ifdef __WATCOMC__
-
-#pragma aux MMXInline_VectorDot = \
-\
-"	movq mm0,[edx]" \
-\
-"	movd mm2,[edx+08h]" \
-"	movq mm4,mm0" \
-\
-"	pand mm4,mmx_sign_mask" \
-"	movq mm6,mm2" \
-\
-"	movq mm1,[eax]" \
-"	paddd mm4,mm4" \
-\
-"	movd mm3,[eax+08h]" \
-"	movq mm5,mm1" \
-\
-"	pand mm6,mmx_sign_mask" \
-"	movq mm7,mm3" \
-\
-"	pand mm5,mmx_sign_mask" \
-"	paddd mm6,mm6" \
-\
-"	pand mm7,mmx_sign_mask" \
-"	paddd mm5,mm5" \
-\
-"	paddd mm0,mm4" \
-"	paddd mm2,mm6" \
-\
-"	paddd mm7,mm7" \
-"	movq mm4,mm2" \
-\
-"	punpcklwd mm4,mm0" \
-"	paddd mm1,mm5" \
-\
-"	punpckhwd mm2,mm0" \
-"	paddd mm3,mm7" \
-\
-"	movq mm5,mm3" \
-"	punpckhwd mm3,mm1" \
-\
-"	punpcklwd mm5,mm1" \
-"	movq mm0,mm2" \
-\
-"	movq mm1,mm4" \
-"	pmaddwd mm0,mm3" \
-\
-"	movq mm6,mm3" \
-"	psrlq mm3,32" \
-\
-"	movq mm7,mm5" \
-"	punpckldq mm3,mm6" \
-\
-"	pmaddwd mm1,mm5" \
-"	psrlq mm5,32" \
-\
-"	punpckldq mm5,mm7" \
-"	pmaddwd mm2,mm3" \
-\
-"	pmaddwd mm4,mm5" \
-"	movq mm3,mm0" \
-\
-"	punpckldq mm0,mm1" \
-\
-"	psubd mm0,mmx_one_fixed_h" \
-"	punpckhdq mm1,mm3" \
-\
-"	psrad mm0,16" \
-"	paddd mm2,mm4" \
-\
-"	pslld mm1,16" \
-"	paddd mm2,mm0" \
-\
-"	paddd mm2,mm1" \
-\
-"	movq mm1,mm2" \
-"	psrlq mm2,32" \
-\
-"	paddd mm1,mm2" \
-\
-"	movd eax,mm1" \
-\
-"	emms" \
-\
-"	inc eax" \
-\
-parm [eax] [edx] value [eax];
-
-#pragma aux MMXInline_VectorDot16 = \
-\
-"	movd mm0,[edx+08h]" \
-\
-"	packssdw mm0,[edx]" \
-\
-"	movd mm1,[eax+08h]" \
-\
-"	packssdw mm1,[eax]" \
-\
-"	pmaddwd mm0,mm1" \
-\
-"	movq mm1,mm0" \
-"	psrlq mm0,32" \
-\
-"	paddd mm0,mm1" \
-\
-"	movd eax,mm0" \
-\
-"	emms" \
-\
-parm [eax] [edx] value [eax];
-
-#elif defined(_MSC_VER)
-
-_asminline signed MMXInline_VectorDot(struct vectorch const * v1, struct vectorch const * v2)
-{
-	signed retval;
-	_asm
-	{
-		mov edx,v1
-		mov eax,v2
-
-		movq mm0,[edx]
-
-		movd mm2,[edx+08h]
-		movq mm4,mm0
-
-		pand mm4,mmx_sign_mask
-		movq mm6,mm2
-
-		movq mm1,[eax]
-		paddd mm4,mm4
-
-		movd mm3,[eax+08h]
-		movq mm5,mm1
-
-		pand mm6,mmx_sign_mask
-		movq mm7,mm3
-
-		pand mm5,mmx_sign_mask
-		paddd mm6,mm6
-
-		pand mm7,mmx_sign_mask
-		paddd mm5,mm5
-
-		paddd mm0,mm4
-		paddd mm2,mm6
-
-		paddd mm7,mm7
-		movq mm4,mm2
-
-		punpcklwd mm4,mm0
-		paddd mm1,mm5
-
-		punpckhwd mm2,mm0
-		paddd mm3,mm7
-
-		movq mm5,mm3
-		punpckhwd mm3,mm1
-
-		punpcklwd mm5,mm1
-		movq mm0,mm2
-
-		movq mm1,mm4
-		pmaddwd mm0,mm3
-
-		movq mm6,mm3
-		psrlq mm3,32
-
-		movq mm7,mm5
-		punpckldq mm3,mm6
-
-		pmaddwd mm1,mm5
-		psrlq mm5,32
-
-		punpckldq mm5,mm7
-		pmaddwd mm2,mm3
-
-		pmaddwd mm4,mm5
-		movq mm3,mm0
-
-		punpckldq mm0,mm1
-
-		psubd mm0,mmx_one_fixed_h
-		punpckhdq mm1,mm3
-
-		psrad mm0,16
-		paddd mm2,mm4
-
-		pslld mm1,16
-		paddd mm2,mm0
-
-		paddd mm2,mm1
-
-		movq mm1,mm2
-		psrlq mm2,32
-
-		paddd mm1,mm2
-
-		movd retval,mm1
-
-		emms
-	}
-	return retval+1;
-}
-
-_asminline signed MMXInline_VectorDot16(struct vectorch const * v1, struct vectorch const * v2)
-{
-	signed retval;
-	_asm
-	{
-		mov eax,v1
-		mov edx,v2
-
-		movd mm0,[edx+08h]
-
-		packssdw mm0,[edx]
-
-		movd mm1,[eax+08h]
-
-		packssdw mm1,[eax]
-
-		pmaddwd mm0,mm1
-
-		movq mm1,mm0
-		psrlq mm0,32
-
-		paddd mm0,mm1
-
-		movd retval,mm0
-
-		emms
-	}
-	return retval;
-}
-
-#else
-
-#error "Unknown compiler"
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif /* SUPPORT_MMX */
-
-#endif /* ! _included_mmx_math_h_ */
author	Steven Fuller <relnev@icculus.org>	2001-07-01 00:55:22 +0000
committer	Patryk Obara <dreamer.tan@gmail.com>	2019-08-20 02:09:04 +0200
commit	2186d5f3f95cd74a070a490d899291648d58667a (patch)
tree	55241a1afa3e1a22e0b6593a8dead0b703800f44 /3dc/win95/MMX_MATH.H
parent	218ca90543758a20ac326e444ca0643174ca7384 (diff)