diff options
| author | Steven Fuller <relnev@icculus.org> | 2001-07-01 00:55:22 +0000 |
|---|---|---|
| committer | Patryk Obara <dreamer.tan@gmail.com> | 2019-08-20 02:09:04 +0200 |
| commit | 2186d5f3f95cd74a070a490d899291648d58667a (patch) | |
| tree | 55241a1afa3e1a22e0b6593a8dead0b703800f44 /3dc/win95/MMX_MATH.H | |
| parent | 218ca90543758a20ac326e444ca0643174ca7384 (diff) | |
Initial revision
Diffstat (limited to '3dc/win95/MMX_MATH.H')
| -rw-r--r-- | 3dc/win95/MMX_MATH.H | 469 |
1 files changed, 0 insertions, 469 deletions
diff --git a/3dc/win95/MMX_MATH.H b/3dc/win95/MMX_MATH.H deleted file mode 100644 index 594ac0e..0000000 --- a/3dc/win95/MMX_MATH.H +++ /dev/null @@ -1,469 +0,0 @@ -#ifndef _included_mmx_math_h_ -#define _included_mmx_math_h_ - -#if SUPPORT_MMX - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/* -Calling-convention independent -definitions of inline MMX assembler -functions and declarations for non- -inline MMX assembler functions -*/ - -/* SPECIFICATION */ -/* -Dot Product and Vector Transform functions take -arguments referencing matrices or vectors whose -elements are 32 bit signed integers and arranged as -follows. All integers (including the results) are -in 16.16 fixed point form - ie. The 64-bit results -are shifted down 16 bits (divided by 65536) before -being written back as 32-bit values. Results are -rounded down (towards negative infinity). - -the matrix structure looks like this (not ideal!) -[ +00 +0c +18 ] -[ +04 +10 +1c ] -[ +08 +14 +20 ] - -and the vector structure looks like this -[ +00 ] -[ +04 ] -[ +08 ] -*/ - -/* TYPICAL CHARACTERISTICS */ -/* -Accuracy - -Internal rounding errors may be propagated, and -the results may not be exact. For the Dot Product -result and the Vector Transform results (x,y and z -independently), the error distributions are all -the same, as follows: - -Exact: 25% --1: 50% --2: 25% - -Better accuracy can be obtained by adding 1 to each integer result, -but this will produce poor results in the case of nice simple round -numbers, eg Dot({1.0,0.0,0.0},{0.0,1.0,0.0}) gives 1 not 0! - -Speed - -The DotProduct takes 33 cycles (not including call instruction) -The inline DotProduct takes 30+1 cycles (the last instruction is pairable) -All Vector transforms take 63 cycles. These figures assume no -stalls due to cache misses or misaligned data. 
A matrix multiply -or cross product could be supplied if it is thought they would -be necessary - - -For optimal performance, it is recommended that vector and -matrix structures should be aligned to EIGHT byte boundaries. -To ensure this in arrays of vectors/matrices, the structure -should contain a dummy padding 32-bit value (recommended). -*/ - -/* storage class specifier for assembler calls: empty under Watcom, where the functions are bound to code by the pragma aux directives further down; static __inline under MSVC, where they are defined as wrapper functions in this header */ - -#ifdef __WATCOMC__ -#define _asmcall -#define _asminline -#elif defined(_MSC_VER) -#define _asmcall static __inline -#define _asminline static __inline -#else -#error "Unknown compiler" -#endif - -/* forward reference declared in global scope */ -struct vectorch; -struct matrixch; - -/***********************/ -/* F-U-N-C-T-I-O-N */ -/* P-R-O-T-O-T-Y-P-E-S */ -/* F-O-R A-L-L */ -/* P-U-B-L-I-C */ -/* F-U-N-C-T-I-O-N-S */ -/***********************/ - -/* overwrites the input vector with the new vector */ -_asmcall void MMX_VectorTransform(struct vectorch * vector, struct matrixch const * matrix); -/* fills a new vector with the result of the input vector transformed by the matrix */ -_asmcall void MMX_VectorTransformed(struct vectorch * v_result, struct vectorch const * v_parm, struct matrixch const * matrix); -/* overwrites the input vector with the new vector, then adds another vector */ -_asmcall void MMX_VectorTransformAndAdd(struct vectorch * vector, struct matrixch const * matrix, struct vectorch const * v_add); -/* fills a new vector with the result of the input vector transformed by the matrix then added to another vector */ -_asmcall void MMX_VectorTransformedAndAdd(struct vectorch * v_result, struct vectorch const * v_parm, struct matrixch const * matrix, struct vectorch const * v_add); -/* compute dot product */ -_asmcall signed MMX_VectorDot(struct vectorch const * v1, struct vectorch const * v2); -/* this one assumes all the input vector elements are in the range [-32768,32767] */ -_asmcall signed MMX_VectorDot16(struct vectorch 
const * v2); - -/* inline versions */ -_asminline signed MMXInline_VectorDot(struct vectorch const * v1, struct vectorch const * v2); -_asminline signed MMXInline_VectorDot16(struct vectorch const * v1, struct vectorch const * v2); - -/*****************/ -/* PRIVATE PARTS */ -/*****************/ - -/* Assembler labels: referenced by the call instructions in the wrappers below, which pass the arguments in registers rather than through these prototypes */ -extern void MMXAsm_VectorTransform(void); -extern void MMXAsm_VectorTransformed(void); -extern void MMXAsm_VectorTransformAndAdd(void); -extern void MMXAsm_VectorTransformedAndAdd(void); -extern void MMXAsm_VectorDot(void); -extern void MMXAsm_VectorDot16(void); - -/* inline calls to MMX functions with correct parameters set: in both compiler branches the arguments go in eax, edx, ecx, ebx (in that order) and the dot product variants take their result from eax */ -#ifdef __WATCOMC__ - -#pragma aux MMX_VectorTransform = "call MMXAsm_VectorTransform" parm [eax] [edx]; -#pragma aux MMX_VectorTransformed = "call MMXAsm_VectorTransformed" parm [eax] [edx] [ecx]; -#pragma aux MMX_VectorTransformAndAdd = "call MMXAsm_VectorTransformAndAdd" parm [eax] [edx] [ecx]; -#pragma aux MMX_VectorTransformedAndAdd = "call MMXAsm_VectorTransformedAndAdd" parm [eax] [edx] [ecx] [ebx]; -#pragma aux MMX_VectorDot = "call MMXAsm_VectorDot" parm [eax] [edx] value [eax]; -#pragma aux MMX_VectorDot16 = "call MMXAsm_VectorDot16" parm [eax] [edx] value [eax]; - -#elif defined(_MSC_VER) - -_asmcall void MMX_VectorTransform(struct vectorch * vector, struct matrixch const * matrix) -{ - _asm - { - mov eax,vector - mov edx,matrix - call MMXAsm_VectorTransform - } -} -_asmcall void MMX_VectorTransformed(struct vectorch * v_result, struct vectorch const * v_parm, struct matrixch const * matrix) -{ - _asm - { - mov eax,v_result - mov edx,v_parm - mov ecx,matrix - call MMXAsm_VectorTransformed - } -} -_asmcall void MMX_VectorTransformAndAdd(struct vectorch * vector, struct matrixch const * matrix, struct vectorch const * v_add) -{ - _asm - { - mov eax,vector - mov edx,matrix - mov ecx,v_add - call MMXAsm_VectorTransformAndAdd - } -} -_asmcall void MMX_VectorTransformedAndAdd(struct vectorch * v_result, struct vectorch 
const * v_parm, struct matrixch const * matrix, struct vectorch const * v_add) -{ - _asm - { - mov eax,v_result - mov edx,v_parm - mov ecx,matrix - mov ebx,v_add - call MMXAsm_VectorTransformedAndAdd - } -} -_asmcall signed MMX_VectorDot(struct vectorch const * v1, struct vectorch const * v2) -{ - signed retval; - _asm - { - mov eax,v1 - mov edx,v2 - call MMXAsm_VectorDot - mov retval,eax - } - return retval; -} -_asmcall signed MMX_VectorDot16(struct vectorch const * v1, struct vectorch const * v2) -{ - signed retval; - _asm - { - mov eax,v1 - mov edx,v2 - call MMXAsm_VectorDot16 - mov retval,eax - } - return retval; -} - -#else - -#error "Unknown compiler" - -#endif - - -/* Cross product? Mod? MatrixMultiply? */ - -/* globals */ - -extern int use_mmx_math; - -/* inline functions - no call. The two 64-bit constants declared next are referenced by MMXInline_VectorDot; MMXInline_VectorDot16 does not use them */ - -extern __int64 const mmx_sign_mask; -extern __int64 const mmx_one_fixed_h; - -#ifdef __WATCOMC__ - -#pragma aux MMXInline_VectorDot = \ -\ -" movq mm0,[edx]" \ -\ -" movd mm2,[edx+08h]" \ -" movq mm4,mm0" \ -\ -" pand mm4,mmx_sign_mask" \ -" movq mm6,mm2" \ -\ -" movq mm1,[eax]" \ -" paddd mm4,mm4" \ -\ -" movd mm3,[eax+08h]" \ -" movq mm5,mm1" \ -\ -" pand mm6,mmx_sign_mask" \ -" movq mm7,mm3" \ -\ -" pand mm5,mmx_sign_mask" \ -" paddd mm6,mm6" \ -\ -" pand mm7,mmx_sign_mask" \ -" paddd mm5,mm5" \ -\ -" paddd mm0,mm4" \ -" paddd mm2,mm6" \ -\ -" paddd mm7,mm7" \ -" movq mm4,mm2" \ -\ -" punpcklwd mm4,mm0" \ -" paddd mm1,mm5" \ -\ -" punpckhwd mm2,mm0" \ -" paddd mm3,mm7" \ -\ -" movq mm5,mm3" \ -" punpckhwd mm3,mm1" \ -\ -" punpcklwd mm5,mm1" \ -" movq mm0,mm2" \ -\ -" movq mm1,mm4" \ -" pmaddwd mm0,mm3" \ -\ -" movq mm6,mm3" \ -" psrlq mm3,32" \ -\ -" movq mm7,mm5" \ -" punpckldq mm3,mm6" \ -\ -" pmaddwd mm1,mm5" \ -" psrlq mm5,32" \ -\ -" punpckldq mm5,mm7" \ -" pmaddwd mm2,mm3" \ -\ -" pmaddwd mm4,mm5" \ -" movq mm3,mm0" \ -\ -" punpckldq mm0,mm1" \ -\ -" psubd mm0,mmx_one_fixed_h" \ -" punpckhdq mm1,mm3" \ -\ -" psrad mm0,16" \ -" paddd mm2,mm4" \ -\ -" pslld mm1,16" 
\ -" paddd mm2,mm0" \ -\ -" paddd mm2,mm1" \ -\ -" movq mm1,mm2" \ -" psrlq mm2,32" \ -\ -" paddd mm1,mm2" \ -\ -" movd eax,mm1" \ -\ -" emms" \ -\ -" inc eax" \ -\ -parm [eax] [edx] value [eax]; - -#pragma aux MMXInline_VectorDot16 = \ -\ -" movd mm0,[edx+08h]" \ -\ -" packssdw mm0,[edx]" \ -\ -" movd mm1,[eax+08h]" \ -\ -" packssdw mm1,[eax]" \ -\ -" pmaddwd mm0,mm1" \ -\ -" movq mm1,mm0" \ -" psrlq mm0,32" \ -\ -" paddd mm0,mm1" \ -\ -" movd eax,mm0" \ -\ -" emms" \ -\ -parm [eax] [edx] value [eax]; - -#elif defined(_MSC_VER) - -_asminline signed MMXInline_VectorDot(struct vectorch const * v1, struct vectorch const * v2) -{ - signed retval; /* v1 is loaded into edx and v2 into eax below, the opposite of the Watcom parm [eax] [edx] binding */ - _asm - { - mov edx,v1 - mov eax,v2 - - movq mm0,[edx] - - movd mm2,[edx+08h] - movq mm4,mm0 - - pand mm4,mmx_sign_mask - movq mm6,mm2 - - movq mm1,[eax] - paddd mm4,mm4 - - movd mm3,[eax+08h] - movq mm5,mm1 - - pand mm6,mmx_sign_mask - movq mm7,mm3 - - pand mm5,mmx_sign_mask - paddd mm6,mm6 - - pand mm7,mmx_sign_mask - paddd mm5,mm5 - - paddd mm0,mm4 - paddd mm2,mm6 - - paddd mm7,mm7 - movq mm4,mm2 - - punpcklwd mm4,mm0 - paddd mm1,mm5 - - punpckhwd mm2,mm0 - paddd mm3,mm7 - - movq mm5,mm3 - punpckhwd mm3,mm1 - - punpcklwd mm5,mm1 - movq mm0,mm2 - - movq mm1,mm4 - pmaddwd mm0,mm3 - - movq mm6,mm3 - psrlq mm3,32 - - movq mm7,mm5 - punpckldq mm3,mm6 - - pmaddwd mm1,mm5 - psrlq mm5,32 - - punpckldq mm5,mm7 - pmaddwd mm2,mm3 - - pmaddwd mm4,mm5 - movq mm3,mm0 - - punpckldq mm0,mm1 - - psubd mm0,mmx_one_fixed_h - punpckhdq mm1,mm3 - - psrad mm0,16 - paddd mm2,mm4 - - pslld mm1,16 - paddd mm2,mm0 - - paddd mm2,mm1 - - movq mm1,mm2 - psrlq mm2,32 - - paddd mm1,mm2 - - movd retval,mm1 - - emms - } - return retval+1; /* the +1 corresponds to the inc eax that ends the Watcom sequence */ -} - -_asminline signed MMXInline_VectorDot16(struct vectorch const * v1, struct vectorch const * v2) -{ - signed retval; - _asm - { - mov eax,v1 - mov edx,v2 - - movd mm0,[edx+08h] - - packssdw mm0,[edx] - - movd mm1,[eax+08h] - - packssdw mm1,[eax] - - pmaddwd mm0,mm1 - - movq mm1,mm0 - psrlq mm0,32 - - paddd mm0,mm1 
- - movd retval,mm0 - - emms - } - return retval; -} - -#else - -#error "Unknown compiler" - -#endif - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif /* SUPPORT_MMX */ - -#endif /* ! _included_mmx_math_h_ */ |
