29 #include "../precompiled.h"
45 #if defined(MACOS_X) && defined(__i386__)
53 return "MMX & SSE & SSE2 & SSE3";
58 #include <xmmintrin.h>
// Builders for packed 8-bit selector immediates (presumably the imm8 operand of
// shufps / shufpd -- confirm against the call sites).
//
// The four-selector forms mask every argument to two bits. SHUFFLEPS packs
// x into bits 7:6, y into 5:4, z into 3:2 and w into 1:0; R_SHUFFLEPS accepts
// the same selectors in reversed order, so x lands in bits 1:0 and w in 7:6.
#define SHUFFLEPS( x, y, z, w )		( ( ( (x) & 3 ) << 6 ) | ( ( (y) & 3 ) << 4 ) | ( ( (z) & 3 ) << 2 ) | ( (w) & 3 ) )
#define R_SHUFFLEPS( x, y, z, w )	( ( ( (w) & 3 ) << 6 ) | ( ( (z) & 3 ) << 4 ) | ( ( (y) & 3 ) << 2 ) | ( (x) & 3 ) )
// The two-selector forms mask every argument to one bit. SHUFFLEPD packs
// x into bit 1 and y into bit 0; R_SHUFFLEPD swaps the two positions.
#define SHUFFLEPD( x, y )			( ( ( (x) & 1 ) << 1 ) | ( (y) & 1 ) )
#define R_SHUFFLEPD( x, y )			( ( ( (y) & 1 ) << 1 ) | ( (x) & 1 ) )
// Maps an index scale factor onto bits 7:6 of the byte it is OR-ed into
// (presumably the scale field of an x86 SIB byte -- confirm against the
// ADDRESS_SCALEADDR* users):
//   1 -> 0x00, 2 -> 0x40, 4 -> 0x80, 8 -> 0xC0
// Both the argument and the whole expansion are parenthesized so the macro
// stays correct when passed an expression (e.g. RSCALE( a + b )) or when it is
// combined with operators that bind tighter than '|' (e.g. '&', '==', '+').
#define RSCALE( s )		( ( ( (s) & 2 ) << 5 ) | ( ( (s) & 4 ) << 5 ) | ( ( (s) & 8 ) << 3 ) | ( ( (s) & 8 ) << 4 ) )
129 #define ADDRESS_ADDC( reg0, constant ) 0x40 | ( reg0 & 7 ) \
132 #define ADDRESS_ADDR( reg0, reg1 ) 0x04 \
133 _asm _emit ( ( reg1 & 7 ) << 3 ) | ( reg0 & 7 )
135 #define ADDRESS_ADDRC( reg0, reg1, constant ) 0x44 \
136 _asm _emit ( ( reg1 & 7 ) << 3 ) | ( reg0 & 7 ) \
139 #define ADDRESS_SCALEADDR( reg0, reg1, scale ) 0x04 \
140 _asm _emit ( ( reg1 & 7 ) << 3 ) | ( reg0 & 7 ) | RSCALE( scale )
142 #define ADDRESS_SCALEADDRC( reg0, reg1, scale, constant ) 0x44 \
143 _asm _emit ( ( reg1 & 7 ) << 3 ) | ( reg0 & 7 ) | RSCALE( scale ) \
148 #define addsubps( dst, src ) \
152 _asm _emit ( ( dst & 7 ) << 3 ) | src
155 #define addsubpd( dst, src ) \
159 _asm _emit ( ( dst & 7 ) << 3 ) | src
162 #define haddps( dst, src ) \
166 _asm _emit ( ( dst & 7 ) << 3 ) | src
169 #define haddpd( dst, src ) \
173 _asm _emit ( ( dst & 7 ) << 3 ) | src
176 #define hsubps( dst, src ) \
180 _asm _emit ( ( dst & 7 ) << 3 ) | src
183 #define hsubpd( dst, src ) \
187 _asm _emit ( ( dst & 7 ) << 3 ) | src
190 #define movsldup( dst, src ) \
194 _asm _emit ( ( dst & 7 ) << 3 ) | src
197 #define movdldup( dst, src ) \
201 _asm _emit ( ( dst & 7 ) << 3 ) | src
204 #define movshdup( dst, src ) \
208 _asm _emit ( ( dst & 7 ) << 3 ) | src
211 #define movdhdup( dst, src ) \
215 _asm _emit ( ( dst & 7 ) << 3 ) | src
218 #define lddqu( dst, src ) \
222 _asm _emit ( ( dst & 7 ) << 3 ) | src
// Byte layout constants used by the hand-written assembly to address vertex
// and joint data. Each offset is written as <element index> * 4, i.e. a count
// of 4-byte elements (presumably floats -- confirm against the idDrawVert
// declaration). These must be kept in sync with the corresponding C++ types.

// Total size of one draw vertex in bytes, followed by the member offsets.
#define DRAWVERT_SIZE				60
#define DRAWVERT_XYZ_OFFSET			( 0 * 4 )
#define DRAWVERT_ST_OFFSET			( 3 * 4 )
#define DRAWVERT_NORMAL_OFFSET		( 5 * 4 )
#define DRAWVERT_TANGENT0_OFFSET	( 8 * 4 )
#define DRAWVERT_TANGENT1_OFFSET	( 11 * 4 )
#define DRAWVERT_COLOR_OFFSET		( 14 * 4 )

// Sizes in bytes of the joint-related records stepped over by the assembly.
#define JOINTQUAT_SIZE				( 7 * 4 )
#define JOINTMAT_SIZE				( 4 * 3 * 4 )
#define JOINTWEIGHT_SIZE			( 4 * 4 )
250 haddps( _xmm0, _xmm0 )
251 haddps( _xmm0, _xmm0 )
263 return "MMX & SSE & SSE2 & SSE3";
284 imul eax, DRAWVERT_SIZE
299 add esi, JOINTWEIGHT_SIZE
302 mulps xmm0, [edi+ebx+ 0]
303 mulps xmm1, [edi+ebx+16]
304 mulps xmm2, [edi+ebx+32]
306 cmp
dword ptr [edx-4], 0
315 add esi, JOINTWEIGHT_SIZE
318 mulps xmm3, [edi+ebx+ 0]
319 mulps xmm4, [edi+ebx+16]
320 mulps xmm5, [edi+ebx+32]
322 cmp
dword ptr [edx-4], 0
331 add eax, DRAWVERT_SIZE
333 haddps( _xmm0, _xmm1 )
334 haddps( _xmm2, _xmm0 )
336 movhps [ecx+eax-DRAWVERT_SIZE+0], xmm2
338 haddps( _xmm2, _xmm2 )
340 movss [ecx+eax-DRAWVERT_SIZE+8], xmm2
349 const byte *jointsPtr = (
byte *)joints;
351 for( j = i = 0; i < numVerts; i++ ) {
354 v = ( *(
idJointMat *) ( jointsPtr + index[j*2+0] ) ) * weights[j];
355 while( index[j*2+1] == 0 ) {
357 v += ( *(
idJointMat *) ( jointsPtr + index[j*2+0] ) ) * weights[j];
virtual void VPCALL TransformVerts(idDrawVert *verts, const int numVerts, const idJointMat *joints, const idVec4 *weights, const int *index, const int numWeights)
assert(prefInfo.fullscreenBtn)
GLfloat GLfloat GLfloat v2
virtual const char *VPCALL GetName(void) const