29 #include "../precompiled.h"
// UNROLLn(Y): expands to a braced statement that runs the statement-like macro
// Y(index) exactly once for every index in [0, count), in ascending order.
// `count` is not a macro parameter: an integer variable with that name must be
// visible wherever the macro is expanded. The digit in the macro name is the
// number of Y() invocations written out per iteration of the main loop; any
// left-over indices are handled after that loop.
// The helper variables deliberately avoid the underscore-plus-capital spelling,
// which C++ reserves for the implementation, and the masks are written as ~1,
// ~3 and ~7 so the rounding stays in the type of `count`.

// One invocation per iteration.
#define UNROLL1(Y) { \
	int unrollIx; \
	for ( unrollIx = 0; unrollIx < count; unrollIx++ ) { Y(unrollIx); } \
}

// Two invocations per iteration, then at most one trailing element.
#define UNROLL2(Y) { \
	int unrollIx; \
	const int unrollEnd = count & ~1; \
	for ( unrollIx = 0; unrollIx < unrollEnd; unrollIx += 2 ) { Y(unrollIx+0); Y(unrollIx+1); } \
	if ( unrollIx < count ) { Y(unrollIx); } \
}

// Four invocations per iteration, then a plain loop over the remainder.
#define UNROLL4(Y) { \
	int unrollIx; \
	const int unrollEnd = count & ~3; \
	for ( unrollIx = 0; unrollIx < unrollEnd; unrollIx += 4 ) { Y(unrollIx+0); Y(unrollIx+1); Y(unrollIx+2); Y(unrollIx+3); } \
	for ( ; unrollIx < count; unrollIx++ ) { Y(unrollIx); } \
}

// Eight invocations per iteration, then pairs, then at most one trailing element.
#define UNROLL8(Y) { \
	int unrollIx; \
	int unrollEnd = count & ~7; \
	for ( unrollIx = 0; unrollIx < unrollEnd; unrollIx += 8 ) { Y(unrollIx+0); Y(unrollIx+1); Y(unrollIx+2); Y(unrollIx+3); Y(unrollIx+4); Y(unrollIx+5); Y(unrollIx+6); Y(unrollIx+7); } \
	unrollEnd = count & ~1; \
	for ( ; unrollIx < unrollEnd; unrollIx += 2 ) { Y(unrollIx); Y(unrollIx+1); } \
	if ( unrollIx < count ) { Y(unrollIx); } \
}
// NODEFAULT fills the `default:` slot of a switch statement.
// Two alternative expansions are listed; the preprocessor conditional that
// selects between them is not part of this excerpt (presumably a debug versus
// release build switch -- TODO confirm).
// Checked variant: reaching the default label evaluates assert( 0 ).
47 #define NODEFAULT default: assert( 0 )
// Hint variant: reaching the default label evaluates __assume( 0 ).
// NOTE(review): __assume looks like a compiler-specific intrinsic -- confirm
// what other toolchains are expected to see here.
49 #define NODEFAULT default: __assume( 0 )
61 return "generic code";
// Per-element kernels for the scalar arithmetic routines. OPER is defined anew
// for each routine (the routine bodies are outside this excerpt, and OPER is
// presumably #undef'd between uses -- TODO confirm). X is the element index
// handed in by whichever UNROLLn macro expands the kernel. Some kernels carry
// their own trailing semicolon and some are parenthesised expressions; the
// UNROLLn macros append a semicolon after every invocation either way.
// dst[i] = src[i] + constant
72 #define OPER(X) dst[(X)] = src[(X)] + constant;
// dst[i] = src0[i] + src1[i]
85 #define OPER(X) dst[(X)] = src0[(X)] + src1[(X)];
// dst[i] = c - src[i]
99 #define OPER(X) dst[(X)] = c - src[(X)];
// dst[i] = src0[i] - src1[i]
112 #define OPER(X) dst[(X)] = src0[(X)] - src1[(X)];
// dst[i] = c * src0[i]
126 #define OPER(X) (dst[(X)] = (c * src0[(X)]))
// dst[i] = src0[i] * src1[i]
139 #define OPER(X) (dst[(X)] = src0[(X)] * src1[(X)])
// dst[i] = c / divisor[i]
153 #define OPER(X) (dst[(X)] = (c / divisor[(X)]))
// dst[i] = src0[i] / src1[i]
166 #define OPER(X) (dst[(X)] = src0[(X)] / src1[(X)])
// dst[i] += c * src[i]
180 #define OPER(X) (dst[(X)] += c * src[(X)])
// dst[i] += src0[i] * src1[i]
193 #define OPER(X) (dst[(X)] += src0[(X)] * src1[(X)])
// dst[i] -= c * src[i]
207 #define OPER(X) (dst[(X)] -= c * src[(X)])
// dst[i] -= src0[i] * src1[i]
220 #define OPER(X) (dst[(X)] -= src0[(X)] * src1[(X)])
233 #define OPER(X) dst[(X)] = constant * src[(X)];
246 #define OPER(X) dst[(X)] = constant * src[(X)].Normal() + src[(X)][3];
259 #define OPER(X) dst[(X)] = constant * src[(X)].xyz;
272 #define OPER(X) dst[(X)] = constant.Normal() * src[(X)] + constant[3];
285 #define OPER(X) dst[(X)] = constant.Normal() * src[(X)].Normal() + constant[3] * src[(X)][3];
298 #define OPER(X) dst[(X)] = constant.Normal() * src[(X)].xyz + constant[3];
311 #define OPER(X) dst[(X)] = src0[(X)] * src1[(X)];
332 dot = src1[0] * src2[0];
336 dot = src1[0] * src2[0] + src1[1] * src2[1];
340 dot = src1[0] * src2[0] + src1[1] * src2[1] + src1[2] * src2[2];
345 double s0, s1, s2, s3;
346 s0 = src1[0] * src2[0];
347 s1 = src1[1] * src2[1];
348 s2 = src1[2] * src2[2];
349 s3 = src1[3] * src2[3];
350 for ( i = 4; i < count-7; i += 8 ) {
351 s0 += src1[i+0] * src2[i+0];
352 s1 += src1[i+1] * src2[i+1];
353 s2 += src1[i+2] * src2[i+2];
354 s3 += src1[i+3] * src2[i+3];
355 s0 += src1[i+4] * src2[i+4];
356 s1 += src1[i+5] * src2[i+5];
357 s2 += src1[i+6] * src2[i+6];
358 s3 += src1[i+7] * src2[i+7];
360 switch( count - i ) {
362 case 7: s0 += src1[i+6] * src2[i+6];
363 case 6: s1 += src1[i+5] * src2[i+5];
364 case 5: s2 += src1[i+4] * src2[i+4];
365 case 4: s3 += src1[i+3] * src2[i+3];
366 case 3: s0 += src1[i+2] * src2[i+2];
367 case 2: s1 += src1[i+1] * src2[i+1];
368 case 1: s2 += src1[i+0] * src2[i+0];
384 dot += src1[
i] * src2[
i];
// Per-element kernels for the comparison routines (routine bodies are outside
// this excerpt). They come in pairs: the first form stores the result of the
// comparison in dst[i]; the second form shifts that result left by bitNum and
// ORs it into the existing dst[i], leaving the other bits of dst[i] as they were.
// dst[i] = ( src0[i] > constant )
398 #define OPER(X) dst[(X)] = src0[(X)] > constant;
// dst[i] |= ( src0[i] > constant ) << bitNum
411 #define OPER(X) dst[(X)] |= ( src0[(X)] > constant ) << bitNum;
// dst[i] = ( src0[i] >= constant )
424 #define OPER(X) dst[(X)] = src0[(X)] >= constant;
// dst[i] |= ( src0[i] >= constant ) << bitNum
437 #define OPER(X) dst[(X)] |= ( src0[(X)] >= constant ) << bitNum;
// dst[i] = ( src0[i] < constant )
450 #define OPER(X) dst[(X)] = src0[(X)] < constant;
// dst[i] |= ( src0[i] < constant ) << bitNum
463 #define OPER(X) dst[(X)] |= ( src0[(X)] < constant ) << bitNum;
// dst[i] = ( src0[i] <= constant )
476 #define OPER(X) dst[(X)] = src0[(X)] <= constant;
// dst[i] |= ( src0[i] <= constant ) << bitNum
489 #define OPER(X) dst[(X)] |= ( src0[(X)] <= constant ) << bitNum;
// Per-element kernels for the min/max and clamp routines (routine bodies are
// outside this excerpt).
// Scalar range update: lowers min and/or raises max so that src[i] lies inside [min, max].
501 #define OPER(X) if ( src[(X)] < min ) {min = src[(X)];} if ( src[(X)] > max ) {max = src[(X)];}
// Same range update applied separately to components 0 and 1 of src[i].
// The kernel declares a local reference `v`, so two expansions cannot share one scope.
513 #define OPER(X) const idVec2 &v = src[(X)]; if ( v[0] < min[0] ) { min[0] = v[0]; } if ( v[0] > max[0] ) { max[0] = v[0]; } if ( v[1] < min[1] ) { min[1] = v[1]; } if ( v[1] > max[1] ) { max[1] = v[1]; }
// Range update applied separately to components 0, 1 and 2 of src[i].
525 #define OPER(X) const idVec3 &v = src[(X)]; if ( v[0] < min[0] ) { min[0] = v[0]; } if ( v[0] > max[0] ) { max[0] = v[0]; } if ( v[1] < min[1] ) { min[1] = v[1]; } if ( v[1] > max[1] ) { max[1] = v[1]; } if ( v[2] < min[2] ) { min[2] = v[2]; } if ( v[2] > max[2] ) { max[2] = v[2]; }
// As above, but the three components are read from the .xyz member of src[i].
537 #define OPER(X) const idVec3 &v = src[(X)].xyz; if ( v[0] < min[0] ) { min[0] = v[0]; } if ( v[0] > max[0] ) { max[0] = v[0]; } if ( v[1] < min[1] ) { min[1] = v[1]; } if ( v[1] > max[1] ) { max[1] = v[1]; } if ( v[2] < min[2] ) { min[2] = v[2]; } if ( v[2] > max[2] ) { max[2] = v[2]; }
// As above, but the element is selected indirectly: src[ indexes[i] ].xyz.
549 #define OPER(X) const idVec3 &v = src[indexes[(X)]].xyz; if ( v[0] < min[0] ) { min[0] = v[0]; } if ( v[0] > max[0] ) { max[0] = v[0]; } if ( v[1] < min[1] ) { min[1] = v[1]; } if ( v[1] > max[1] ) { max[1] = v[1]; } if ( v[2] < min[2] ) { min[2] = v[2]; } if ( v[2] > max[2] ) { max[2] = v[2]; }
// Two-sided clamp: dst[i] is min when src[i] < min, max when src[i] > max, otherwise src[i].
560 #define OPER(X) dst[(X)] = src[(X)] < min ? min : src[(X)] > max ? max : src[(X)];
// Lower clamp only: dst[i] is min when src[i] < min, otherwise src[i].
571 #define OPER(X) dst[(X)] = src[(X)] < min ? min : src[(X)];
// Upper clamp only: dst[i] is max when src[i] > max, otherwise src[i].
582 #define OPER(X) dst[(X)] = src[(X)] > max ? max : src[(X)];
593 memcpy( dst, src, count );
602 memset( dst, val, count );
611 memset( dst, 0, count *
sizeof(
float ) );
620 unsigned int *ptr =
reinterpret_cast<unsigned int *
>(
dst);
// Per-element kernel that toggles bit 31 of ptr[i], where ptr is the
// unsigned int view of the destination buffer set up just before this macro.
// The mask is built from an unsigned literal so the shift never lands on the
// sign bit of a signed int; the resulting bit pattern (0x80000000) is unchanged.
#define OPER(X) ptr[(X)] ^= ( 1u << 31 )		// IEEE 32 bits float sign bit
// Per-element kernels for a further group of array routines (routine bodies
// are outside this excerpt). None of these carries its own semicolon; the
// UNROLLn macros append one after every invocation.
// dst[i] = src[i]
632 #define OPER(X) dst[(X)] = src[(X)]
// dst[i] = src1[i] + src2[i]
643 #define OPER(X) dst[(X)] = src1[(X)] + src2[(X)]
// dst[i] = src1[i] - src2[i]
654 #define OPER(X) dst[(X)] = src1[(X)] - src2[(X)]
// dst[i] = src1[i] * constant
665 #define OPER(X) dst[(X)] = src1[(X)] * constant
// dst[i] += src[i]
676 #define OPER(X) dst[(X)] += src[(X)]
// dst[i] -= src[i]
687 #define OPER(X) dst[(X)] -= src[(X)]
// dst[i] *= constant
698 #define OPER(X) dst[(X)] *= constant
710 const float *mPtr, *vPtr;
722 for ( i = 0; i < numRows; i++ ) {
723 dstPtr[
i] = mPtr[0] * vPtr[0];
728 for ( i = 0; i < numRows; i++ ) {
729 dstPtr[
i] = mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1];
734 for ( i = 0; i < numRows; i++ ) {
735 dstPtr[
i] = mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2];
740 for ( i = 0; i < numRows; i++ ) {
741 dstPtr[
i] = mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2] +
747 for ( i = 0; i < numRows; i++ ) {
748 dstPtr[
i] = mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2] +
749 mPtr[3] * vPtr[3] + mPtr[4] * vPtr[4];
754 for ( i = 0; i < numRows; i++ ) {
755 dstPtr[
i] = mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2] +
756 mPtr[3] * vPtr[3] + mPtr[4] * vPtr[4] + mPtr[5] * vPtr[5];
762 for ( i = 0; i < numRows; i++ ) {
763 float sum = mPtr[0] * vPtr[0];
764 for ( j = 1; j < numColumns; j++ ) {
765 sum += mPtr[
j] * vPtr[
j];
781 const float *mPtr, *vPtr;
793 for ( i = 0; i < numRows; i++ ) {
794 dstPtr[
i] += mPtr[0] * vPtr[0];
799 for ( i = 0; i < numRows; i++ ) {
800 dstPtr[
i] += mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1];
805 for ( i = 0; i < numRows; i++ ) {
806 dstPtr[
i] += mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2];
811 for ( i = 0; i < numRows; i++ ) {
812 dstPtr[
i] += mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2] +
818 for ( i = 0; i < numRows; i++ ) {
819 dstPtr[
i] += mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2] +
820 mPtr[3] * vPtr[3] + mPtr[4] * vPtr[4];
825 for ( i = 0; i < numRows; i++ ) {
826 dstPtr[
i] += mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2] +
827 mPtr[3] * vPtr[3] + mPtr[4] * vPtr[4] + mPtr[5] * vPtr[5];
833 for ( i = 0; i < numRows; i++ ) {
834 float sum = mPtr[0] * vPtr[0];
835 for ( j = 1; j < numColumns; j++ ) {
836 sum += mPtr[
j] * vPtr[
j];
852 const float *mPtr, *vPtr;
864 for ( i = 0; i < numRows; i++ ) {
865 dstPtr[
i] -= mPtr[0] * vPtr[0];
870 for ( i = 0; i < numRows; i++ ) {
871 dstPtr[
i] -= mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1];
876 for ( i = 0; i < numRows; i++ ) {
877 dstPtr[
i] -= mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2];
882 for ( i = 0; i < numRows; i++ ) {
883 dstPtr[
i] -= mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2] +
889 for ( i = 0; i < numRows; i++ ) {
890 dstPtr[
i] -= mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2] +
891 mPtr[3] * vPtr[3] + mPtr[4] * vPtr[4];
896 for ( i = 0; i < numRows; i++ ) {
897 dstPtr[
i] -= mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2] +
898 mPtr[3] * vPtr[3] + mPtr[4] * vPtr[4] + mPtr[5] * vPtr[5];
904 for ( i = 0; i < numRows; i++ ) {
905 float sum = mPtr[0] * vPtr[0];
906 for ( j = 1; j < numColumns; j++ ) {
907 sum += mPtr[
j] * vPtr[
j];
922 int i,
j, numColumns;
923 const float *mPtr, *vPtr;
935 for ( i = 0; i < numColumns; i++ ) {
936 dstPtr[
i] = *(mPtr) * vPtr[0];
941 for ( i = 0; i < numColumns; i++ ) {
942 dstPtr[
i] = *(mPtr) * vPtr[0] + *(mPtr+numColumns) * vPtr[1];
947 for ( i = 0; i < numColumns; i++ ) {
948 dstPtr[
i] = *(mPtr) * vPtr[0] + *(mPtr+numColumns) * vPtr[1] + *(mPtr+2*numColumns) * vPtr[2];
953 for ( i = 0; i < numColumns; i++ ) {
954 dstPtr[
i] = *(mPtr) * vPtr[0] + *(mPtr+numColumns) * vPtr[1] + *(mPtr+2*numColumns) * vPtr[2] +
955 *(mPtr+3*numColumns) * vPtr[3];
960 for ( i = 0; i < numColumns; i++ ) {
961 dstPtr[
i] = *(mPtr) * vPtr[0] + *(mPtr+numColumns) * vPtr[1] + *(mPtr+2*numColumns) * vPtr[2] +
962 *(mPtr+3*numColumns) * vPtr[3] + *(mPtr+4*numColumns) * vPtr[4];
967 for ( i = 0; i < numColumns; i++ ) {
968 dstPtr[
i] = *(mPtr) * vPtr[0] + *(mPtr+numColumns) * vPtr[1] + *(mPtr+2*numColumns) * vPtr[2] +
969 *(mPtr+3*numColumns) * vPtr[3] + *(mPtr+4*numColumns) * vPtr[4] + *(mPtr+5*numColumns) * vPtr[5];
975 for ( i = 0; i < numColumns; i++ ) {
977 float sum = mPtr[0] * vPtr[0];
978 for ( j = 1; j < numRows; j++ ) {
980 sum += mPtr[0] * vPtr[
j];
994 int i,
j, numColumns;
995 const float *mPtr, *vPtr;
1007 for ( i = 0; i < numColumns; i++ ) {
1008 dstPtr[
i] += *(mPtr) * vPtr[0];
1013 for ( i = 0; i < numColumns; i++ ) {
1014 dstPtr[
i] += *(mPtr) * vPtr[0] + *(mPtr+numColumns) * vPtr[1];
1019 for ( i = 0; i < numColumns; i++ ) {
1020 dstPtr[
i] += *(mPtr) * vPtr[0] + *(mPtr+numColumns) * vPtr[1] + *(mPtr+2*numColumns) * vPtr[2];
1025 for ( i = 0; i < numColumns; i++ ) {
1026 dstPtr[
i] += *(mPtr) * vPtr[0] + *(mPtr+numColumns) * vPtr[1] + *(mPtr+2*numColumns) * vPtr[2] +
1027 *(mPtr+3*numColumns) * vPtr[3];
1032 for ( i = 0; i < numColumns; i++ ) {
1033 dstPtr[
i] += *(mPtr) * vPtr[0] + *(mPtr+numColumns) * vPtr[1] + *(mPtr+2*numColumns) * vPtr[2] +
1034 *(mPtr+3*numColumns) * vPtr[3] + *(mPtr+4*numColumns) * vPtr[4];
1039 for ( i = 0; i < numColumns; i++ ) {
1040 dstPtr[
i] += *(mPtr) * vPtr[0] + *(mPtr+numColumns) * vPtr[1] + *(mPtr+2*numColumns) * vPtr[2] +
1041 *(mPtr+3*numColumns) * vPtr[3] + *(mPtr+4*numColumns) * vPtr[4] + *(mPtr+5*numColumns) * vPtr[5];
1047 for ( i = 0; i < numColumns; i++ ) {
1049 float sum = mPtr[0] * vPtr[0];
1050 for ( j = 1; j < numRows; j++ ) {
1052 sum += mPtr[0] * vPtr[
j];
1067 const float *mPtr, *vPtr;
1079 for ( i = 0; i < numColumns; i++ ) {
1080 dstPtr[
i] -= *(mPtr) * vPtr[0];
1085 for ( i = 0; i < numColumns; i++ ) {
1086 dstPtr[
i] -= *(mPtr) * vPtr[0] + *(mPtr+numColumns) * vPtr[1];
1091 for ( i = 0; i < numColumns; i++ ) {
1092 dstPtr[
i] -= *(mPtr) * vPtr[0] + *(mPtr+numColumns) * vPtr[1] + *(mPtr+2*numColumns) * vPtr[2];
1097 for ( i = 0; i < numColumns; i++ ) {
1098 dstPtr[
i] -= *(mPtr) * vPtr[0] + *(mPtr+numColumns) * vPtr[1] + *(mPtr+2*numColumns) * vPtr[2] +
1099 *(mPtr+3*numColumns) * vPtr[3];
1104 for ( i = 0; i < numColumns; i++ ) {
1105 dstPtr[
i] -= *(mPtr) * vPtr[0] + *(mPtr+numColumns) * vPtr[1] + *(mPtr+2*numColumns) * vPtr[2] +
1106 *(mPtr+3*numColumns) * vPtr[3] + *(mPtr+4*numColumns) * vPtr[4];
1111 for ( i = 0; i < numColumns; i++ ) {
1112 dstPtr[
i] -= *(mPtr) * vPtr[0] + *(mPtr+numColumns) * vPtr[1] + *(mPtr+2*numColumns) * vPtr[2] +
1113 *(mPtr+3*numColumns) * vPtr[3] + *(mPtr+4*numColumns) * vPtr[4] + *(mPtr+5*numColumns) * vPtr[5];
1119 for ( i = 0; i < numColumns; i++ ) {
1121 float sum = mPtr[0] * vPtr[0];
1122 for (
int j = 1;
j < numRows;
j++ ) {
1124 sum += mPtr[0] * vPtr[
j];
1149 const float *m1Ptr, *m2Ptr;
1163 for ( i = 0; i < k; i++ ) {
1164 *dstPtr++ = m1Ptr[
i] * m2Ptr[0];
1165 *dstPtr++ = m1Ptr[
i] * m2Ptr[1];
1166 *dstPtr++ = m1Ptr[
i] * m2Ptr[2];
1167 *dstPtr++ = m1Ptr[
i] * m2Ptr[3];
1168 *dstPtr++ = m1Ptr[
i] * m2Ptr[4];
1169 *dstPtr++ = m1Ptr[
i] * m2Ptr[5];
1173 for ( i = 0; i < k; i++ ) {
1175 for ( j = 0; j <
l; j++ ) {
1176 *dstPtr++ = m1Ptr[0] * m2Ptr[0];
1185 for ( i = 0; i < k; i++ ) {
1186 *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[6];
1187 *dstPtr++ = m1Ptr[0] * m2Ptr[1] + m1Ptr[1] * m2Ptr[7];
1188 *dstPtr++ = m1Ptr[0] * m2Ptr[2] + m1Ptr[1] * m2Ptr[8];
1189 *dstPtr++ = m1Ptr[0] * m2Ptr[3] + m1Ptr[1] * m2Ptr[9];
1190 *dstPtr++ = m1Ptr[0] * m2Ptr[4] + m1Ptr[1] * m2Ptr[10];
1191 *dstPtr++ = m1Ptr[0] * m2Ptr[5] + m1Ptr[1] * m2Ptr[11];
1196 for ( i = 0; i < k; i++ ) {
1198 for ( j = 0; j <
l; j++ ) {
1199 *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[
l];
1208 for ( i = 0; i < k; i++ ) {
1209 *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[6] + m1Ptr[2] * m2Ptr[12];
1210 *dstPtr++ = m1Ptr[0] * m2Ptr[1] + m1Ptr[1] * m2Ptr[7] + m1Ptr[2] * m2Ptr[13];
1211 *dstPtr++ = m1Ptr[0] * m2Ptr[2] + m1Ptr[1] * m2Ptr[8] + m1Ptr[2] * m2Ptr[14];
1212 *dstPtr++ = m1Ptr[0] * m2Ptr[3] + m1Ptr[1] * m2Ptr[9] + m1Ptr[2] * m2Ptr[15];
1213 *dstPtr++ = m1Ptr[0] * m2Ptr[4] + m1Ptr[1] * m2Ptr[10] + m1Ptr[2] * m2Ptr[16];
1214 *dstPtr++ = m1Ptr[0] * m2Ptr[5] + m1Ptr[1] * m2Ptr[11] + m1Ptr[2] * m2Ptr[17];
1219 for ( i = 0; i < k; i++ ) {
1221 for ( j = 0; j <
l; j++ ) {
1222 *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[
l] + m1Ptr[2] * m2Ptr[2*
l];
1231 for ( i = 0; i < k; i++ ) {
1232 *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[6] + m1Ptr[2] * m2Ptr[12] + m1Ptr[3] * m2Ptr[18];
1233 *dstPtr++ = m1Ptr[0] * m2Ptr[1] + m1Ptr[1] * m2Ptr[7] + m1Ptr[2] * m2Ptr[13] + m1Ptr[3] * m2Ptr[19];
1234 *dstPtr++ = m1Ptr[0] * m2Ptr[2] + m1Ptr[1] * m2Ptr[8] + m1Ptr[2] * m2Ptr[14] + m1Ptr[3] * m2Ptr[20];
1235 *dstPtr++ = m1Ptr[0] * m2Ptr[3] + m1Ptr[1] * m2Ptr[9] + m1Ptr[2] * m2Ptr[15] + m1Ptr[3] * m2Ptr[21];
1236 *dstPtr++ = m1Ptr[0] * m2Ptr[4] + m1Ptr[1] * m2Ptr[10] + m1Ptr[2] * m2Ptr[16] + m1Ptr[3] * m2Ptr[22];
1237 *dstPtr++ = m1Ptr[0] * m2Ptr[5] + m1Ptr[1] * m2Ptr[11] + m1Ptr[2] * m2Ptr[17] + m1Ptr[3] * m2Ptr[23];
1242 for ( i = 0; i < k; i++ ) {
1244 for ( j = 0; j <
l; j++ ) {
1245 *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[
l] + m1Ptr[2] * m2Ptr[2*
l] +
1246 m1Ptr[3] * m2Ptr[3*
l];
1255 for ( i = 0; i < k; i++ ) {
1256 *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[6] + m1Ptr[2] * m2Ptr[12] + m1Ptr[3] * m2Ptr[18] + m1Ptr[4] * m2Ptr[24];
1257 *dstPtr++ = m1Ptr[0] * m2Ptr[1] + m1Ptr[1] * m2Ptr[7] + m1Ptr[2] * m2Ptr[13] + m1Ptr[3] * m2Ptr[19] + m1Ptr[4] * m2Ptr[25];
1258 *dstPtr++ = m1Ptr[0] * m2Ptr[2] + m1Ptr[1] * m2Ptr[8] + m1Ptr[2] * m2Ptr[14] + m1Ptr[3] * m2Ptr[20] + m1Ptr[4] * m2Ptr[26];
1259 *dstPtr++ = m1Ptr[0] * m2Ptr[3] + m1Ptr[1] * m2Ptr[9] + m1Ptr[2] * m2Ptr[15] + m1Ptr[3] * m2Ptr[21] + m1Ptr[4] * m2Ptr[27];
1260 *dstPtr++ = m1Ptr[0] * m2Ptr[4] + m1Ptr[1] * m2Ptr[10] + m1Ptr[2] * m2Ptr[16] + m1Ptr[3] * m2Ptr[22] + m1Ptr[4] * m2Ptr[28];
1261 *dstPtr++ = m1Ptr[0] * m2Ptr[5] + m1Ptr[1] * m2Ptr[11] + m1Ptr[2] * m2Ptr[17] + m1Ptr[3] * m2Ptr[23] + m1Ptr[4] * m2Ptr[29];
1266 for ( i = 0; i < k; i++ ) {
1268 for ( j = 0; j <
l; j++ ) {
1269 *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[
l] + m1Ptr[2] * m2Ptr[2*
l] +
1270 m1Ptr[3] * m2Ptr[3*
l] + m1Ptr[4] * m2Ptr[4*
l];
1281 dstPtr[0] = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[1] + m1Ptr[2] * m2Ptr[2] +
1282 m1Ptr[3] * m2Ptr[3] + m1Ptr[4] * m2Ptr[4] + m1Ptr[5] * m2Ptr[5];
1289 for ( i = 0; i < 2; i++ ) {
1290 for ( j = 0; j < 2; j++ ) {
1291 *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 2 +
j ]
1292 + m1Ptr[1] * m2Ptr[ 1 * 2 +
j ]
1293 + m1Ptr[2] * m2Ptr[ 2 * 2 +
j ]
1294 + m1Ptr[3] * m2Ptr[ 3 * 2 +
j ]
1295 + m1Ptr[4] * m2Ptr[ 4 * 2 +
j ]
1296 + m1Ptr[5] * m2Ptr[ 5 * 2 +
j ];
1307 for ( i = 0; i < 3; i++ ) {
1308 for ( j = 0; j < 3; j++ ) {
1309 *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 3 +
j ]
1310 + m1Ptr[1] * m2Ptr[ 1 * 3 +
j ]
1311 + m1Ptr[2] * m2Ptr[ 2 * 3 +
j ]
1312 + m1Ptr[3] * m2Ptr[ 3 * 3 +
j ]
1313 + m1Ptr[4] * m2Ptr[ 4 * 3 +
j ]
1314 + m1Ptr[5] * m2Ptr[ 5 * 3 +
j ];
1325 for ( i = 0; i < 4; i++ ) {
1326 for ( j = 0; j < 4; j++ ) {
1327 *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 4 +
j ]
1328 + m1Ptr[1] * m2Ptr[ 1 * 4 +
j ]
1329 + m1Ptr[2] * m2Ptr[ 2 * 4 +
j ]
1330 + m1Ptr[3] * m2Ptr[ 3 * 4 +
j ]
1331 + m1Ptr[4] * m2Ptr[ 4 * 4 +
j ]
1332 + m1Ptr[5] * m2Ptr[ 5 * 4 +
j ];
1342 for ( i = 0; i < 5; i++ ) {
1343 for ( j = 0; j < 5; j++ ) {
1344 *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 5 +
j ]
1345 + m1Ptr[1] * m2Ptr[ 1 * 5 +
j ]
1346 + m1Ptr[2] * m2Ptr[ 2 * 5 +
j ]
1347 + m1Ptr[3] * m2Ptr[ 3 * 5 +
j ]
1348 + m1Ptr[4] * m2Ptr[ 4 * 5 +
j ]
1349 + m1Ptr[5] * m2Ptr[ 5 * 5 +
j ];
1360 for ( i = 0; i < 6; i++ ) {
1361 *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 1 ]
1362 + m1Ptr[1] * m2Ptr[ 1 * 1 ]
1363 + m1Ptr[2] * m2Ptr[ 2 * 1 ]
1364 + m1Ptr[3] * m2Ptr[ 3 * 1 ]
1365 + m1Ptr[4] * m2Ptr[ 4 * 1 ]
1366 + m1Ptr[5] * m2Ptr[ 5 * 1 ];
1373 for ( i = 0; i < 6; i++ ) {
1374 for ( j = 0; j < 2; j++ ) {
1375 *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 2 +
j ]
1376 + m1Ptr[1] * m2Ptr[ 1 * 2 +
j ]
1377 + m1Ptr[2] * m2Ptr[ 2 * 2 +
j ]
1378 + m1Ptr[3] * m2Ptr[ 3 * 2 +
j ]
1379 + m1Ptr[4] * m2Ptr[ 4 * 2 +
j ]
1380 + m1Ptr[5] * m2Ptr[ 5 * 2 +
j ];
1388 for ( i = 0; i < 6; i++ ) {
1389 for ( j = 0; j < 3; j++ ) {
1390 *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 3 +
j ]
1391 + m1Ptr[1] * m2Ptr[ 1 * 3 +
j ]
1392 + m1Ptr[2] * m2Ptr[ 2 * 3 +
j ]
1393 + m1Ptr[3] * m2Ptr[ 3 * 3 +
j ]
1394 + m1Ptr[4] * m2Ptr[ 4 * 3 +
j ]
1395 + m1Ptr[5] * m2Ptr[ 5 * 3 +
j ];
1403 for ( i = 0; i < 6; i++ ) {
1404 for ( j = 0; j < 4; j++ ) {
1405 *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 4 +
j ]
1406 + m1Ptr[1] * m2Ptr[ 1 * 4 +
j ]
1407 + m1Ptr[2] * m2Ptr[ 2 * 4 +
j ]
1408 + m1Ptr[3] * m2Ptr[ 3 * 4 +
j ]
1409 + m1Ptr[4] * m2Ptr[ 4 * 4 +
j ]
1410 + m1Ptr[5] * m2Ptr[ 5 * 4 +
j ];
1418 for ( i = 0; i < 6; i++ ) {
1419 for ( j = 0; j < 5; j++ ) {
1420 *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 5 +
j ]
1421 + m1Ptr[1] * m2Ptr[ 1 * 5 +
j ]
1422 + m1Ptr[2] * m2Ptr[ 2 * 5 +
j ]
1423 + m1Ptr[3] * m2Ptr[ 3 * 5 +
j ]
1424 + m1Ptr[4] * m2Ptr[ 4 * 5 +
j ]
1425 + m1Ptr[5] * m2Ptr[ 5 * 5 +
j ];
1433 for ( i = 0; i < 6; i++ ) {
1434 for ( j = 0; j < 6; j++ ) {
1435 *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 6 +
j ]
1436 + m1Ptr[1] * m2Ptr[ 1 * 6 +
j ]
1437 + m1Ptr[2] * m2Ptr[ 2 * 6 +
j ]
1438 + m1Ptr[3] * m2Ptr[ 3 * 6 +
j ]
1439 + m1Ptr[4] * m2Ptr[ 4 * 6 +
j ]
1440 + m1Ptr[5] * m2Ptr[ 5 * 6 +
j ];
1450 for ( i = 0; i < k; i++ ) {
1452 for ( j = 0; j <
l; j++ ) {
1453 *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[
l] + m1Ptr[2] * m2Ptr[2*
l] +
1454 m1Ptr[3] * m2Ptr[3*
l] + m1Ptr[4] * m2Ptr[4*
l] + m1Ptr[5] * m2Ptr[5*
l];
1462 for ( i = 0; i < k; i++ ) {
1463 for ( j = 0; j <
l; j++ ) {
1465 sum = m1Ptr[0] * m2Ptr[0];
1468 sum += m1Ptr[
n] * m2Ptr[0];
1494 const float *m1Ptr, *m2Ptr;
1507 if ( k == 6 && l == 1 ) {
1508 for ( i = 0; i < 6; i++ ) {
1509 *dstPtr++ = m1Ptr[0] * m2Ptr[0];
1514 for ( i = 0; i < k; i++ ) {
1516 for ( j = 0; j <
l; j++ ) {
1517 *dstPtr++ = m1Ptr[0] * m2Ptr[0];
1524 if ( k == 6 && l == 2 ) {
1525 for ( i = 0; i < 6; i++ ) {
1526 *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*2+0] + m1Ptr[1*6] * m2Ptr[1*2+0];
1527 *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*2+1] + m1Ptr[1*6] * m2Ptr[1*2+1];
1532 for ( i = 0; i < k; i++ ) {
1534 for ( j = 0; j <
l; j++ ) {
1535 *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[k] * m2Ptr[
l];
1542 if ( k == 6 && l == 3 ) {
1543 for ( i = 0; i < 6; i++ ) {
1544 *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*3+0] + m1Ptr[1*6] * m2Ptr[1*3+0] + m1Ptr[2*6] * m2Ptr[2*3+0];
1545 *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*3+1] + m1Ptr[1*6] * m2Ptr[1*3+1] + m1Ptr[2*6] * m2Ptr[2*3+1];
1546 *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*3+2] + m1Ptr[1*6] * m2Ptr[1*3+2] + m1Ptr[2*6] * m2Ptr[2*3+2];
1551 for ( i = 0; i < k; i++ ) {
1553 for ( j = 0; j <
l; j++ ) {
1554 *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[k] * m2Ptr[
l] + m1Ptr[2*k] * m2Ptr[2*
l];
1561 if ( k == 6 && l == 4 ) {
1562 for ( i = 0; i < 6; i++ ) {
1563 *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*4+0] + m1Ptr[1*6] * m2Ptr[1*4+0] + m1Ptr[2*6] * m2Ptr[2*4+0] + m1Ptr[3*6] * m2Ptr[3*4+0];
1564 *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*4+1] + m1Ptr[1*6] * m2Ptr[1*4+1] + m1Ptr[2*6] * m2Ptr[2*4+1] + m1Ptr[3*6] * m2Ptr[3*4+1];
1565 *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*4+2] + m1Ptr[1*6] * m2Ptr[1*4+2] + m1Ptr[2*6] * m2Ptr[2*4+2] + m1Ptr[3*6] * m2Ptr[3*4+2];
1566 *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*4+3] + m1Ptr[1*6] * m2Ptr[1*4+3] + m1Ptr[2*6] * m2Ptr[2*4+3] + m1Ptr[3*6] * m2Ptr[3*4+3];
1571 for ( i = 0; i < k; i++ ) {
1573 for ( j = 0; j <
l; j++ ) {
1574 *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[k] * m2Ptr[
l] + m1Ptr[2*k] * m2Ptr[2*
l] +
1575 m1Ptr[3*k] * m2Ptr[3*
l];
1582 if ( k == 6 && l == 5 ) {
1583 for ( i = 0; i < 6; i++ ) {
1584 *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*5+0] + m1Ptr[1*6] * m2Ptr[1*5+0] + m1Ptr[2*6] * m2Ptr[2*5+0] + m1Ptr[3*6] * m2Ptr[3*5+0] + m1Ptr[4*6] * m2Ptr[4*5+0];
1585 *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*5+1] + m1Ptr[1*6] * m2Ptr[1*5+1] + m1Ptr[2*6] * m2Ptr[2*5+1] + m1Ptr[3*6] * m2Ptr[3*5+1] + m1Ptr[4*6] * m2Ptr[4*5+1];
1586 *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*5+2] + m1Ptr[1*6] * m2Ptr[1*5+2] + m1Ptr[2*6] * m2Ptr[2*5+2] + m1Ptr[3*6] * m2Ptr[3*5+2] + m1Ptr[4*6] * m2Ptr[4*5+2];
1587 *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*5+3] + m1Ptr[1*6] * m2Ptr[1*5+3] + m1Ptr[2*6] * m2Ptr[2*5+3] + m1Ptr[3*6] * m2Ptr[3*5+3] + m1Ptr[4*6] * m2Ptr[4*5+3];
1588 *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*5+4] + m1Ptr[1*6] * m2Ptr[1*5+4] + m1Ptr[2*6] * m2Ptr[2*5+4] + m1Ptr[3*6] * m2Ptr[3*5+4] + m1Ptr[4*6] * m2Ptr[4*5+4];
1593 for ( i = 0; i < k; i++ ) {
1595 for ( j = 0; j <
l; j++ ) {
1596 *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[k] * m2Ptr[
l] + m1Ptr[2*k] * m2Ptr[2*
l] +
1597 m1Ptr[3*k] * m2Ptr[3*
l] + m1Ptr[4*k] * m2Ptr[4*
l];
1608 for ( j = 0; j < 6; j++ ) {
1609 *dstPtr++ = m1Ptr[0*1] * m2Ptr[0*6] +
1610 m1Ptr[1*1] * m2Ptr[1*6] +
1611 m1Ptr[2*1] * m2Ptr[2*6] +
1612 m1Ptr[3*1] * m2Ptr[3*6] +
1613 m1Ptr[4*1] * m2Ptr[4*6] +
1614 m1Ptr[5*1] * m2Ptr[5*6];
1619 for ( i = 0; i < 2; i++ ) {
1621 for ( j = 0; j < 6; j++ ) {
1622 *dstPtr++ = m1Ptr[0*2] * m2Ptr[0*6] +
1623 m1Ptr[1*2] * m2Ptr[1*6] +
1624 m1Ptr[2*2] * m2Ptr[2*6] +
1625 m1Ptr[3*2] * m2Ptr[3*6] +
1626 m1Ptr[4*2] * m2Ptr[4*6] +
1627 m1Ptr[5*2] * m2Ptr[5*6];
1634 for ( i = 0; i < 3; i++ ) {
1636 for ( j = 0; j < 6; j++ ) {
1637 *dstPtr++ = m1Ptr[0*3] * m2Ptr[0*6] +
1638 m1Ptr[1*3] * m2Ptr[1*6] +
1639 m1Ptr[2*3] * m2Ptr[2*6] +
1640 m1Ptr[3*3] * m2Ptr[3*6] +
1641 m1Ptr[4*3] * m2Ptr[4*6] +
1642 m1Ptr[5*3] * m2Ptr[5*6];
1649 for ( i = 0; i < 4; i++ ) {
1651 for ( j = 0; j < 6; j++ ) {
1652 *dstPtr++ = m1Ptr[0*4] * m2Ptr[0*6] +
1653 m1Ptr[1*4] * m2Ptr[1*6] +
1654 m1Ptr[2*4] * m2Ptr[2*6] +
1655 m1Ptr[3*4] * m2Ptr[3*6] +
1656 m1Ptr[4*4] * m2Ptr[4*6] +
1657 m1Ptr[5*4] * m2Ptr[5*6];
1664 for ( i = 0; i < 5; i++ ) {
1666 for ( j = 0; j < 6; j++ ) {
1667 *dstPtr++ = m1Ptr[0*5] * m2Ptr[0*6] +
1668 m1Ptr[1*5] * m2Ptr[1*6] +
1669 m1Ptr[2*5] * m2Ptr[2*6] +
1670 m1Ptr[3*5] * m2Ptr[3*6] +
1671 m1Ptr[4*5] * m2Ptr[4*6] +
1672 m1Ptr[5*5] * m2Ptr[5*6];
1679 for ( i = 0; i < 6; i++ ) {
1681 for ( j = 0; j < 6; j++ ) {
1682 *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*6] +
1683 m1Ptr[1*6] * m2Ptr[1*6] +
1684 m1Ptr[2*6] * m2Ptr[2*6] +
1685 m1Ptr[3*6] * m2Ptr[3*6] +
1686 m1Ptr[4*6] * m2Ptr[4*6] +
1687 m1Ptr[5*6] * m2Ptr[5*6];
1695 for ( i = 0; i < k; i++ ) {
1697 for ( j = 0; j <
l; j++ ) {
1698 *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[k] * m2Ptr[
l] + m1Ptr[2*k] * m2Ptr[2*
l] +
1699 m1Ptr[3*k] * m2Ptr[3*
l] + m1Ptr[4*k] * m2Ptr[4*
l] + m1Ptr[5*k] * m2Ptr[5*
l];
1706 for ( i = 0; i < k; i++ ) {
1707 for ( j = 0; j <
l; j++ ) {
1710 sum = m1Ptr[0] * m2Ptr[0];
1714 sum += m1Ptr[0] * m2Ptr[0];
// Packs a pair (n, s) into one integer usable as a switch selector and as a
// case label: n is shifted left by three bits and the low three bits of s are
// ORed in. Both arguments are parenthesised so that an expression argument
// (for example `a | b`) is evaluated as a whole before the shift or mask.
#define NSKIP( n, s ) ( ( (n) << 3 ) | ( (s) & 7 ) )
1749 switch(
NSKIP( n, skip ) ) {
1750 case NSKIP( 1, 0 ): x[0] = b[0];
1752 case NSKIP( 2, 0 ): x[0] = b[0];
1753 case NSKIP( 2, 1 ): x[1] = b[1] - lptr[1*nc+0] * x[0];
1755 case NSKIP( 3, 0 ): x[0] = b[0];
1756 case NSKIP( 3, 1 ): x[1] = b[1] - lptr[1*nc+0] * x[0];
1757 case NSKIP( 3, 2 ): x[2] = b[2] - lptr[2*nc+0] * x[0] - lptr[2*nc+1] * x[1];
1759 case NSKIP( 4, 0 ): x[0] = b[0];
1760 case NSKIP( 4, 1 ): x[1] = b[1] - lptr[1*nc+0] * x[0];
1761 case NSKIP( 4, 2 ): x[2] = b[2] - lptr[2*nc+0] * x[0] - lptr[2*nc+1] * x[1];
1762 case NSKIP( 4, 3 ): x[3] = b[3] - lptr[3*nc+0] * x[0] - lptr[3*nc+1] * x[1] - lptr[3*nc+2] * x[2];
1764 case NSKIP( 5, 0 ): x[0] = b[0];
1765 case NSKIP( 5, 1 ): x[1] = b[1] - lptr[1*nc+0] * x[0];
1766 case NSKIP( 5, 2 ): x[2] = b[2] - lptr[2*nc+0] * x[0] - lptr[2*nc+1] * x[1];
1767 case NSKIP( 5, 3 ): x[3] = b[3] - lptr[3*nc+0] * x[0] - lptr[3*nc+1] * x[1] - lptr[3*nc+2] * x[2];
1768 case NSKIP( 5, 4 ): x[4] = b[4] - lptr[4*nc+0] * x[0] - lptr[4*nc+1] * x[1] - lptr[4*nc+2] * x[2] - lptr[4*nc+3] * x[3];
1770 case NSKIP( 6, 0 ): x[0] = b[0];
1771 case NSKIP( 6, 1 ): x[1] = b[1] - lptr[1*nc+0] * x[0];
1772 case NSKIP( 6, 2 ): x[2] = b[2] - lptr[2*nc+0] * x[0] - lptr[2*nc+1] * x[1];
1773 case NSKIP( 6, 3 ): x[3] = b[3] - lptr[3*nc+0] * x[0] - lptr[3*nc+1] * x[1] - lptr[3*nc+2] * x[2];
1774 case NSKIP( 6, 4 ): x[4] = b[4] - lptr[4*nc+0] * x[0] - lptr[4*nc+1] * x[1] - lptr[4*nc+2] * x[2] - lptr[4*nc+3] * x[3];
1775 case NSKIP( 6, 5 ): x[5] = b[5] - lptr[5*nc+0] * x[0] - lptr[5*nc+1] * x[1] - lptr[5*nc+2] * x[2] - lptr[5*nc+3] * x[3] - lptr[5*nc+4] * x[4];
1777 case NSKIP( 7, 0 ): x[0] = b[0];
1778 case NSKIP( 7, 1 ): x[1] = b[1] - lptr[1*nc+0] * x[0];
1779 case NSKIP( 7, 2 ): x[2] = b[2] - lptr[2*nc+0] * x[0] - lptr[2*nc+1] * x[1];
1780 case NSKIP( 7, 3 ): x[3] = b[3] - lptr[3*nc+0] * x[0] - lptr[3*nc+1] * x[1] - lptr[3*nc+2] * x[2];
1781 case NSKIP( 7, 4 ): x[4] = b[4] - lptr[4*nc+0] * x[0] - lptr[4*nc+1] * x[1] - lptr[4*nc+2] * x[2] - lptr[4*nc+3] * x[3];
1782 case NSKIP( 7, 5 ): x[5] = b[5] - lptr[5*nc+0] * x[0] - lptr[5*nc+1] * x[1] - lptr[5*nc+2] * x[2] - lptr[5*nc+3] * x[3] - lptr[5*nc+4] * x[4];
1783 case NSKIP( 7, 6 ): x[6] = b[6] - lptr[6*nc+0] * x[0] - lptr[6*nc+1] * x[1] - lptr[6*nc+2] * x[2] - lptr[6*nc+3] * x[3] - lptr[6*nc+4] * x[4] - lptr[6*nc+5] * x[5];
1791 case 0: x[0] = b[0];
1792 case 1: x[1] = b[1] - lptr[1*nc+0] * x[0];
1793 case 2: x[2] = b[2] - lptr[2*nc+0] * x[0] - lptr[2*nc+1] * x[1];
1794 case 3: x[3] = b[3] - lptr[3*nc+0] * x[0] - lptr[3*nc+1] * x[1] - lptr[3*nc+2] * x[2];
// Four independent double-precision partial sums used by the unrolled
// accumulation that follows. Declared without the `register` specifier,
// which is deprecated in C++11 and no longer accepted from C++17 on.
double s0, s1, s2, s3;
1803 for ( i = skip; i <
n; i++ ) {
1804 s0 = lptr[0] * x[0];
1805 s1 = lptr[1] * x[1];
1806 s2 = lptr[2] * x[2];
1807 s3 = lptr[3] * x[3];
1808 for ( j = 4; j < i-7; j += 8 ) {
1809 s0 += lptr[j+0] * x[j+0];
1810 s1 += lptr[j+1] * x[j+1];
1811 s2 += lptr[j+2] * x[j+2];
1812 s3 += lptr[j+3] * x[j+3];
1813 s0 += lptr[j+4] * x[j+4];
1814 s1 += lptr[j+5] * x[j+5];
1815 s2 += lptr[j+6] * x[j+6];
1816 s3 += lptr[j+7] * x[j+7];
1820 case 7: s0 += lptr[j+6] * x[j+6];
1821 case 6: s1 += lptr[j+5] * x[j+5];
1822 case 5: s2 += lptr[j+4] * x[j+4];
1823 case 4: s3 += lptr[j+3] * x[j+3];
1824 case 3: s0 += lptr[j+2] * x[j+2];
1825 case 2: s1 += lptr[j+1] * x[j+1];
1826 case 1: s2 += lptr[j+0] * x[j+0];
1845 for ( i = skip; i <
n; i++ ) {
1848 for ( j = 0; j <
i; j++ ) {
1849 sum -= lptr[
j] * x[
j];
1885 x[0] = b[0] - lptr[1*nc+0] * x[1];
1889 x[1] = b[1] - lptr[2*nc+1] * x[2];
1890 x[0] = b[0] - lptr[2*nc+0] * x[2] - lptr[1*nc+0] * x[1];
1894 x[2] = b[2] - lptr[3*nc+2] * x[3];
1895 x[1] = b[1] - lptr[3*nc+1] * x[3] - lptr[2*nc+1] * x[2];
1896 x[0] = b[0] - lptr[3*nc+0] * x[3] - lptr[2*nc+0] * x[2] - lptr[1*nc+0] * x[1];
1900 x[3] = b[3] - lptr[4*nc+3] * x[4];
1901 x[2] = b[2] - lptr[4*nc+2] * x[4] - lptr[3*nc+2] * x[3];
1902 x[1] = b[1] - lptr[4*nc+1] * x[4] - lptr[3*nc+1] * x[3] - lptr[2*nc+1] * x[2];
1903 x[0] = b[0] - lptr[4*nc+0] * x[4] - lptr[3*nc+0] * x[3] - lptr[2*nc+0] * x[2] - lptr[1*nc+0] * x[1];
1907 x[4] = b[4] - lptr[5*nc+4] * x[5];
1908 x[3] = b[3] - lptr[5*nc+3] * x[5] - lptr[4*nc+3] * x[4];
1909 x[2] = b[2] - lptr[5*nc+2] * x[5] - lptr[4*nc+2] * x[4] - lptr[3*nc+2] * x[3];
1910 x[1] = b[1] - lptr[5*nc+1] * x[5] - lptr[4*nc+1] * x[4] - lptr[3*nc+1] * x[3] - lptr[2*nc+1] * x[2];
1911 x[0] = b[0] - lptr[5*nc+0] * x[5] - lptr[4*nc+0] * x[4] - lptr[3*nc+0] * x[3] - lptr[2*nc+0] * x[2] - lptr[1*nc+0] * x[1];
1915 x[5] = b[5] - lptr[6*nc+5] * x[6];
1916 x[4] = b[4] - lptr[6*nc+4] * x[6] - lptr[5*nc+4] * x[5];
1917 x[3] = b[3] - lptr[6*nc+3] * x[6] - lptr[5*nc+3] * x[5] - lptr[4*nc+3] * x[4];
1918 x[2] = b[2] - lptr[6*nc+2] * x[6] - lptr[5*nc+2] * x[5] - lptr[4*nc+2] * x[4] - lptr[3*nc+2] * x[3];
1919 x[1] = b[1] - lptr[6*nc+1] * x[6] - lptr[5*nc+1] * x[5] - lptr[4*nc+1] * x[4] - lptr[3*nc+1] * x[3] - lptr[2*nc+1] * x[2];
1920 x[0] = b[0] - lptr[6*nc+0] * x[6] - lptr[5*nc+0] * x[5] - lptr[4*nc+0] * x[4] - lptr[3*nc+0] * x[3] - lptr[2*nc+0] * x[2] - lptr[1*nc+0] * x[1];
// Four independent double-precision accumulators used by the unrolled
// subtraction loop that follows. Declared without the `register` specifier,
// which is deprecated in C++11 and no longer accepted from C++17 on.
double s0, s1, s2, s3;
1934 for ( i = n; i >= 4; i -= 4 ) {
1940 for ( j = 0; j < n-
i; j += 4 ) {
1941 s0 -= lptr[(j+0)*nc+0] * xptr[j+0];
1942 s1 -= lptr[(j+0)*nc+1] * xptr[j+0];
1943 s2 -= lptr[(j+0)*nc+2] * xptr[j+0];
1944 s3 -= lptr[(j+0)*nc+3] * xptr[j+0];
1945 s0 -= lptr[(j+1)*nc+0] * xptr[j+1];
1946 s1 -= lptr[(j+1)*nc+1] * xptr[j+1];
1947 s2 -= lptr[(j+1)*nc+2] * xptr[j+1];
1948 s3 -= lptr[(j+1)*nc+3] * xptr[j+1];
1949 s0 -= lptr[(j+2)*nc+0] * xptr[j+2];
1950 s1 -= lptr[(j+2)*nc+1] * xptr[j+2];
1951 s2 -= lptr[(j+2)*nc+2] * xptr[j+2];
1952 s3 -= lptr[(j+2)*nc+3] * xptr[j+2];
1953 s0 -= lptr[(j+3)*nc+0] * xptr[j+3];
1954 s1 -= lptr[(j+3)*nc+1] * xptr[j+3];
1955 s2 -= lptr[(j+3)*nc+2] * xptr[j+3];
1956 s3 -= lptr[(j+3)*nc+3] * xptr[j+3];
1959 s0 -= lptr[0-1*nc] * s3;
1960 s1 -= lptr[1-1*nc] * s3;
1961 s2 -= lptr[2-1*nc] * s3;
1962 s0 -= lptr[0-2*nc] * s2;
1963 s1 -= lptr[1-2*nc] * s2;
1964 s0 -= lptr[0-3*nc] * s1;
1975 for ( i--; i >= 0; i-- ) {
1978 for ( j = i + 1; j <
n; j++ ) {
1979 s0 -= lptr[j*nc] * x[
j];
1991 for ( i = n - 1; i >= 0; i-- ) {
1994 for ( j = i + 1; j <
n; j++ ) {
1995 sum -= ptr[j*nc] * x[
j];
2015 float *
v, *diag, *mptr;
2016 double s0, s1, s2, s3, sum, d;
2018 v = (
float *) _alloca16( n *
sizeof(
float ) );
2019 diag = (
float *) _alloca16( n *
sizeof(
float ) );
2031 if ( sum == 0.0
f ) {
2036 invDiag[0] = d = 1.0f / sum;
2043 for ( j = 1; j <
n; j++ ) {
2044 mptr[j*nc+0] = ( mptr[j*nc+0] ) * d;
2049 v[0] = diag[0] * mptr[0]; s0 = v[0] * mptr[0];
2052 if ( sum == 0.0
f ) {
2058 invDiag[1] = d = 1.0f / sum;
2065 for ( j = 2; j <
n; j++ ) {
2066 mptr[j*nc+1] = ( mptr[j*nc+1] - v[0] * mptr[j*nc+0] ) * d;
2071 v[0] = diag[0] * mptr[0]; s0 = v[0] * mptr[0];
2072 v[1] = diag[1] * mptr[1]; s1 = v[1] * mptr[1];
2073 sum = mptr[2] - s0 - s1;
2075 if ( sum == 0.0
f ) {
2081 invDiag[2] = d = 1.0f / sum;
2088 for ( j = 3; j <
n; j++ ) {
2089 mptr[j*nc+2] = ( mptr[j*nc+2] - v[0] * mptr[j*nc+0] - v[1] * mptr[j*nc+1] ) * d;
2094 v[0] = diag[0] * mptr[0]; s0 = v[0] * mptr[0];
2095 v[1] = diag[1] * mptr[1]; s1 = v[1] * mptr[1];
2096 v[2] = diag[2] * mptr[2]; s2 = v[2] * mptr[2];
2097 sum = mptr[3] - s0 - s1 - s2;
2099 if ( sum == 0.0
f ) {
2105 invDiag[3] = d = 1.0f / sum;
2112 for ( j = 4; j <
n; j++ ) {
2113 mptr[j*nc+3] = ( mptr[j*nc+3] - v[0] * mptr[j*nc+0] - v[1] * mptr[j*nc+1] - v[2] * mptr[j*nc+2] ) * d;
2116 for ( i = 4; i <
n; i++ ) {
2120 v[0] = diag[0] * mptr[0]; s0 = v[0] * mptr[0];
2121 v[1] = diag[1] * mptr[1]; s1 = v[1] * mptr[1];
2122 v[2] = diag[2] * mptr[2]; s2 = v[2] * mptr[2];
2123 v[3] = diag[3] * mptr[3]; s3 = v[3] * mptr[3];
2124 for ( k = 4; k < i-3; k += 4 ) {
2125 v[k+0] = diag[k+0] * mptr[k+0]; s0 += v[k+0] * mptr[k+0];
2126 v[k+1] = diag[k+1] * mptr[k+1]; s1 += v[k+1] * mptr[k+1];
2127 v[k+2] = diag[k+2] * mptr[k+2]; s2 += v[k+2] * mptr[k+2];
2128 v[k+3] = diag[k+3] * mptr[k+3]; s3 += v[k+3] * mptr[k+3];
2132 case 3: v[k+2] = diag[k+2] * mptr[k+2]; s0 += v[k+2] * mptr[k+2];
2133 case 2: v[k+1] = diag[k+1] * mptr[k+1]; s1 += v[k+1] * mptr[k+1];
2134 case 1: v[k+0] = diag[k+0] * mptr[k+0]; s2 += v[k+0] * mptr[k+0];
2141 sum = mptr[
i] - sum;
2143 if ( sum == 0.0
f ) {
2149 invDiag[
i] = d = 1.0f / sum;
2156 for ( j = i+1; j <
n; j++ ) {
2157 s0 = mptr[0] * v[0];
2158 s1 = mptr[1] * v[1];
2159 s2 = mptr[2] * v[2];
2160 s3 = mptr[3] * v[3];
2161 for ( k = 4; k < i-7; k += 8 ) {
2162 s0 += mptr[k+0] * v[k+0];
2163 s1 += mptr[k+1] * v[k+1];
2164 s2 += mptr[k+2] * v[k+2];
2165 s3 += mptr[k+3] * v[k+3];
2166 s0 += mptr[k+4] * v[k+4];
2167 s1 += mptr[k+5] * v[k+5];
2168 s2 += mptr[k+6] * v[k+6];
2169 s3 += mptr[k+7] * v[k+7];
2173 case 7: s0 += mptr[k+6] * v[k+6];
2174 case 6: s1 += mptr[k+5] * v[k+5];
2175 case 5: s2 += mptr[k+4] * v[k+4];
2176 case 4: s3 += mptr[k+3] * v[k+3];
2177 case 3: s0 += mptr[k+2] * v[k+2];
2178 case 2: s1 += mptr[k+1] * v[k+1];
2179 case 1: s2 += mptr[k+0] * v[k+0];
2186 mptr[
i] = ( mptr[
i] - sum ) * d;
2196 float *
v, *ptr, *diagPtr;
2199 v = (
float *) _alloca16( n *
sizeof(
float ) );
2202 for ( i = 0; i <
n; i++ ) {
2207 for ( j = 0; j <
i; j++ ) {
2209 v[
j] = diagPtr[0] * d;
2214 if ( sum == 0.0
f ) {
2219 invDiag[
i] = d = 1.0f / sum;
2226 for ( j = i + 1; j <
n; j++ ) {
2228 for ( k = 0; k <
i; k++ ) {
2229 sum -= ptr[k] * v[k];
2249 for ( i = 0; i < numJoints; i++ ) {
2251 joints[
j].
q.
Slerp( joints[j].
q, blendJoints[j].q, lerp );
2252 joints[
j].
t.
Lerp( joints[j].
t, blendJoints[j].t, lerp );
2264 for ( i = 0; i < numJoints; i++ ) {
2278 for ( i = 0; i < numJoints; i++ ) {
2291 for( i = firstJoint; i <= lastJoint; i++ ) {
2292 assert( parents[i] < i );
2293 jointMats[
i] *= jointMats[parents[
i]];
2305 for( i = lastJoint; i >= firstJoint; i-- ) {
2306 assert( parents[i] < i );
2307 jointMats[
i] /= jointMats[parents[
i]];
2318 const byte *jointsPtr = (
byte *)joints;
2320 for( j = i = 0; i < numVerts; i++ ) {
2323 v = ( *(
idJointMat *) ( jointsPtr + index[j*2+0] ) ) * weights[j];
2324 while( index[j*2+1] == 0 ) {
2326 v += ( *(
idJointMat *) ( jointsPtr + index[j*2+0] ) ) * weights[j];
2345 for ( i = 0; i < numVerts; i++ ) {
2347 float d0, d1, d2, d3,
t;
2390 for ( i = 0; i < numVerts; i++ ) {
2392 float d0, d1, d2, d3, d4, d5;
2409 cullBits[
i] = bits ^ 0x3F;
2421 for ( i = 0; i < numVerts; i++ ) {
2426 texCoords[
i][0] = d0 = planes[0].
Distance( v );
2427 texCoords[
i][1] = d1 = planes[1].
Distance( v );
2450 for ( i = 0; i < numIndexes; i += 3 ) {
2452 float d0[3], d1[3],
f;
2455 a = verts + indexes[i + 0];
2456 b = verts + indexes[i + 1];
2457 c = verts + indexes[i + 2];
2459 d0[0] = b->
xyz[0] - a->
xyz[0];
2460 d0[1] = b->
xyz[1] - a->
xyz[1];
2461 d0[2] = b->
xyz[2] - a->
xyz[2];
2463 d1[0] = c->
xyz[0] - a->
xyz[0];
2464 d1[1] = c->
xyz[1] - a->
xyz[1];
2465 d1[2] = c->
xyz[2] - a->
xyz[2];
2467 n[0] = d1[1] * d0[2] - d1[2] * d0[1];
2468 n[1] = d1[2] * d0[0] - d1[0] * d0[2];
2469 n[2] = d1[0] * d0[1] - d1[1] * d0[0];
2496 bool *used = (
bool *)_alloca16( numVerts *
sizeof( used[0] ) );
2497 memset( used, 0, numVerts *
sizeof( used[0] ) );
2500 for ( i = 0; i < numIndexes; i += 3 ) {
2502 unsigned long signBit;
2503 float d0[5], d1[5],
f, area;
2506 int v0 = indexes[i + 0];
2507 int v1 = indexes[i + 1];
2508 int v2 = indexes[i + 2];
2514 d0[0] = b->
xyz[0] - a->
xyz[0];
2515 d0[1] = b->
xyz[1] - a->
xyz[1];
2516 d0[2] = b->
xyz[2] - a->
xyz[2];
2517 d0[3] = b->
st[0] - a->
st[0];
2518 d0[4] = b->
st[1] - a->
st[1];
2520 d1[0] = c->
xyz[0] - a->
xyz[0];
2521 d1[1] = c->
xyz[1] - a->
xyz[1];
2522 d1[2] = c->
xyz[2] - a->
xyz[2];
2523 d1[3] = c->
st[0] - a->
st[0];
2524 d1[4] = c->
st[1] - a->
st[1];
2527 n[0] = d1[1] * d0[2] - d1[2] * d0[1];
2528 n[1] = d1[2] * d0[0] - d1[0] * d0[2];
2529 n[2] = d1[0] * d0[1] - d1[1] * d0[0];
2542 area = d0[3] * d1[4] - d0[4] * d1[3];
2543 signBit = ( *(
unsigned long *)&area ) & ( 1 << 31 );
2546 t0[0] = d0[0] * d1[4] - d0[4] * d1[0];
2547 t0[1] = d0[1] * d1[4] - d0[4] * d1[1];
2548 t0[2] = d0[2] * d1[4] - d0[4] * d1[2];
2551 *(
unsigned long *)&f ^= signBit;
2558 t1[0] = d0[3] * d1[0] - d0[0] * d1[3];
2559 t1[1] = d0[3] * d1[1] - d0[1] * d1[3];
2560 t1[2] = d0[3] * d1[2] - d0[2] * d1[3];
2563 *(
unsigned long *)&f ^= signBit;
2612 #define DERIVE_UNSMOOTHED_BITANGENT
2617 for ( i = 0; i < numVerts; i++ ) {
2619 float d0, d1, d2, d3, d4;
2620 float d5, d6, d7, d8, d9;
2632 d0 = b->
xyz[0] - a->
xyz[0];
2633 d1 = b->
xyz[1] - a->
xyz[1];
2634 d2 = b->
xyz[2] - a->
xyz[2];
2635 d3 = b->
st[0] - a->
st[0];
2636 d4 = b->
st[1] - a->
st[1];
2638 d5 = c->
xyz[0] - a->
xyz[0];
2639 d6 = c->
xyz[1] - a->
xyz[1];
2640 d7 = c->
xyz[2] - a->
xyz[2];
2641 d8 = c->
st[0] - a->
st[0];
2642 d9 = c->
st[1] - a->
st[1];
2648 n0 = s2 * ( d6 * d2 - d7 * d1 );
2649 n1 = s2 * ( d7 * d0 - d5 * d2 );
2650 n2 = s2 * ( d5 * d1 - d6 * d0 );
2652 t0 = s0 * ( d0 * d9 - d4 * d5 );
2653 t1 = s0 * ( d1 * d9 - d4 * d6 );
2654 t2 = s0 * ( d2 * d9 - d4 * d7 );
2656 #ifndef DERIVE_UNSMOOTHED_BITANGENT
2657 t3 = s1 * ( d3 * d5 - d0 * d8 );
2658 t4 = s1 * ( d3 * d6 - d1 * d8 );
2659 t5 = s1 * ( d3 * d7 - d2 * d8 );
2661 t3 = s1 * ( n2 * t1 - n1 * t2 );
2662 t4 = s1 * ( n0 * t2 - n2 * t0 );
2663 t5 = s1 * ( n1 * t0 - n0 * t1 );
2690 for (
int i = 0;
i < numVerts;
i++ ) {
2695 v.
x *=
f; v.
y *=
f; v.
z *=
f;
2697 for (
int j = 0;
j < 2;
j++ ) {
2702 t.
x *=
f; t.
y *=
f; t.
z *=
f;
2718 bool *used = (
bool *)_alloca16( numVerts *
sizeof( used[0] ) );
2719 memset( used, 0, numVerts *
sizeof( used[0] ) );
2721 for (
int i = numIndexes - 1;
i >= 0;
i-- ) {
2722 used[indexes[
i]] =
true;
2725 for (
int i = 0;
i < numVerts;
i++ ) {
2734 lightVectors[
i][0] = lightDir * v->
tangents[0];
2735 lightVectors[
i][1] = lightDir * v->
tangents[1];
2736 lightVectors[
i][2] = lightDir * v->
normal;
2752 bool *used = (
bool *)_alloca16( numVerts *
sizeof( used[0] ) );
2753 memset( used, 0, numVerts *
sizeof( used[0] ) );
2755 for (
int i = numIndexes - 1;
i >= 0;
i-- ) {
2756 used[indexes[
i]] =
true;
2759 for (
int i = 0;
i < numVerts;
i++ ) {
2772 lightDir[0] *= ilength;
2773 lightDir[1] *= ilength;
2774 lightDir[2] *= ilength;
2777 viewDir[0] *= ilength;
2778 viewDir[1] *= ilength;
2779 viewDir[2] *= ilength;
2781 lightDir += viewDir;
2783 texCoords[
i][0] = lightDir * v->
tangents[0];
2784 texCoords[
i][1] = lightDir * v->
tangents[1];
2785 texCoords[
i][2] = lightDir * v->
normal;
2786 texCoords[
i][3] = 1.0f;
2798 for (
int i = 0;
i < numVerts;
i++ ) {
2799 if ( vertRemap[
i] ) {
2803 vertexCache[outVerts+0][0] = v[0];
2804 vertexCache[outVerts+0][1] = v[1];
2805 vertexCache[outVerts+0][2] = v[2];
2806 vertexCache[outVerts+0][3] = 1.0f;
2811 vertexCache[outVerts+1][0] = v[0] - lightOrigin[0];
2812 vertexCache[outVerts+1][1] = v[1] - lightOrigin[1];
2813 vertexCache[outVerts+1][2] = v[2] - lightOrigin[2];
2814 vertexCache[outVerts+1][3] = 0.0f;
2815 vertRemap[
i] = outVerts;
2827 for (
int i = 0;
i < numVerts;
i++ ) {
2829 vertexCache[
i*2+0][0] = v[0];
2830 vertexCache[
i*2+1][0] = v[0];
2831 vertexCache[
i*2+0][1] = v[1];
2832 vertexCache[
i*2+1][1] = v[1];
2833 vertexCache[
i*2+0][2] = v[2];
2834 vertexCache[
i*2+1][2] = v[2];
2835 vertexCache[
i*2+0][3] = 1.0f;
2836 vertexCache[
i*2+1][3] = 0.0f;
2838 return numVerts * 2;
2849 if ( kHz == 11025 ) {
2850 if ( numChannels == 1 ) {
2851 for (
int i = 0;
i < numSamples;
i++ ) {
2852 dest[
i*4+0] = dest[
i*4+1] = dest[
i*4+2] = dest[
i*4+3] = (
float) src[
i+0];
2855 for (
int i = 0;
i < numSamples;
i += 2 ) {
2856 dest[
i*4+0] = dest[
i*4+2] = dest[
i*4+4] = dest[
i*4+6] = (
float) src[
i+0];
2857 dest[
i*4+1] = dest[
i*4+3] = dest[
i*4+5] = dest[
i*4+7] = (
float) src[
i+1];
2860 }
else if ( kHz == 22050 ) {
2861 if ( numChannels == 1 ) {
2862 for (
int i = 0;
i < numSamples;
i++ ) {
2863 dest[
i*2+0] = dest[
i*2+1] = (
float) src[
i+0];
2866 for (
int i = 0;
i < numSamples;
i += 2 ) {
2867 dest[
i*2+0] = dest[
i*2+2] = (
float) src[
i+0];
2868 dest[
i*2+1] = dest[
i*2+3] = (
float) src[
i+1];
2871 }
else if ( kHz == 44100 ) {
2872 for (
int i = 0;
i < numSamples;
i++ ) {
2888 if ( kHz == 11025 ) {
2889 if ( numChannels == 1 ) {
2890 for (
int i = 0;
i < numSamples;
i++ ) {
2891 dest[
i*4+0] = dest[
i*4+1] = dest[
i*4+2] = dest[
i*4+3] = ogg[0][
i] * 32768.0f;
2894 for (
int i = 0; i < numSamples >> 1;
i++ ) {
2895 dest[
i*8+0] = dest[
i*8+2] = dest[
i*8+4] = dest[
i*8+6] = ogg[0][
i] * 32768.0f;
2896 dest[
i*8+1] = dest[
i*8+3] = dest[
i*8+5] = dest[
i*8+7] = ogg[1][
i] * 32768.0f;
2899 }
else if ( kHz == 22050 ) {
2900 if ( numChannels == 1 ) {
2901 for (
int i = 0;
i < numSamples;
i++ ) {
2902 dest[
i*2+0] = dest[
i*2+1] = ogg[0][
i] * 32768.0f;
2905 for (
int i = 0; i < numSamples >> 1;
i++ ) {
2906 dest[
i*4+0] = dest[
i*4+2] = ogg[0][
i] * 32768.0f;
2907 dest[
i*4+1] = dest[
i*4+3] = ogg[1][
i] * 32768.0f;
2910 }
else if ( kHz == 44100 ) {
2911 if ( numChannels == 1 ) {
2912 for (
int i = 0;
i < numSamples;
i++ ) {
2913 dest[
i*1+0] = ogg[0][
i] * 32768.0f;
2916 for (
int i = 0; i < numSamples >> 1;
i++ ) {
2917 dest[
i*2+0] = ogg[0][
i] * 32768.0f;
2918 dest[
i*2+1] = ogg[1][
i] * 32768.0f;
2932 float sL = lastV[0];
2933 float sR = lastV[1];
2940 mixBuffer[
j*2+0] += samples[
j] * sL;
2941 mixBuffer[
j*2+1] += samples[
j] * sR;
2953 float sL = lastV[0];
2954 float sR = lastV[1];
2961 mixBuffer[
j*2+0] += samples[
j*2+0] * sL;
2962 mixBuffer[
j*2+1] += samples[
j*2+1] * sR;
2974 float sL0 = lastV[0];
2975 float sL1 = lastV[1];
2976 float sL2 = lastV[2];
2977 float sL3 = lastV[3];
2978 float sL4 = lastV[4];
2979 float sL5 = lastV[5];
2991 mixBuffer[
i*6+0] += samples[
i] * sL0;
2992 mixBuffer[
i*6+1] += samples[
i] * sL1;
2993 mixBuffer[
i*6+2] += samples[
i] * sL2;
2994 mixBuffer[
i*6+3] += samples[
i] * sL3;
2995 mixBuffer[
i*6+4] += samples[
i] * sL4;
2996 mixBuffer[
i*6+5] += samples[
i] * sL5;
3012 float sL0 = lastV[0];
3013 float sL1 = lastV[1];
3014 float sL2 = lastV[2];
3015 float sL3 = lastV[3];
3016 float sL4 = lastV[4];
3017 float sL5 = lastV[5];
3029 mixBuffer[
i*6+0] += samples[
i*2+0] * sL0;
3030 mixBuffer[
i*6+1] += samples[
i*2+1] * sL1;
3031 mixBuffer[
i*6+2] += samples[
i*2+0] * sL2;
3032 mixBuffer[
i*6+3] += samples[
i*2+0] * sL3;
3033 mixBuffer[
i*6+4] += samples[
i*2+0] * sL4;
3034 mixBuffer[
i*6+5] += samples[
i*2+1] * sL5;
3051 for (
int i = 0;
i < numSamples;
i++ ) {
3052 if ( mixBuffer[
i] <= -32768.0
f ) {
3053 samples[
i] = -32768;
3054 }
else if ( mixBuffer[
i] >= 32767.0
f ) {
3057 samples[
i] = (short) mixBuffer[
i];
virtual void VPCALL MatX_LowerTriangularSolve(const idMatX &L, float *x, const float *b, const int n, int skip=0)
virtual void VPCALL CreateSpecularTextureCoords(idVec4 *texCoords, const idVec3 &lightOrigin, const idVec3 &viewOrigin, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes)
GLdouble GLdouble GLdouble GLdouble q
virtual void VPCALL TransformVerts(idDrawVert *verts, const int numVerts, const idJointMat *joints, const idVec4 *weights, const int *index, const int numWeights)
static const float INFINITY
virtual void VPCALL AddAssign16(float *dst, const float *src, const int count)
assert(prefInfo.fullscreenBtn)
virtual void VPCALL Dot(float *dst, const idVec3 &constant, const idVec3 *src, const int count)
virtual void VPCALL BlendJoints(idJointQuat *joints, const idJointQuat *blendJoints, const float lerp, const int *index, const int numJoints)
virtual void VPCALL Memset(void *dst, const int val, const int count)
const int MIXBUFFER_SAMPLES
const float * ToFloatPtr(void) const
float Distance(const idVec3 &v) const
virtual void VPCALL MixedSoundToSamples(short *samples, const float *mixBuffer, const int numSamples)
virtual void VPCALL ClampMax(float *dst, const float *src, const float max, const int count)
virtual void VPCALL Memcpy(void *dst, const void *src, const int count)
virtual void VPCALL MatX_TransposeMultiplyAddVecX(idVecX &dst, const idMatX &mat, const idVecX &vec)
virtual void VPCALL TracePointCull(byte *cullBits, byte &totalOr, const float radius, const idPlane *planes, const idDrawVert *verts, const int numVerts)
virtual void VPCALL Add(float *dst, const float constant, const float *src, const int count)
virtual void VPCALL CmpGE(byte *dst, const float *src0, const float constant, const int count)
virtual void VPCALL MixSoundTwoSpeakerStereo(float *mixBuffer, const float *samples, const int numSamples, const float lastV[2], const float currentV[2])
void SetNormal(const idVec3 &normal)
virtual void VPCALL MatX_LowerTriangularSolveTranspose(const idMatX &L, float *x, const float *b, const int n)
idQuat & Slerp(const idQuat &from, const idQuat &to, float t)
void SetTranslation(const idVec3 &t)
virtual void VPCALL Sub16(float *dst, const float *src1, const float *src2, const int count)
virtual void VPCALL ClampMin(float *dst, const float *src, const float min, const int count)
void SetRotation(const idMat3 &m)
virtual void VPCALL DeriveUnsmoothedTangents(idDrawVert *verts, const dominantTri_s *dominantTris, const int numVerts)
virtual void VPCALL MatX_MultiplyVecX(idVecX &dst, const idMatX &mat, const idVecX &vec)
virtual void VPCALL MulAssign16(float *dst, const float constant, const int count)
virtual void VPCALL MixSoundTwoSpeakerMono(float *mixBuffer, const float *samples, const int numSamples, const float lastV[2], const float currentV[2])
GLfloat GLfloat GLfloat v2
float normalizationScale[3]
GLuint GLuint GLsizei count
int GetNumColumns(void) const
#define FLOATSIGNBITSET(f)
virtual void VPCALL OverlayPointCull(byte *cullBits, idVec2 *texCoords, const idPlane *planes, const idDrawVert *verts, const int numVerts)
virtual void VPCALL Negate16(float *dst, const int count)
virtual void VPCALL DecalPointCull(byte *cullBits, const idPlane *planes, const idDrawVert *verts, const int numVerts)
virtual void VPCALL SubAssign16(float *dst, const float *src, const int count)
virtual void VPCALL MatX_MultiplyAddVecX(idVecX &dst, const idMatX &mat, const idVecX &vec)
virtual void VPCALL MatX_MultiplyMatX(idMatX &dst, const idMatX &m1, const idMatX &m2)
virtual void VPCALL CmpLE(byte *dst, const float *src0, const float constant, const int count)
virtual void VPCALL MatX_TransposeMultiplyMatX(idMatX &dst, const idMatX &m1, const idMatX &m2)
virtual void VPCALL Div(float *dst, const float constant, const float *src, const int count)
const float * ToFloatPtr(void) const
virtual void VPCALL ConvertJointQuatsToJointMats(idJointMat *jointMats, const idJointQuat *jointQuats, const int numJoints)
int GetNumRows(void) const
virtual void VPCALL DeriveTriPlanes(idPlane *planes, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes)
idJointQuat ToJointQuat(void) const
virtual bool VPCALL MatX_LDLTFactor(idMatX &mat, idVecX &invDiag, const int n)
virtual void VPCALL MulSub(float *dst, const float constant, const float *src, const int count)
GLubyte GLubyte GLubyte a
virtual void VPCALL MatX_TransposeMultiplyVecX(idVecX &dst, const idMatX &mat, const idVecX &vec)
virtual void VPCALL MixSoundSixSpeakerStereo(float *mixBuffer, const float *samples, const int numSamples, const float lastV[6], const float currentV[6])
virtual void VPCALL MixSoundSixSpeakerMono(float *mixBuffer, const float *samples, const int numSamples, const float lastV[6], const float currentV[6])
virtual void VPCALL CreateTextureSpaceLightVectors(idVec3 *lightVectors, const idVec3 &lightOrigin, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes)
virtual void VPCALL UpSamplePCMTo44kHz(float *dest, const short *pcm, const int numSamples, const int kHz, const int numChannels)
virtual void VPCALL DeriveTangents(idPlane *planes, idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes)
virtual void VPCALL Mul16(float *dst, const float *src1, const float constant, const int count)
virtual int VPCALL CreateShadowCache(idVec4 *vertexCache, int *vertRemap, const idVec3 &lightOrigin, const idDrawVert *verts, const int numVerts)
virtual void VPCALL Zero16(float *dst, const int count)
virtual void VPCALL Mul(float *dst, const float constant, const float *src, const int count)
virtual void VPCALL NormalizeTangents(idDrawVert *verts, const int numVerts)
idVertexCache vertexCache
virtual void VPCALL Clamp(float *dst, const float *src, const float min, const float max, const int count)
virtual void VPCALL UntransformJoints(idJointMat *jointMats, const int *parents, const int firstJoint, const int lastJoint)
void Lerp(const idVec3 &v1, const idVec3 &v2, const float l)
float dot(float a[], float b[])
virtual int VPCALL CreateVertexProgramShadowCache(idVec4 *vertexCache, const idDrawVert *verts, const int numVerts)
virtual void VPCALL ConvertJointMatsToJointQuats(idJointQuat *jointQuats, const idJointMat *jointMats, const int numJoints)
virtual void VPCALL MulAdd(float *dst, const float constant, const float *src, const int count)
virtual void VPCALL MatX_MultiplySubVecX(idVecX &dst, const idMatX &mat, const idVecX &vec)
virtual void VPCALL MatX_TransposeMultiplySubVecX(idVecX &dst, const idMatX &mat, const idVecX &vec)
virtual void VPCALL MinMax(float &min, float &max, const float *src, const int count)
virtual void VPCALL Copy16(float *dst, const float *src, const int count)
virtual void VPCALL Sub(float *dst, const float constant, const float *src, const int count)
const float * ToFloatPtr(void) const
virtual void VPCALL TransformJoints(idJointMat *jointMats, const int *parents, const int firstJoint, const int lastJoint)
virtual void VPCALL CmpLT(byte *dst, const float *src0, const float constant, const int count)
static float RSqrt(float x)
virtual void VPCALL Add16(float *dst, const float *src1, const float *src2, const int count)
virtual void VPCALL UpSampleOGGTo44kHz(float *dest, const float *const *ogg, const int numSamples, const int kHz, const int numChannels)
virtual void VPCALL CmpGT(byte *dst, const float *src0, const float constant, const int count)
virtual const char *VPCALL GetName(void) const
void FitThroughPoint(const idVec3 &p)