doom3-gpl
Doom 3 GPL source release
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Simd.cpp
Go to the documentation of this file.
1 /*
2 ===========================================================================
3 
4 Doom 3 GPL Source Code
5 Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.
6 
7 This file is part of the Doom 3 GPL Source Code ("Doom 3 Source Code").
8 
9 Doom 3 Source Code is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13 
14 Doom 3 Source Code is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with Doom 3 Source Code. If not, see <http://www.gnu.org/licenses/>.
21 
22 In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code. If not, please request a copy in writing from id Software at the address below.
23 
24 If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
25 
26 ===========================================================================
27 */
28 
29 #include "../precompiled.h"
30 #pragma hdrstop
31 
32 #include "Simd_Generic.h"
33 #include "Simd_MMX.h"
34 #include "Simd_3DNow.h"
35 #include "Simd_SSE.h"
36 #include "Simd_SSE2.h"
37 #include "Simd_SSE3.h"
38 #include "Simd_AltiVec.h"
39 
40 
// File-scope SIMD state: created in idSIMD::Init / idSIMD::InitProcessor and released in idSIMD::Shutdown.
// 'processor' is created on demand by InitProcessor and is set to 'generic' itself when the CPU reports no MMX or AltiVec support.
41 idSIMDProcessor * processor = NULL; // pointer to SIMD processor
42 idSIMDProcessor * generic = NULL; // pointer to generic SIMD implementation
44 
45 
46 /*
47 ================
48 idSIMD::Init
49 ================
50 */
51 void idSIMD::Init( void ) {
52  generic = new idSIMD_Generic;
53  generic->cpuid = CPUID_GENERIC;
54  processor = NULL;
55  SIMDProcessor = generic;
56 }
57 
58 /*
59 ============
60 idSIMD::InitProcessor
61 ============
62 */
63 void idSIMD::InitProcessor( const char *module, bool forceGeneric ) {
64  cpuid_t cpuid;
65  idSIMDProcessor *newProcessor;
66 
67  cpuid = idLib::sys->GetProcessorId();
68 
69  if ( forceGeneric ) {
70 
71  newProcessor = generic;
72 
73  } else {
74 
75  if ( !processor ) {
76  if ( ( cpuid & CPUID_ALTIVEC ) ) {
77  processor = new idSIMD_AltiVec;
78  } else if ( ( cpuid & CPUID_MMX ) && ( cpuid & CPUID_SSE ) && ( cpuid & CPUID_SSE2 ) && ( cpuid & CPUID_SSE3 ) ) {
79  processor = new idSIMD_SSE3;
80  } else if ( ( cpuid & CPUID_MMX ) && ( cpuid & CPUID_SSE ) && ( cpuid & CPUID_SSE2 ) ) {
81  processor = new idSIMD_SSE2;
82  } else if ( ( cpuid & CPUID_MMX ) && ( cpuid & CPUID_SSE ) ) {
83  processor = new idSIMD_SSE;
84  } else if ( ( cpuid & CPUID_MMX ) && ( cpuid & CPUID_3DNOW ) ) {
85  processor = new idSIMD_3DNow;
86  } else if ( ( cpuid & CPUID_MMX ) ) {
87  processor = new idSIMD_MMX;
88  } else {
89  processor = generic;
90  }
91  processor->cpuid = cpuid;
92  }
93 
94  newProcessor = processor;
95  }
96 
97  if ( newProcessor != SIMDProcessor ) {
98  SIMDProcessor = newProcessor;
99  idLib::common->Printf( "%s using %s for SIMD processing\n", module, SIMDProcessor->GetName() );
100  }
101 
102  if ( cpuid & CPUID_FTZ ) {
103  idLib::sys->FPU_SetFTZ( true );
104  idLib::common->Printf( "enabled Flush-To-Zero mode\n" );
105  }
106 
107  if ( cpuid & CPUID_DAZ ) {
108  idLib::sys->FPU_SetDAZ( true );
109  idLib::common->Printf( "enabled Denormals-Are-Zero mode\n" );
110  }
111 }
112 
113 /*
114 ================
115 idSIMD::Shutdown
116 ================
117 */
118 void idSIMD::Shutdown( void ) {
119  if ( processor != generic ) {
120  delete processor;
121  }
122  delete generic;
123  generic = NULL;
124  processor = NULL;
125  SIMDProcessor = NULL;
126 }
127 
128 
129 //===============================================================
130 //
131 // Test code
132 //
133 //===============================================================
134 
// COUNT is the element count of every test array; NUMTESTS is how many times each
// call is timed (the timing loops keep the smallest measurement via GetBest).
135 #define COUNT 1024 // data count
136 #define NUMTESTS 2048 // number of tests
137 
// Fixed seed handed to idRandom by the tests, so every run sees the same data.
138 #define RANDOM_SEED 1013904223L //((int)idLib::sys->GetClockTicks())
139 
// Cost of an empty StartRecordTime/StopRecordTime pair as measured by GetBaseClocks;
// PrintClocks subtracts it from the timings it reports.
142 long baseClocks = 0;
143 
144 #ifdef _WIN32
145 
// Integer type of the start/end/best timing variables on Win32.
146 #define TIME_TYPE int
147 
148 #pragma warning(disable : 4731) // frame pointer register 'ebx' modified by inline assembly code
149 
// Scratch storage for ebx: StartRecordTime saves the register here and
// StopRecordTime restores it, so the two macros must always be used as a pair.
150 long saved_ebx = 0;
151 
// Saves ebx, then executes cpuid / rdtsc / cpuid and stores eax (as left by rdtsc)
// in 'start'. eax is zeroed before each cpuid.
// NOTE(review): cpuid presumably acts as a serializing fence around rdtsc — confirm.
152 #define StartRecordTime( start ) \
153  __asm mov saved_ebx, ebx \
154  __asm xor eax, eax \
155  __asm cpuid \
156  __asm rdtsc \
157  __asm mov start, eax \
158  __asm xor eax, eax \
159  __asm cpuid
160 
// Executes cpuid / rdtsc, stores eax in 'end', restores ebx from saved_ebx and
// finishes with another cpuid.
161 #define StopRecordTime( end ) \
162  __asm xor eax, eax \
163  __asm cpuid \
164  __asm rdtsc \
165  __asm mov end, eax \
166  __asm mov ebx, saved_ebx \
167  __asm xor eax, eax \
168  __asm cpuid
169 
170 #elif MACOS_X
171 
172 #include <stdlib.h>
173 #include <unistd.h> // this is for sleep()
174 #include <sys/time.h>
175 #include <sys/resource.h>
176 #include <mach/mach_time.h>
177 
// Divisor applied by time_in_millisec() to the raw time base tick count.
// NOTE(review): no assignment to this variable is visible in this part of the
// file — confirm it is initialised before time_in_millisec() is used.
178 double ticksPerNanosecond;
179 
// Integer type of the start/end/best timing variables on Mac OS X.
180 #define TIME_TYPE uint64_t
181 
182 #ifdef __MWERKS__ //time_in_millisec is missing
183 /*
184 
185  .text
186  .align 2
187  .globl _GetTB
188 _GetTB:
189 
190 loop:
191  mftbu r4 ; load from TBU
192  mftb r5 ; load from TBL
193  mftbu r6 ; load from TBU
194  cmpw r6, r4 ; see if old == new
195  bne loop ; if not, carry occurred, therefore loop
196 
197  stw r4, 0(r3)
198  stw r5, 4(r3)
199 
200 done:
201  blr ; return
202 
203 */
// 64-bit time base value held as two 32-bit words. GetTB() stores the upper
// word at byte offset 0 and the lower word at byte offset 4, so the member
// order must not change.
struct U64 {
	unsigned int hi;	// upper word (TBU)
	unsigned int lo;	// lower word (TBL)
};
208 
209 
// Reads the 64-bit time base into *in (CodeWarrior function-level assembly).
// The upper register is read before and after the lower one; if the two upper
// reads differ the whole sequence is retried, so the stored hi/lo pair is consistent.
// NOTE(review): r3 is used as the destination pointer, presumably the first argument 'in' — confirm against the target ABI.
210 asm void GetTB(U64 *in)
211 {
212  nofralloc // suppress prolog
213  machine 603 // allows the use of mftb & mftbu functions
214 
215 loop:
216  mftbu r5 // grab the upper time base register (TBU)
217  mftb r4 // grab the lower time base register (TBL)
218  mftbu r6 // grab the upper time base register (TBU) again
219 
220  cmpw r6,r5 // see if old TBU == new TBU
221  bne- loop // loop if carry occurred (predict branch not taken)
222 
223  stw r4,4(r3) // store TBL in the low 32 bits of the return value
224  stw r5,0(r3) // store TBU in the high 32 bits of the return value
225 
226  blr
227 }
228 
229 
230 
231 
232 double TBToDoubleNano( U64 startTime, U64 stopTime, double ticksPerNanosecond );
233 
234 #if __MWERKS__
235 asm void GetTB( U64 * );
236 #else
237 void GetTB( U64 * );
238 #endif
239 
240 double TBToDoubleNano( U64 startTime, U64 stopTime, double ticksPerNanosecond ) {
241  #define K_2POWER32 4294967296.0
242  #define TICKS_PER_NANOSECOND 0.025
243  double nanoTime;
244  U64 diffTime;
245 
246  // calc the difference in TB ticks
247  diffTime.hi = stopTime.hi - startTime.hi;
248  diffTime.lo = stopTime.lo - startTime.lo;
249 
250  // convert TB ticks into time
251  nanoTime = (double)(diffTime.hi)*((double)K_2POWER32) + (double)(diffTime.lo);
252  nanoTime = nanoTime/ticksPerNanosecond;
253  return (nanoTime);
254 }
255 
256 TIME_TYPE time_in_millisec( void ) {
257  #define K_2POWER32 4294967296.0
258  #define TICKS_PER_NANOSECOND 0.025
259 
260  U64 the_time;
261  double nanoTime, milliTime;
262 
263  GetTB( &the_time );
264 
265  // convert TB ticks into time
266  nanoTime = (double)(the_time.hi)*((double)K_2POWER32) + (double)(the_time.lo);
267  nanoTime = nanoTime/ticksPerNanosecond;
268 
269  // nanoseconds are 1 billionth of a second. I want milliseconds
270  milliTime = nanoTime * 1000000.0;
271 
272  printf( "ticks per nanosec -- %lf\n", ticksPerNanosecond );
273  printf( "nanoTime is %lf -- milliTime is %lf -- as int is %i\n", nanoTime, milliTime, (int)milliTime );
274 
275  return (int)milliTime;
276 }
277 
// Timestamp helpers for the CodeWarrior build: store the current
// time_in_millisec() value in the given variable. Each macro expands to a single
// parenthesised expression without a trailing semicolon, so the call site's own
// ';' ends the statement and the macro is safe in an unbraced if/else.
#define StartRecordTime( start )	\
	( (start) = time_in_millisec() )

#define StopRecordTime( end )	\
	( (end) = time_in_millisec() )
283 
284 
285 #else
// Timestamp helpers for the non-CodeWarrior Mac OS X build: store the current
// mach_absolute_time() value in the given variable. Each macro expands to a
// single parenthesised expression without a trailing semicolon, so the call
// site's own ';' ends the statement and the macro is safe in an unbraced if/else.
#define StartRecordTime( start )	\
	( (start) = mach_absolute_time() )

#define StopRecordTime( end )	\
	( (end) = mach_absolute_time() )
291 #endif
292 #else
293 
// Fallback for platforms without a cycle counter implementation: every
// measurement reads as start = 0, end = 1, i.e. a constant duration of 1.
#define TIME_TYPE int

// Each macro expands to a single parenthesised expression without a trailing
// semicolon, so the call site's own ';' ends the statement and the macro is
// safe in an unbraced if/else.
#define StartRecordTime( start )	\
	( (start) = 0 )

#define StopRecordTime( end )	\
	( (end) = 1 )
301 
302 #endif
303 
// Keeps the smallest duration seen so far in 'best'. A 'best' of zero means
// "no sample recorded yet" and is always replaced by the new duration.
// The arguments are parenthesised so expression arguments subtract correctly,
// and the body is wrapped in do/while(0) so the macro behaves as one statement
// (usable in an unbraced if/else). Arguments are evaluated more than once.
#define GetBest( start, end, best )						\
	do {												\
		if ( !(best) || (end) - (start) < (best) ) {	\
			(best) = (end) - (start);					\
		}												\
	} while ( 0 )
308 
309 
310 /*
311 ============
312 PrintClocks
313 ============
314 */
315 void PrintClocks( const char *string, int dataCount, int clocks, int otherClocks = 0 ) {
316  int i;
317 
318  idLib::common->Printf( string );
319  for ( i = idStr::LengthWithoutColors(string); i < 48; i++ ) {
320  idLib::common->Printf(" ");
321  }
322  clocks -= baseClocks;
323  if ( otherClocks && clocks ) {
324  otherClocks -= baseClocks;
325  int p = (int) ( (float) ( otherClocks - clocks ) * 100.0f / (float) otherClocks );
326  idLib::common->Printf( "c = %4d, clcks = %5d, %d%%\n", dataCount, clocks, p );
327  } else {
328  idLib::common->Printf( "c = %4d, clcks = %5d\n", dataCount, clocks );
329  }
330 }
331 
332 /*
333 ============
334 GetBaseClocks
335 ============
336 */
337 void GetBaseClocks( void ) {
338  int i, start, end, bestClocks;
339 
340  bestClocks = 0;
341  for ( i = 0; i < NUMTESTS; i++ ) {
342  StartRecordTime( start );
343  StopRecordTime( end );
344  GetBest( start, end, bestClocks );
345  }
346  baseClocks = bestClocks;
347 }
348 
349 /*
350 ============
351 TestAdd
352 ============
353 */
354 void TestAdd( void ) {
355  int i;
356  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
357  ALIGN16( float fdst0[COUNT] );
358  ALIGN16( float fdst1[COUNT] );
359  ALIGN16( float fsrc0[COUNT] );
360  ALIGN16( float fsrc1[COUNT] );
361  const char *result;
362 
363  idRandom srnd( RANDOM_SEED );
364 
365  for ( i = 0; i < COUNT; i++ ) {
366  fsrc0[i] = srnd.CRandomFloat() * 10.0f;
367  fsrc1[i] = srnd.CRandomFloat() * 10.0f;
368  }
369 
370  idLib::common->Printf("====================================\n" );
371 
372  bestClocksGeneric = 0;
373  for ( i = 0; i < NUMTESTS; i++ ) {
374  StartRecordTime( start );
375  p_generic->Add( fdst0, 4.0f, fsrc1, COUNT );
376  StopRecordTime( end );
377  GetBest( start, end, bestClocksGeneric );
378  }
379  PrintClocks( "generic->Add( float + float[] )", COUNT, bestClocksGeneric );
380 
381  bestClocksSIMD = 0;
382  for ( i = 0; i < NUMTESTS; i++ ) {
383  StartRecordTime( start );
384  p_simd->Add( fdst1, 4.0f, fsrc1, COUNT );
385  StopRecordTime( end );
386  GetBest( start, end, bestClocksSIMD );
387  }
388 
389  for ( i = 0; i < COUNT; i++ ) {
390  if ( idMath::Fabs( fdst0[i] - fdst1[i] ) > 1e-5f ) {
391  break;
392  }
393  }
394  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
395  PrintClocks( va( " simd->Add( float + float[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
396 
397  bestClocksGeneric = 0;
398  for ( i = 0; i < NUMTESTS; i++ ) {
399  StartRecordTime( start );
400  p_generic->Add( fdst0, fsrc0, fsrc1, COUNT );
401  StopRecordTime( end );
402  GetBest( start, end, bestClocksGeneric );
403  }
404  PrintClocks( "generic->Add( float[] + float[] )", COUNT, bestClocksGeneric );
405 
406  bestClocksSIMD = 0;
407  for ( i = 0; i < NUMTESTS; i++ ) {
408  StartRecordTime( start );
409  p_simd->Add( fdst1, fsrc0, fsrc1, COUNT );
410  StopRecordTime( end );
411  GetBest( start, end, bestClocksSIMD );
412  }
413 
414  for ( i = 0; i < COUNT; i++ ) {
415  if ( idMath::Fabs( fdst0[i] - fdst1[i] ) > 1e-5f ) {
416  break;
417  }
418  }
419  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
420  PrintClocks( va( " simd->Add( float[] + float[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
421 }
422 
423 /*
424 ============
425 TestSub
426 ============
427 */
428 void TestSub( void ) {
429  int i;
430  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
431  ALIGN16( float fdst0[COUNT] );
432  ALIGN16( float fdst1[COUNT] );
433  ALIGN16( float fsrc0[COUNT] );
434  ALIGN16( float fsrc1[COUNT] );
435  const char *result;
436 
437  idRandom srnd( RANDOM_SEED );
438 
439  for ( i = 0; i < COUNT; i++ ) {
440  fsrc0[i] = srnd.CRandomFloat() * 10.0f;
441  fsrc1[i] = srnd.CRandomFloat() * 10.0f;
442  }
443 
444  idLib::common->Printf("====================================\n" );
445 
446  bestClocksGeneric = 0;
447  for ( i = 0; i < NUMTESTS; i++ ) {
448  StartRecordTime( start );
449  p_generic->Sub( fdst0, 4.0f, fsrc1, COUNT );
450  StopRecordTime( end );
451  GetBest( start, end, bestClocksGeneric );
452  }
453  PrintClocks( "generic->Sub( float + float[] )", COUNT, bestClocksGeneric );
454 
455  bestClocksSIMD = 0;
456  for ( i = 0; i < NUMTESTS; i++ ) {
457  StartRecordTime( start );
458  p_simd->Sub( fdst1, 4.0f, fsrc1, COUNT );
459  StopRecordTime( end );
460  GetBest( start, end, bestClocksSIMD );
461  }
462 
463  for ( i = 0; i < COUNT; i++ ) {
464  if ( idMath::Fabs( fdst0[i] - fdst1[i] ) > 1e-5f ) {
465  break;
466  }
467  }
468  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
469  PrintClocks( va( " simd->Sub( float + float[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
470 
471  bestClocksGeneric = 0;
472  for ( i = 0; i < NUMTESTS; i++ ) {
473  StartRecordTime( start );
474  p_generic->Sub( fdst0, fsrc0, fsrc1, COUNT );
475  StopRecordTime( end );
476  GetBest( start, end, bestClocksGeneric );
477  }
478  PrintClocks( "generic->Sub( float[] + float[] )", COUNT, bestClocksGeneric );
479 
480  bestClocksSIMD = 0;
481  for ( i = 0; i < NUMTESTS; i++ ) {
482  StartRecordTime( start );
483  p_simd->Sub( fdst1, fsrc0, fsrc1, COUNT );
484  StopRecordTime( end );
485  GetBest( start, end, bestClocksSIMD );
486  }
487 
488  for ( i = 0; i < COUNT; i++ ) {
489  if ( idMath::Fabs( fdst0[i] - fdst1[i] ) > 1e-5f ) {
490  break;
491  }
492  }
493  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
494  PrintClocks( va( " simd->Sub( float[] + float[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
495 }
496 
497 /*
498 ============
499 TestMul
500 ============
501 */
502 void TestMul( void ) {
503  int i;
504  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
505  ALIGN16( float fdst0[COUNT] );
506  ALIGN16( float fdst1[COUNT] );
507  ALIGN16( float fsrc0[COUNT] );
508  ALIGN16( float fsrc1[COUNT] );
509  const char *result;
510 
511  idRandom srnd( RANDOM_SEED );
512 
513  for ( i = 0; i < COUNT; i++ ) {
514  fsrc0[i] = srnd.CRandomFloat() * 10.0f;
515  fsrc1[i] = srnd.CRandomFloat() * 10.0f;
516  }
517 
518  idLib::common->Printf("====================================\n" );
519 
520  bestClocksGeneric = 0;
521  for ( i = 0; i < NUMTESTS; i++ ) {
522  StartRecordTime( start );
523  p_generic->Mul( fdst0, 4.0f, fsrc1, COUNT );
524  StopRecordTime( end );
525  GetBest( start, end, bestClocksGeneric );
526  }
527  PrintClocks( "generic->Mul( float * float[] )", COUNT, bestClocksGeneric );
528 
529  bestClocksSIMD = 0;
530  for ( i = 0; i < NUMTESTS; i++ ) {
531  StartRecordTime( start );
532  p_simd->Mul( fdst1, 4.0f, fsrc1, COUNT );
533  StopRecordTime( end );
534  GetBest( start, end, bestClocksSIMD );
535  }
536 
537  for ( i = 0; i < COUNT; i++ ) {
538  if ( idMath::Fabs( fdst0[i] - fdst1[i] ) > 1e-5f ) {
539  break;
540  }
541  }
542  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
543  PrintClocks( va( " simd->Mul( float * float[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
544 
545 
546  bestClocksGeneric = 0;
547  for ( i = 0; i < NUMTESTS; i++ ) {
548  StartRecordTime( start );
549  p_generic->Mul( fdst0, fsrc0, fsrc1, COUNT );
550  StopRecordTime( end );
551  GetBest( start, end, bestClocksGeneric );
552  }
553  PrintClocks( "generic->Mul( float[] * float[] )", COUNT, bestClocksGeneric );
554 
555  bestClocksSIMD = 0;
556  for ( i = 0; i < NUMTESTS; i++ ) {
557  StartRecordTime( start );
558  p_simd->Mul( fdst1, fsrc0, fsrc1, COUNT );
559  StopRecordTime( end );
560  GetBest( start, end, bestClocksSIMD );
561  }
562 
563  for ( i = 0; i < COUNT; i++ ) {
564  if ( idMath::Fabs( fdst0[i] - fdst1[i] ) > 1e-5f ) {
565  break;
566  }
567  }
568  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
569  PrintClocks( va( " simd->Mul( float[] * float[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
570 }
571 
572 /*
573 ============
574 TestDiv
575 ============
576 */
577 void TestDiv( void ) {
578  int i;
579  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
580  ALIGN16( float fdst0[COUNT] );
581  ALIGN16( float fdst1[COUNT] );
582  ALIGN16( float fsrc0[COUNT] );
583  ALIGN16( float fsrc1[COUNT] );
584  const char *result;
585 
586  idRandom srnd( RANDOM_SEED );
587 
588  for ( i = 0; i < COUNT; i++ ) {
589  fsrc0[i] = srnd.CRandomFloat() * 10.0f;
590  do {
591  fsrc1[i] = srnd.CRandomFloat() * 10.0f;
592  } while( idMath::Fabs( fsrc1[i] ) < 0.1f );
593  }
594 
595  idLib::common->Printf("====================================\n" );
596 
597 
598  bestClocksGeneric = 0;
599  for ( i = 0; i < NUMTESTS; i++ ) {
600  StartRecordTime( start );
601  p_generic->Div( fdst0, 4.0f, fsrc1, COUNT );
602  StopRecordTime( end );
603  GetBest( start, end, bestClocksGeneric );
604  }
605  PrintClocks( "generic->Div( float * float[] )", COUNT, bestClocksGeneric );
606 
607  bestClocksSIMD = 0;
608  for ( i = 0; i < NUMTESTS; i++ ) {
609  StartRecordTime( start );
610  p_simd->Div( fdst1, 4.0f, fsrc1, COUNT );
611  StopRecordTime( end );
612  GetBest( start, end, bestClocksSIMD );
613  }
614 
615  for ( i = 0; i < COUNT; i++ ) {
616  if ( idMath::Fabs( fdst0[i] - fdst1[i] ) > 1e-5f ) {
617  break;
618  }
619  }
620  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
621  PrintClocks( va( " simd->Div( float * float[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
622 
623 
624  bestClocksGeneric = 0;
625  for ( i = 0; i < NUMTESTS; i++ ) {
626  StartRecordTime( start );
627  p_generic->Div( fdst0, fsrc0, fsrc1, COUNT );
628  StopRecordTime( end );
629  GetBest( start, end, bestClocksGeneric );
630  }
631  PrintClocks( "generic->Div( float[] * float[] )", COUNT, bestClocksGeneric );
632 
633  bestClocksSIMD = 0;
634  for ( i = 0; i < NUMTESTS; i++ ) {
635  StartRecordTime( start );
636  p_simd->Div( fdst1, fsrc0, fsrc1, COUNT );
637  StopRecordTime( end );
638  GetBest( start, end, bestClocksSIMD );
639  }
640 
641  for ( i = 0; i < COUNT; i++ ) {
642  if ( idMath::Fabs( fdst0[i] - fdst1[i] ) > 1e-3f ) {
643  break;
644  }
645  }
646  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
647  PrintClocks( va( " simd->Div( float[] * float[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
648 }
649 
650 /*
651 ============
652 TestMulAdd
653 ============
654 */
655 void TestMulAdd( void ) {
656  int i, j;
657  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
658  ALIGN16( float fdst0[COUNT] );
659  ALIGN16( float fdst1[COUNT] );
660  ALIGN16( float fsrc0[COUNT] );
661  const char *result;
662 
663  idRandom srnd( RANDOM_SEED );
664 
665  for ( i = 0; i < COUNT; i++ ) {
666  fsrc0[i] = srnd.CRandomFloat() * 10.0f;
667  }
668 
669  idLib::common->Printf("====================================\n" );
670 
671  for ( j = 0; j < 50 && j < COUNT; j++ ) {
672 
673  bestClocksGeneric = 0;
674  for ( i = 0; i < NUMTESTS; i++ ) {
675  for ( int k = 0; k < COUNT; k++ ) {
676  fdst0[k] = k;
677  }
678  StartRecordTime( start );
679  p_generic->MulAdd( fdst0, 0.123f, fsrc0, j );
680  StopRecordTime( end );
681  GetBest( start, end, bestClocksGeneric );
682  }
683  PrintClocks( va( "generic->MulAdd( float * float[%2d] )", j ), 1, bestClocksGeneric );
684 
685  bestClocksSIMD = 0;
686  for ( i = 0; i < NUMTESTS; i++ ) {
687  for ( int k = 0; k < COUNT; k++ ) {
688  fdst1[k] = k;
689  }
690  StartRecordTime( start );
691  p_simd->MulAdd( fdst1, 0.123f, fsrc0, j );
692  StopRecordTime( end );
693  GetBest( start, end, bestClocksSIMD );
694  }
695 
696  for ( i = 0; i < COUNT; i++ ) {
697  if ( idMath::Fabs( fdst0[i] - fdst1[i] ) > 1e-5f ) {
698  break;
699  }
700  }
701  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
702  PrintClocks( va( " simd->MulAdd( float * float[%2d] ) %s", j, result ), 1, bestClocksSIMD, bestClocksGeneric );
703  }
704 }
705 
706 /*
707 ============
708 TestMulSub
709 ============
710 */
711 void TestMulSub( void ) {
712  int i, j;
713  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
714  ALIGN16( float fdst0[COUNT] );
715  ALIGN16( float fdst1[COUNT] );
716  ALIGN16( float fsrc0[COUNT] );
717  const char *result;
718 
719  idRandom srnd( RANDOM_SEED );
720 
721  for ( i = 0; i < COUNT; i++ ) {
722  fsrc0[i] = srnd.CRandomFloat() * 10.0f;
723  }
724 
725  idLib::common->Printf("====================================\n" );
726 
727  for ( j = 0; j < 50 && j < COUNT; j++ ) {
728 
729  bestClocksGeneric = 0;
730  for ( i = 0; i < NUMTESTS; i++ ) {
731  for ( int k = 0; k < COUNT; k++ ) {
732  fdst0[k] = k;
733  }
734  StartRecordTime( start );
735  p_generic->MulSub( fdst0, 0.123f, fsrc0, j );
736  StopRecordTime( end );
737  GetBest( start, end, bestClocksGeneric );
738  }
739  PrintClocks( va( "generic->MulSub( float * float[%2d] )", j ), 1, bestClocksGeneric );
740 
741  bestClocksSIMD = 0;
742  for ( i = 0; i < NUMTESTS; i++ ) {
743  for ( int k = 0; k < COUNT; k++ ) {
744  fdst1[k] = k;
745  }
746  StartRecordTime( start );
747  p_simd->MulSub( fdst1, 0.123f, fsrc0, j );
748  StopRecordTime( end );
749  GetBest( start, end, bestClocksSIMD );
750  }
751 
752  for ( i = 0; i < COUNT; i++ ) {
753  if ( idMath::Fabs( fdst0[i] - fdst1[i] ) > 1e-5f ) {
754  break;
755  }
756  }
757  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
758  PrintClocks( va( " simd->MulSub( float * float[%2d] ) %s", j, result ), 1, bestClocksSIMD, bestClocksGeneric );
759  }
760 }
761 
762 /*
763 ============
764 TestDot
765 ============
766 */
767 void TestDot( void ) {
768  int i, j;
769  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
770  ALIGN16( float fdst0[COUNT] );
771  ALIGN16( float fdst1[COUNT] );
772  ALIGN16( float fsrc0[COUNT] );
773  ALIGN16( float fsrc1[COUNT] );
774  ALIGN16( idVec3 v3src0[COUNT] );
775  ALIGN16( idVec3 v3src1[COUNT] );
776  ALIGN16( idVec3 v3constant ) ( 1.0f, 2.0f, 3.0f );
777  ALIGN16( idPlane v4src0[COUNT] );
778  ALIGN16( idPlane v4constant ) (1.0f, 2.0f, 3.0f, 4.0f);
779  ALIGN16( idDrawVert drawVerts[COUNT] );
780  const char *result;
781 
782  idRandom srnd( RANDOM_SEED );
783 
784  for ( i = 0; i < COUNT; i++ ) {
785  fsrc0[i] = srnd.CRandomFloat() * 10.0f;
786  fsrc1[i] = srnd.CRandomFloat() * 10.0f;
787  v3src0[i][0] = srnd.CRandomFloat() * 10.0f;
788  v3src0[i][1] = srnd.CRandomFloat() * 10.0f;
789  v3src0[i][2] = srnd.CRandomFloat() * 10.0f;
790  v3src1[i][0] = srnd.CRandomFloat() * 10.0f;
791  v3src1[i][1] = srnd.CRandomFloat() * 10.0f;
792  v3src1[i][2] = srnd.CRandomFloat() * 10.0f;
793  v4src0[i] = v3src0[i];
794  v4src0[i][3] = srnd.CRandomFloat() * 10.0f;
795  drawVerts[i].xyz = v3src0[i];
796  }
797 
798  idLib::common->Printf("====================================\n" );
799 
800 
801  bestClocksGeneric = 0;
802  for ( i = 0; i < NUMTESTS; i++ ) {
803  StartRecordTime( start );
804  p_generic->Dot( fdst0, v3constant, v3src0, COUNT );
805  StopRecordTime( end );
806  GetBest( start, end, bestClocksGeneric );
807  }
808  PrintClocks( "generic->Dot( idVec3 * idVec3[] )", COUNT, bestClocksGeneric );
809 
810  bestClocksSIMD = 0;
811  for ( i = 0; i < NUMTESTS; i++ ) {
812  StartRecordTime( start );
813  p_simd->Dot( fdst1, v3constant, v3src0, COUNT );
814  StopRecordTime( end );
815  GetBest( start, end, bestClocksSIMD );
816  }
817 
818  for ( i = 0; i < COUNT; i++ ) {
819  if ( idMath::Fabs( fdst0[i] - fdst1[i] ) > 1e-5f ) {
820  break;
821  }
822  }
823  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
824  PrintClocks( va( " simd->Dot( idVec3 * idVec3[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
825 
826 
827  bestClocksGeneric = 0;
828  for ( i = 0; i < NUMTESTS; i++ ) {
829  StartRecordTime( start );
830  p_generic->Dot( fdst0, v3constant, v4src0, COUNT );
831  StopRecordTime( end );
832  GetBest( start, end, bestClocksGeneric );
833  }
834  PrintClocks( "generic->Dot( idVec3 * idPlane[] )", COUNT, bestClocksGeneric );
835 
836  bestClocksSIMD = 0;
837  for ( i = 0; i < NUMTESTS; i++ ) {
838  StartRecordTime( start );
839  p_simd->Dot( fdst1, v3constant, v4src0, COUNT );
840  StopRecordTime( end );
841  GetBest( start, end, bestClocksSIMD );
842  }
843 
844  for ( i = 0; i < COUNT; i++ ) {
845  if ( idMath::Fabs( fdst0[i] - fdst1[i] ) > 1e-5f ) {
846  break;
847  }
848  }
849  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
850  PrintClocks( va( " simd->Dot( idVec3 * idPlane[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
851 
852 
853  bestClocksGeneric = 0;
854  for ( i = 0; i < NUMTESTS; i++ ) {
855  StartRecordTime( start );
856  p_generic->Dot( fdst0, v3constant, drawVerts, COUNT );
857  StopRecordTime( end );
858  GetBest( start, end, bestClocksGeneric );
859  }
860  PrintClocks( "generic->Dot( idVec3 * idDrawVert[] )", COUNT, bestClocksGeneric );
861 
862  bestClocksSIMD = 0;
863  for ( i = 0; i < NUMTESTS; i++ ) {
864  StartRecordTime( start );
865  p_simd->Dot( fdst1, v3constant, drawVerts, COUNT );
866  StopRecordTime( end );
867  GetBest( start, end, bestClocksSIMD );
868  }
869 
870  for ( i = 0; i < COUNT; i++ ) {
871  if ( idMath::Fabs( fdst0[i] - fdst1[i] ) > 1e-5f ) {
872  break;
873  }
874  }
875  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
876  PrintClocks( va( " simd->Dot( idVec3 * idDrawVert[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
877 
878 
879  bestClocksGeneric = 0;
880  for ( i = 0; i < NUMTESTS; i++ ) {
881  StartRecordTime( start );
882  p_generic->Dot( fdst0, v4constant, v3src0, COUNT );
883  StopRecordTime( end );
884  GetBest( start, end, bestClocksGeneric );
885  }
886  PrintClocks( "generic->Dot( idPlane * idVec3[] )", COUNT, bestClocksGeneric );
887 
888  bestClocksSIMD = 0;
889  for ( i = 0; i < NUMTESTS; i++ ) {
890  StartRecordTime( start );
891  p_simd->Dot( fdst1, v4constant, v3src0, COUNT );
892  StopRecordTime( end );
893  GetBest( start, end, bestClocksSIMD );
894  }
895 
896  for ( i = 0; i < COUNT; i++ ) {
897  if ( idMath::Fabs( fdst0[i] - fdst1[i] ) > 1e-5f ) {
898  break;
899  }
900  }
901  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
902  PrintClocks( va( " simd->Dot( idPlane * idVec3[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
903 
904 
905  bestClocksGeneric = 0;
906  for ( i = 0; i < NUMTESTS; i++ ) {
907  StartRecordTime( start );
908  p_generic->Dot( fdst0, v4constant, v4src0, COUNT );
909  StopRecordTime( end );
910  GetBest( start, end, bestClocksGeneric );
911  }
912  PrintClocks( "generic->Dot( idPlane * idPlane[] )", COUNT, bestClocksGeneric );
913 
914  bestClocksSIMD = 0;
915  for ( i = 0; i < NUMTESTS; i++ ) {
916  StartRecordTime( start );
917  p_simd->Dot( fdst1, v4constant, v4src0, COUNT );
918  StopRecordTime( end );
919  GetBest( start, end, bestClocksSIMD );
920  }
921 
922  for ( i = 0; i < COUNT; i++ ) {
923  if ( idMath::Fabs( fdst0[i] - fdst1[i] ) > 1e-5f ) {
924  break;
925  }
926  }
927  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
928  PrintClocks( va( " simd->Dot( idPlane * idPlane[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
929 
930 
931  bestClocksGeneric = 0;
932  for ( i = 0; i < NUMTESTS; i++ ) {
933  StartRecordTime( start );
934  p_generic->Dot( fdst0, v4constant, drawVerts, COUNT );
935  StopRecordTime( end );
936  GetBest( start, end, bestClocksGeneric );
937  }
938  PrintClocks( "generic->Dot( idPlane * idDrawVert[] )", COUNT, bestClocksGeneric );
939 
940  bestClocksSIMD = 0;
941  for ( i = 0; i < NUMTESTS; i++ ) {
942  StartRecordTime( start );
943  p_simd->Dot( fdst1, v4constant, drawVerts, COUNT );
944  StopRecordTime( end );
945  GetBest( start, end, bestClocksSIMD );
946  }
947 
948  for ( i = 0; i < COUNT; i++ ) {
949  if ( idMath::Fabs( fdst0[i] - fdst1[i] ) > 1e-5f ) {
950  break;
951  }
952  }
953  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
954  PrintClocks( va( " simd->Dot( idPlane * idDrawVert[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
955 
956 
957  bestClocksGeneric = 0;
958  for ( i = 0; i < NUMTESTS; i++ ) {
959  StartRecordTime( start );
960  p_generic->Dot( fdst0, v3src0, v3src1, COUNT );
961  StopRecordTime( end );
962  GetBest( start, end, bestClocksGeneric );
963  }
964  PrintClocks( "generic->Dot( idVec3[] * idVec3[] )", COUNT, bestClocksGeneric );
965 
966  bestClocksSIMD = 0;
967  for ( i = 0; i < NUMTESTS; i++ ) {
968  StartRecordTime( start );
969  p_simd->Dot( fdst1, v3src0, v3src1, COUNT );
970  StopRecordTime( end );
971  GetBest( start, end, bestClocksSIMD );
972  }
973 
974  for ( i = 0; i < COUNT; i++ ) {
975  if ( idMath::Fabs( fdst0[i] - fdst1[i] ) > 1e-4f ) {
976  break;
977  }
978  }
979  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
980  PrintClocks( va( " simd->Dot( idVec3[] * idVec3[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
981 
982 
983  idLib::common->Printf("====================================\n" );
984 
985  float dot1 = 0.0f, dot2 = 0.0f;
986  for ( j = 0; j < 50 && j < COUNT; j++ ) {
987 
988  bestClocksGeneric = 0;
989  for ( i = 0; i < NUMTESTS; i++ ) {
990  StartRecordTime( start );
991  p_generic->Dot( dot1, fsrc0, fsrc1, j );
992  StopRecordTime( end );
993  GetBest( start, end, bestClocksGeneric );
994  }
995  PrintClocks( va( "generic->Dot( float[%2d] * float[%2d] )", j, j ), 1, bestClocksGeneric );
996 
997  bestClocksSIMD = 0;
998  for ( i = 0; i < NUMTESTS; i++ ) {
999  StartRecordTime( start );
1000  p_simd->Dot( dot2, fsrc0, fsrc1, j );
1001  StopRecordTime( end );
1002  GetBest( start, end, bestClocksSIMD );
1003  }
1004  result = idMath::Fabs( dot1 - dot2 ) < 1e-4f ? "ok" : S_COLOR_RED"X";
1005  PrintClocks( va( " simd->Dot( float[%2d] * float[%2d] ) %s", j, j, result ), 1, bestClocksSIMD, bestClocksGeneric );
1006  }
1007 }
1008 
1009 /*
1010 ============
1011 TestCompare
1012 ============
1013 */
1014 void TestCompare( void ) {
1015  int i;
1016  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
1017  ALIGN16( float fsrc0[COUNT] );
1018  ALIGN16( byte bytedst[COUNT] );
1019  ALIGN16( byte bytedst2[COUNT] );
1020  const char *result;
1021 
1022  idRandom srnd( RANDOM_SEED );
1023 
1024  for ( i = 0; i < COUNT; i++ ) {
1025  fsrc0[i] = srnd.CRandomFloat() * 10.0f;
1026  }
1027 
1028  idLib::common->Printf("====================================\n" );
1029 
1030  bestClocksGeneric = 0;
1031  for ( i = 0; i < NUMTESTS; i++ ) {
1032  StartRecordTime( start );
1033  p_generic->CmpGT( bytedst, fsrc0, 0.0f, COUNT );
1034  StopRecordTime( end );
1035  GetBest( start, end, bestClocksGeneric );
1036  }
1037  PrintClocks( "generic->CmpGT( float[] >= float )", COUNT, bestClocksGeneric );
1038 
1039  bestClocksSIMD = 0;
1040  for ( i = 0; i < NUMTESTS; i++ ) {
1041  StartRecordTime( start );
1042  p_simd->CmpGT( bytedst2, fsrc0, 0.0f, COUNT );
1043  StopRecordTime( end );
1044  GetBest( start, end, bestClocksSIMD );
1045  }
1046 
1047  for ( i = 0; i < COUNT; i++ ) {
1048  if ( bytedst[i] != bytedst2[i] ) {
1049  break;
1050  }
1051  }
1052  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
1053  PrintClocks( va( " simd->CmpGT( float[] >= float ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
1054 
1055  bestClocksGeneric = 0;
1056  for ( i = 0; i < NUMTESTS; i++ ) {
1057  memset( bytedst, 0, COUNT );
1058  StartRecordTime( start );
1059  p_generic->CmpGT( bytedst, 2, fsrc0, 0.0f, COUNT );
1060  StopRecordTime( end );
1061  GetBest( start, end, bestClocksGeneric );
1062  }
1063  PrintClocks( "generic->CmpGT( 2, float[] >= float )", COUNT, bestClocksGeneric );
1064 
1065  bestClocksSIMD = 0;
1066  for ( i = 0; i < NUMTESTS; i++ ) {
1067  memset( bytedst2, 0, COUNT );
1068  StartRecordTime( start );
1069  p_simd->CmpGT( bytedst2, 2, fsrc0, 0.0f, COUNT );
1070  StopRecordTime( end );
1071  GetBest( start, end, bestClocksSIMD );
1072  }
1073 
1074  for ( i = 0; i < COUNT; i++ ) {
1075  if ( bytedst[i] != bytedst2[i] ) {
1076  break;
1077  }
1078  }
1079  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
1080  PrintClocks( va( " simd->CmpGT( 2, float[] >= float ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
1081 
1082  // ======================
1083 
1084  bestClocksGeneric = 0;
1085  for ( i = 0; i < NUMTESTS; i++ ) {
1086  StartRecordTime( start );
1087  p_generic->CmpGE( bytedst, fsrc0, 0.0f, COUNT );
1088  StopRecordTime( end );
1089  GetBest( start, end, bestClocksGeneric );
1090  }
1091  PrintClocks( "generic->CmpGE( float[] >= float )", COUNT, bestClocksGeneric );
1092 
1093  bestClocksSIMD = 0;
1094  for ( i = 0; i < NUMTESTS; i++ ) {
1095  StartRecordTime( start );
1096  p_simd->CmpGE( bytedst2, fsrc0, 0.0f, COUNT );
1097  StopRecordTime( end );
1098  GetBest( start, end, bestClocksSIMD );
1099  }
1100 
1101  for ( i = 0; i < COUNT; i++ ) {
1102  if ( bytedst[i] != bytedst2[i] ) {
1103  break;
1104  }
1105  }
1106  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
1107  PrintClocks( va( " simd->CmpGE( float[] >= float ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
1108 
1109  bestClocksGeneric = 0;
1110  for ( i = 0; i < NUMTESTS; i++ ) {
1111  memset( bytedst, 0, COUNT );
1112  StartRecordTime( start );
1113  p_generic->CmpGE( bytedst, 2, fsrc0, 0.0f, COUNT );
1114  StopRecordTime( end );
1115  GetBest( start, end, bestClocksGeneric );
1116  }
1117  PrintClocks( "generic->CmpGE( 2, float[] >= float )", COUNT, bestClocksGeneric );
1118 
1119  bestClocksSIMD = 0;
1120  for ( i = 0; i < NUMTESTS; i++ ) {
1121  memset( bytedst2, 0, COUNT );
1122  StartRecordTime( start );
1123  p_simd->CmpGE( bytedst2, 2, fsrc0, 0.0f, COUNT );
1124  StopRecordTime( end );
1125  GetBest( start, end, bestClocksSIMD );
1126  }
1127 
1128  for ( i = 0; i < COUNT; i++ ) {
1129  if ( bytedst[i] != bytedst2[i] ) {
1130  break;
1131  }
1132  }
1133  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
1134  PrintClocks( va( " simd->CmpGE( 2, float[] >= float ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
1135 
1136  // ======================
1137 
1138  bestClocksGeneric = 0;
1139  for ( i = 0; i < NUMTESTS; i++ ) {
1140  StartRecordTime( start );
1141  p_generic->CmpLT( bytedst, fsrc0, 0.0f, COUNT );
1142  StopRecordTime( end );
1143  GetBest( start, end, bestClocksGeneric );
1144  }
1145  PrintClocks( "generic->CmpLT( float[] >= float )", COUNT, bestClocksGeneric );
1146 
1147  bestClocksSIMD = 0;
1148  for ( i = 0; i < NUMTESTS; i++ ) {
1149  StartRecordTime( start );
1150  p_simd->CmpLT( bytedst2, fsrc0, 0.0f, COUNT );
1151  StopRecordTime( end );
1152  GetBest( start, end, bestClocksSIMD );
1153  }
1154 
1155  for ( i = 0; i < COUNT; i++ ) {
1156  if ( bytedst[i] != bytedst2[i] ) {
1157  break;
1158  }
1159  }
1160  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
1161  PrintClocks( va( " simd->CmpLT( float[] >= float ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
1162 
1163  bestClocksGeneric = 0;
1164  for ( i = 0; i < NUMTESTS; i++ ) {
1165  memset( bytedst, 0, COUNT );
1166  StartRecordTime( start );
1167  p_generic->CmpLT( bytedst, 2, fsrc0, 0.0f, COUNT );
1168  StopRecordTime( end );
1169  GetBest( start, end, bestClocksGeneric );
1170  }
1171  PrintClocks( "generic->CmpLT( 2, float[] >= float )", COUNT, bestClocksGeneric );
1172 
1173  bestClocksSIMD = 0;
1174  for ( i = 0; i < NUMTESTS; i++ ) {
1175  memset( bytedst2, 0, COUNT );
1176  StartRecordTime( start );
1177  p_simd->CmpLT( bytedst2, 2, fsrc0, 0.0f, COUNT );
1178  StopRecordTime( end );
1179  GetBest( start, end, bestClocksSIMD );
1180  }
1181 
1182  for ( i = 0; i < COUNT; i++ ) {
1183  if ( bytedst[i] != bytedst2[i] ) {
1184  break;
1185  }
1186  }
1187  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
1188  PrintClocks( va( " simd->CmpLT( 2, float[] >= float ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
1189 
1190  // ======================
1191 
1192  bestClocksGeneric = 0;
1193  for ( i = 0; i < NUMTESTS; i++ ) {
1194  StartRecordTime( start );
1195  p_generic->CmpLE( bytedst, fsrc0, 0.0f, COUNT );
1196  StopRecordTime( end );
1197  GetBest( start, end, bestClocksGeneric );
1198  }
1199  PrintClocks( "generic->CmpLE( float[] >= float )", COUNT, bestClocksGeneric );
1200 
1201  bestClocksSIMD = 0;
1202  for ( i = 0; i < NUMTESTS; i++ ) {
1203  StartRecordTime( start );
1204  p_simd->CmpLE( bytedst2, fsrc0, 0.0f, COUNT );
1205  StopRecordTime( end );
1206  GetBest( start, end, bestClocksSIMD );
1207  }
1208 
1209  for ( i = 0; i < COUNT; i++ ) {
1210  if ( bytedst[i] != bytedst2[i] ) {
1211  break;
1212  }
1213  }
1214  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
1215  PrintClocks( va( " simd->CmpLE( float[] >= float ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
1216 
1217  bestClocksGeneric = 0;
1218  for ( i = 0; i < NUMTESTS; i++ ) {
1219  memset( bytedst, 0, COUNT );
1220  StartRecordTime( start );
1221  p_generic->CmpLE( bytedst, 2, fsrc0, 0.0f, COUNT );
1222  StopRecordTime( end );
1223  GetBest( start, end, bestClocksGeneric );
1224  }
1225  PrintClocks( "generic->CmpLE( 2, float[] >= float )", COUNT, bestClocksGeneric );
1226 
1227  bestClocksSIMD = 0;
1228  for ( i = 0; i < NUMTESTS; i++ ) {
1229  memset( bytedst2, 0, COUNT );
1230  StartRecordTime( start );
1231  p_simd->CmpLE( bytedst2, 2, fsrc0, 0.0f, COUNT );
1232  StopRecordTime( end );
1233  GetBest( start, end, bestClocksSIMD );
1234  }
1235 
1236  for ( i = 0; i < COUNT; i++ ) {
1237  if ( bytedst[i] != bytedst2[i] ) {
1238  break;
1239  }
1240  }
1241  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
1242  PrintClocks( va( " simd->CmpLE( 2, float[] >= float ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
1243 }
1244 
1245 /*
1246 ============
1247 TestMinMax
1248 ============
1249 */
1250 void TestMinMax( void ) {
1251  int i;
1252  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
1253  ALIGN16( float fsrc0[COUNT] );
1254  ALIGN16( idVec2 v2src0[COUNT] );
1255  ALIGN16( idVec3 v3src0[COUNT] );
1256  ALIGN16( idDrawVert drawVerts[COUNT] );
1257  ALIGN16( int indexes[COUNT] );
1258  float min = 0.0f, max = 0.0f, min2 = 0.0f, max2 = 0.0f;
1259  idVec2 v2min, v2max, v2min2, v2max2;
1260  idVec3 vmin, vmax, vmin2, vmax2;
1261  const char *result;
1262 
1263  idRandom srnd( RANDOM_SEED );
1264 
1265  for ( i = 0; i < COUNT; i++ ) {
1266  fsrc0[i] = srnd.CRandomFloat() * 10.0f;
1267  v2src0[i][0] = srnd.CRandomFloat() * 10.0f;
1268  v2src0[i][1] = srnd.CRandomFloat() * 10.0f;
1269  v3src0[i][0] = srnd.CRandomFloat() * 10.0f;
1270  v3src0[i][1] = srnd.CRandomFloat() * 10.0f;
1271  v3src0[i][2] = srnd.CRandomFloat() * 10.0f;
1272  drawVerts[i].xyz = v3src0[i];
1273  indexes[i] = i;
1274  }
1275 
1276  idLib::common->Printf("====================================\n" );
1277 
1278  bestClocksGeneric = 0;
1279  for ( i = 0; i < NUMTESTS; i++ ) {
1280  min = idMath::INFINITY;
1281  max = -idMath::INFINITY;
1282  StartRecordTime( start );
1283  p_generic->MinMax( min, max, fsrc0, COUNT );
1284  StopRecordTime( end );
1285  GetBest( start, end, bestClocksGeneric );
1286  }
1287  PrintClocks( "generic->MinMax( float[] )", COUNT, bestClocksGeneric );
1288 
1289  bestClocksSIMD = 0;
1290  for ( i = 0; i < NUMTESTS; i++ ) {
1291  StartRecordTime( start );
1292  p_simd->MinMax( min2, max2, fsrc0, COUNT );
1293  StopRecordTime( end );
1294  GetBest( start, end, bestClocksSIMD );
1295  }
1296 
1297  result = ( min == min2 && max == max2 ) ? "ok" : S_COLOR_RED"X";
1298  PrintClocks( va( " simd->MinMax( float[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
1299 
1300  bestClocksGeneric = 0;
1301  for ( i = 0; i < NUMTESTS; i++ ) {
1302  StartRecordTime( start );
1303  p_generic->MinMax( v2min, v2max, v2src0, COUNT );
1304  StopRecordTime( end );
1305  GetBest( start, end, bestClocksGeneric );
1306  }
1307  PrintClocks( "generic->MinMax( idVec2[] )", COUNT, bestClocksGeneric );
1308 
1309  bestClocksSIMD = 0;
1310  for ( i = 0; i < NUMTESTS; i++ ) {
1311  StartRecordTime( start );
1312  p_simd->MinMax( v2min2, v2max2, v2src0, COUNT );
1313  StopRecordTime( end );
1314  GetBest( start, end, bestClocksSIMD );
1315  }
1316 
1317  result = ( v2min == v2min2 && v2max == v2max2 ) ? "ok" : S_COLOR_RED"X";
1318  PrintClocks( va( " simd->MinMax( idVec2[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
1319 
1320  bestClocksGeneric = 0;
1321  for ( i = 0; i < NUMTESTS; i++ ) {
1322  StartRecordTime( start );
1323  p_generic->MinMax( vmin, vmax, v3src0, COUNT );
1324  StopRecordTime( end );
1325  GetBest( start, end, bestClocksGeneric );
1326  }
1327  PrintClocks( "generic->MinMax( idVec3[] )", COUNT, bestClocksGeneric );
1328 
1329  bestClocksSIMD = 0;
1330  for ( i = 0; i < NUMTESTS; i++ ) {
1331  StartRecordTime( start );
1332  p_simd->MinMax( vmin2, vmax2, v3src0, COUNT );
1333  StopRecordTime( end );
1334  GetBest( start, end, bestClocksSIMD );
1335  }
1336 
1337  result = ( vmin == vmin2 && vmax == vmax2 ) ? "ok" : S_COLOR_RED"X";
1338  PrintClocks( va( " simd->MinMax( idVec3[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
1339 
1340  bestClocksGeneric = 0;
1341  for ( i = 0; i < NUMTESTS; i++ ) {
1342  StartRecordTime( start );
1343  p_generic->MinMax( vmin, vmax, drawVerts, COUNT );
1344  StopRecordTime( end );
1345  GetBest( start, end, bestClocksGeneric );
1346  }
1347  PrintClocks( "generic->MinMax( idDrawVert[] )", COUNT, bestClocksGeneric );
1348 
1349  bestClocksSIMD = 0;
1350  for ( i = 0; i < NUMTESTS; i++ ) {
1351  StartRecordTime( start );
1352  p_simd->MinMax( vmin2, vmax2, drawVerts, COUNT );
1353  StopRecordTime( end );
1354  GetBest( start, end, bestClocksSIMD );
1355  }
1356 
1357  result = ( vmin == vmin2 && vmax == vmax2 ) ? "ok" : S_COLOR_RED"X";
1358  PrintClocks( va( " simd->MinMax( idDrawVert[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
1359 
1360  bestClocksGeneric = 0;
1361  for ( i = 0; i < NUMTESTS; i++ ) {
1362  StartRecordTime( start );
1363  p_generic->MinMax( vmin, vmax, drawVerts, indexes, COUNT );
1364  StopRecordTime( end );
1365  GetBest( start, end, bestClocksGeneric );
1366  }
1367  PrintClocks( "generic->MinMax( idDrawVert[], indexes[] )", COUNT, bestClocksGeneric );
1368 
1369  bestClocksSIMD = 0;
1370  for ( i = 0; i < NUMTESTS; i++ ) {
1371  StartRecordTime( start );
1372  p_simd->MinMax( vmin2, vmax2, drawVerts, indexes, COUNT );
1373  StopRecordTime( end );
1374  GetBest( start, end, bestClocksSIMD );
1375  }
1376 
1377  result = ( vmin == vmin2 && vmax == vmax2 ) ? "ok" : S_COLOR_RED"X";
1378  PrintClocks( va( " simd->MinMax( idDrawVert[], indexes[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
1379 }
1380 
1381 /*
1382 ============
1383 TestClamp
1384 ============
1385 */
1386 void TestClamp( void ) {
1387  int i;
1388  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
1389  ALIGN16( float fdst0[COUNT] );
1390  ALIGN16( float fdst1[COUNT] );
1391  ALIGN16( float fsrc0[COUNT] );
1392  const char *result;
1393 
1394  idRandom srnd( RANDOM_SEED );
1395 
1396  for ( i = 0; i < COUNT; i++ ) {
1397  fsrc0[i] = srnd.CRandomFloat() * 10.0f;
1398  }
1399 
1400  idLib::common->Printf("====================================\n" );
1401 
1402  bestClocksGeneric = 0;
1403  for ( i = 0; i < NUMTESTS; i++ ) {
1404  StartRecordTime( start );
1405  p_generic->Clamp( fdst0, fsrc0, -1.0f, 1.0f, COUNT );
1406  StopRecordTime( end );
1407  GetBest( start, end, bestClocksGeneric );
1408  }
1409  PrintClocks( "generic->Clamp( float[] )", COUNT, bestClocksGeneric );
1410 
1411  bestClocksSIMD = 0;
1412  for ( i = 0; i < NUMTESTS; i++ ) {
1413  StartRecordTime( start );
1414  p_simd->Clamp( fdst1, fsrc0, -1.0f, 1.0f, COUNT );
1415  StopRecordTime( end );
1416  GetBest( start, end, bestClocksSIMD );
1417  }
1418 
1419  for ( i = 0; i < COUNT; i++ ) {
1420  if ( fdst0[i] != fdst1[i] ) {
1421  break;
1422  }
1423  }
1424  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
1425  PrintClocks( va( " simd->Clamp( float[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
1426 
1427 
1428  bestClocksGeneric = 0;
1429  for ( i = 0; i < NUMTESTS; i++ ) {
1430  StartRecordTime( start );
1431  p_generic->ClampMin( fdst0, fsrc0, -1.0f, COUNT );
1432  StopRecordTime( end );
1433  GetBest( start, end, bestClocksGeneric );
1434  }
1435  PrintClocks( "generic->ClampMin( float[] )", COUNT, bestClocksGeneric );
1436 
1437  bestClocksSIMD = 0;
1438  for ( i = 0; i < NUMTESTS; i++ ) {
1439  StartRecordTime( start );
1440  p_simd->ClampMin( fdst1, fsrc0, -1.0f, COUNT );
1441  StopRecordTime( end );
1442  GetBest( start, end, bestClocksSIMD );
1443  }
1444 
1445  for ( i = 0; i < COUNT; i++ ) {
1446  if ( fdst0[i] != fdst1[i] ) {
1447  break;
1448  }
1449  }
1450  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
1451  PrintClocks( va( " simd->ClampMin( float[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
1452 
1453 
1454  bestClocksGeneric = 0;
1455  for ( i = 0; i < NUMTESTS; i++ ) {
1456  StartRecordTime( start );
1457  p_generic->ClampMax( fdst0, fsrc0, 1.0f, COUNT );
1458  StopRecordTime( end );
1459  GetBest( start, end, bestClocksGeneric );
1460  }
1461  PrintClocks( "generic->ClampMax( float[] )", COUNT, bestClocksGeneric );
1462 
1463  bestClocksSIMD = 0;
1464  for ( i = 0; i < NUMTESTS; i++ ) {
1465  StartRecordTime( start );
1466  p_simd->ClampMax( fdst1, fsrc0, 1.0f, COUNT );
1467  StopRecordTime( end );
1468  GetBest( start, end, bestClocksSIMD );
1469  }
1470 
1471  for ( i = 0; i < COUNT; i++ ) {
1472  if ( fdst0[i] != fdst1[i] ) {
1473  break;
1474  }
1475  }
1476  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
1477  PrintClocks( va( " simd->ClampMax( float[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
1478 }
1479 
1480 /*
1481 ============
1482 TestMemcpy
1483 ============
1484 */
1485 void TestMemcpy( void ) {
1486  int i, j;
1487  byte test0[8192];
1488  byte test1[8192];
1489 
1490  idRandom random( RANDOM_SEED );
1491 
1492  idLib::common->Printf("====================================\n" );
1493 
1494  for ( i = 5; i < 8192; i += 31 ) {
1495  for ( j = 0; j < i; j++ ) {
1496  test0[j] = random.RandomInt( 255 );
1497  }
1498  p_simd->Memcpy( test1, test0, 8192 );
1499  for ( j = 0; j < i; j++ ) {
1500  if ( test1[j] != test0[j] ) {
1501  idLib::common->Printf( " simd->Memcpy() "S_COLOR_RED"X\n" );
1502  return;
1503  }
1504  }
1505  }
1506  idLib::common->Printf( " simd->Memcpy() ok\n" );
1507 }
1508 
1509 /*
1510 ============
1511 TestMemset
1512 ============
1513 */
1514 void TestMemset( void ) {
1515  int i, j, k;
1516  byte test[8192];
1517 
1518  for ( i = 0; i < 8192; i++ ) {
1519  test[i] = 0;
1520  }
1521 
1522  for ( i = 5; i < 8192; i += 31 ) {
1523  for ( j = -1; j <= 1; j++ ) {
1524  p_simd->Memset( test, j, i );
1525  for ( k = 0; k < i; k++ ) {
1526  if ( test[k] != (byte)j ) {
1527  idLib::common->Printf( " simd->Memset() "S_COLOR_RED"X\n" );
1528  return;
1529  }
1530  }
1531  }
1532  }
1533  idLib::common->Printf( " simd->Memset() ok\n" );
1534 }
1535 
1536 #define MATX_SIMD_EPSILON 1e-5f
1537 
1538 /*
1539 ============
1540 TestMatXMultiplyVecX
1541 ============
1542 */
1543 void TestMatXMultiplyVecX( void ) {
1544  int i, j;
1545  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
1546  const char *result;
1547  idMatX mat;
1548  idVecX src(6);
1549  idVecX dst(6);
1550  idVecX tst(6);
1551 
1552  src[0] = 1.0f;
1553  src[1] = 2.0f;
1554  src[2] = 3.0f;
1555  src[3] = 4.0f;
1556  src[4] = 5.0f;
1557  src[5] = 6.0f;
1558 
1559  idLib::common->Printf("================= NxN * Nx1 ===================\n" );
1560 
1561  for ( i = 1; i <= 6; i++ ) {
1562  mat.Random( i, i, RANDOM_SEED, -10.0f, 10.0f );
1563 
1564  bestClocksGeneric = 0;
1565  for ( j = 0; j < NUMTESTS; j++ ) {
1566  dst.Zero();
1567  StartRecordTime( start );
1568  p_generic->MatX_MultiplyVecX( dst, mat, src );
1569  StopRecordTime( end );
1570  GetBest( start, end, bestClocksGeneric );
1571  }
1572  tst = dst;
1573 
1574  PrintClocks( va( "generic->MatX_MultiplyVecX %dx%d*%dx1", i, i, i ), 1, bestClocksGeneric );
1575 
1576  bestClocksSIMD = 0;
1577  for ( j = 0; j < NUMTESTS; j++ ) {
1578  dst.Zero();
1579  StartRecordTime( start );
1580  p_simd->MatX_MultiplyVecX( dst, mat, src );
1581  StopRecordTime( end );
1582  GetBest( start, end, bestClocksSIMD );
1583  }
1584 
1585  result = dst.Compare( tst, MATX_SIMD_EPSILON ) ? "ok" : S_COLOR_RED"X";
1586  PrintClocks( va( " simd->MatX_MultiplyVecX %dx%d*%dx1 %s", i, i, i, result ), 1, bestClocksSIMD, bestClocksGeneric );
1587  }
1588 
1589  idLib::common->Printf("================= Nx6 * 6x1 ===================\n" );
1590 
1591  for ( i = 1; i <= 6; i++ ) {
1592  mat.Random( i, 6, RANDOM_SEED, -10.0f, 10.0f );
1593 
1594  bestClocksGeneric = 0;
1595  for ( j = 0; j < NUMTESTS; j++ ) {
1596  dst.Zero();
1597  StartRecordTime( start );
1598  p_generic->MatX_MultiplyVecX( dst, mat, src );
1599  StopRecordTime( end );
1600  GetBest( start, end, bestClocksGeneric );
1601  }
1602  tst = dst;
1603 
1604  PrintClocks( va( "generic->MatX_MultiplyVecX %dx6*6x1", i ), 1, bestClocksGeneric );
1605 
1606  bestClocksSIMD = 0;
1607  for ( j = 0; j < NUMTESTS; j++ ) {
1608  dst.Zero();
1609  StartRecordTime( start );
1610  p_simd->MatX_MultiplyVecX( dst, mat, src );
1611  StopRecordTime( end );
1612  GetBest( start, end, bestClocksSIMD );
1613  }
1614 
1615  result = dst.Compare( tst, MATX_SIMD_EPSILON ) ? "ok" : S_COLOR_RED"X";
1616  PrintClocks( va( " simd->MatX_MultiplyVecX %dx6*6x1 %s", i, result ), 1, bestClocksSIMD, bestClocksGeneric );
1617  }
1618 
1619  idLib::common->Printf("================= 6xN * Nx1 ===================\n" );
1620 
1621  for ( i = 1; i <= 6; i++ ) {
1622  mat.Random( 6, i, RANDOM_SEED, -10.0f, 10.0f );
1623 
1624  bestClocksGeneric = 0;
1625  for ( j = 0; j < NUMTESTS; j++ ) {
1626  dst.Zero();
1627  StartRecordTime( start );
1628  p_generic->MatX_MultiplyVecX( dst, mat, src );
1629  StopRecordTime( end );
1630  GetBest( start, end, bestClocksGeneric );
1631  }
1632  tst = dst;
1633 
1634  PrintClocks( va( "generic->MatX_MultiplyVecX 6x%d*%dx1", i, i ), 1, bestClocksGeneric );
1635 
1636  bestClocksSIMD = 0;
1637  for ( j = 0; j < NUMTESTS; j++ ) {
1638  StartRecordTime( start );
1639  p_simd->MatX_MultiplyVecX( dst, mat, src );
1640  StopRecordTime( end );
1641  GetBest( start, end, bestClocksSIMD );
1642  }
1643 
1644  result = dst.Compare( tst, MATX_SIMD_EPSILON ) ? "ok" : S_COLOR_RED"X";
1645  PrintClocks( va( " simd->MatX_MultiplyVecX 6x%d*%dx1 %s", i, i, result ), 1, bestClocksSIMD, bestClocksGeneric );
1646  }
1647 }
1648 
1649 /*
1650 ============
1651 TestMatXMultiplyAddVecX
1652 ============
1653 */
1655  int i, j;
1656  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
1657  const char *result;
1658  idMatX mat;
1659  idVecX src(6);
1660  idVecX dst(6);
1661  idVecX tst(6);
1662 
1663  src[0] = 1.0f;
1664  src[1] = 2.0f;
1665  src[2] = 3.0f;
1666  src[3] = 4.0f;
1667  src[4] = 5.0f;
1668  src[5] = 6.0f;
1669 
1670  idLib::common->Printf("================= NxN * Nx1 ===================\n" );
1671 
1672  for ( i = 1; i <= 6; i++ ) {
1673  mat.Random( i, i, RANDOM_SEED, -10.0f, 10.0f );
1674 
1675  bestClocksGeneric = 0;
1676  for ( j = 0; j < NUMTESTS; j++ ) {
1677  dst.Zero();
1678  StartRecordTime( start );
1679  p_generic->MatX_MultiplyAddVecX( dst, mat, src );
1680  StopRecordTime( end );
1681  GetBest( start, end, bestClocksGeneric );
1682  }
1683  tst = dst;
1684 
1685  PrintClocks( va( "generic->MatX_MultiplyAddVecX %dx%d*%dx1", i, i, i ), 1, bestClocksGeneric );
1686 
1687  bestClocksSIMD = 0;
1688  for ( j = 0; j < NUMTESTS; j++ ) {
1689  dst.Zero();
1690  StartRecordTime( start );
1691  p_simd->MatX_MultiplyAddVecX( dst, mat, src );
1692  StopRecordTime( end );
1693  GetBest( start, end, bestClocksSIMD );
1694  }
1695 
1696  result = dst.Compare( tst, MATX_SIMD_EPSILON ) ? "ok" : S_COLOR_RED"X";
1697  PrintClocks( va( " simd->MatX_MultiplyAddVecX %dx%d*%dx1 %s", i, i, i, result ), 1, bestClocksSIMD, bestClocksGeneric );
1698  }
1699 
1700  idLib::common->Printf("================= Nx6 * 6x1 ===================\n" );
1701 
1702  for ( i = 1; i <= 6; i++ ) {
1703  mat.Random( i, 6, RANDOM_SEED, -10.0f, 10.0f );
1704 
1705  bestClocksGeneric = 0;
1706  for ( j = 0; j < NUMTESTS; j++ ) {
1707  dst.Zero();
1708  StartRecordTime( start );
1709  p_generic->MatX_MultiplyAddVecX( dst, mat, src );
1710  StopRecordTime( end );
1711  GetBest( start, end, bestClocksGeneric );
1712  }
1713  tst = dst;
1714 
1715  PrintClocks( va( "generic->MatX_MultiplyAddVecX %dx6*6x1", i ), 1, bestClocksGeneric );
1716 
1717  bestClocksSIMD = 0;
1718  for ( j = 0; j < NUMTESTS; j++ ) {
1719  dst.Zero();
1720  StartRecordTime( start );
1721  p_simd->MatX_MultiplyAddVecX( dst, mat, src );
1722  StopRecordTime( end );
1723  GetBest( start, end, bestClocksSIMD );
1724  }
1725 
1726  result = dst.Compare( tst, MATX_SIMD_EPSILON ) ? "ok" : S_COLOR_RED"X";
1727  PrintClocks( va( " simd->MatX_MultiplyAddVecX %dx6*6x1 %s", i, result ), 1, bestClocksSIMD, bestClocksGeneric );
1728  }
1729 
1730  idLib::common->Printf("================= 6xN * Nx1 ===================\n" );
1731 
1732  for ( i = 1; i <= 6; i++ ) {
1733  mat.Random( 6, i, RANDOM_SEED, -10.0f, 10.0f );
1734 
1735  bestClocksGeneric = 0;
1736  for ( j = 0; j < NUMTESTS; j++ ) {
1737  dst.Zero();
1738  StartRecordTime( start );
1739  p_generic->MatX_MultiplyAddVecX( dst, mat, src );
1740  StopRecordTime( end );
1741  GetBest( start, end, bestClocksGeneric );
1742  }
1743  tst = dst;
1744 
1745  PrintClocks( va( "generic->MatX_MultiplyAddVecX 6x%d*%dx1", i, i ), 1, bestClocksGeneric );
1746 
1747  bestClocksSIMD = 0;
1748  for ( j = 0; j < NUMTESTS; j++ ) {
1749  dst.Zero();
1750  StartRecordTime( start );
1751  p_simd->MatX_MultiplyAddVecX( dst, mat, src );
1752  StopRecordTime( end );
1753  GetBest( start, end, bestClocksSIMD );
1754  }
1755 
1756  result = dst.Compare( tst, MATX_SIMD_EPSILON ) ? "ok" : S_COLOR_RED"X";
1757  PrintClocks( va( " simd->MatX_MultiplyAddVecX 6x%d*%dx1 %s", i, i, result ), 1, bestClocksSIMD, bestClocksGeneric );
1758  }
1759 }
1760 
1761 /*
1762 ============
1763 TestMatXTransposeMultiplyVecX
1764 ============
1765 */
1767  int i, j;
1768  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
1769  const char *result;
1770  idMatX mat;
1771  idVecX src(6);
1772  idVecX dst(6);
1773  idVecX tst(6);
1774 
1775  src[0] = 1.0f;
1776  src[1] = 2.0f;
1777  src[2] = 3.0f;
1778  src[3] = 4.0f;
1779  src[4] = 5.0f;
1780  src[5] = 6.0f;
1781 
1782  idLib::common->Printf("================= Nx6 * Nx1 ===================\n" );
1783 
1784  for ( i = 1; i <= 6; i++ ) {
1785  mat.Random( i, 6, RANDOM_SEED, -10.0f, 10.0f );
1786 
1787  bestClocksGeneric = 0;
1788  for ( j = 0; j < NUMTESTS; j++ ) {
1789  dst.Zero();
1790  StartRecordTime( start );
1791  p_generic->MatX_TransposeMultiplyVecX( dst, mat, src );
1792  StopRecordTime( end );
1793  GetBest( start, end, bestClocksGeneric );
1794  }
1795  tst = dst;
1796 
1797  PrintClocks( va( "generic->MatX_TransposeMulVecX %dx6*%dx1", i, i ), 1, bestClocksGeneric );
1798 
1799  bestClocksSIMD = 0;
1800  for ( j = 0; j < NUMTESTS; j++ ) {
1801  dst.Zero();
1802  StartRecordTime( start );
1803  p_simd->MatX_TransposeMultiplyVecX( dst, mat, src );
1804  StopRecordTime( end );
1805  GetBest( start, end, bestClocksSIMD );
1806  }
1807 
1808  result = dst.Compare( tst, MATX_SIMD_EPSILON ) ? "ok" : S_COLOR_RED"X";
1809  PrintClocks( va( " simd->MatX_TransposeMulVecX %dx6*%dx1 %s", i, i, result ), 1, bestClocksSIMD, bestClocksGeneric );
1810  }
1811 
1812  idLib::common->Printf("================= 6xN * 6x1 ===================\n" );
1813 
1814  for ( i = 1; i <= 6; i++ ) {
1815  mat.Random( 6, i, RANDOM_SEED, -10.0f, 10.0f );
1816 
1817  bestClocksGeneric = 0;
1818  for ( j = 0; j < NUMTESTS; j++ ) {
1819  dst.Zero();
1820  StartRecordTime( start );
1821  p_generic->MatX_TransposeMultiplyVecX( dst, mat, src );
1822  StopRecordTime( end );
1823  GetBest( start, end, bestClocksGeneric );
1824  }
1825  tst = dst;
1826 
1827  PrintClocks( va( "generic->MatX_TransposeMulVecX 6x%d*6x1", i ), 1, bestClocksGeneric );
1828 
1829  bestClocksSIMD = 0;
1830  for ( j = 0; j < NUMTESTS; j++ ) {
1831  dst.Zero();
1832  StartRecordTime( start );
1833  p_simd->MatX_TransposeMultiplyVecX( dst, mat, src );
1834  StopRecordTime( end );
1835  GetBest( start, end, bestClocksSIMD );
1836  }
1837 
1838  result = dst.Compare( tst, MATX_SIMD_EPSILON ) ? "ok" : S_COLOR_RED"X";
1839  PrintClocks( va( " simd->MatX_TransposeMulVecX 6x%d*6x1 %s", i, result ), 1, bestClocksSIMD, bestClocksGeneric );
1840  }
1841 }
1842 
1843 /*
1844 ============
1845 TestMatXTransposeMultiplyAddVecX
1846 ============
1847 */
1849  int i, j;
1850  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
1851  const char *result;
1852  idMatX mat;
1853  idVecX src(6);
1854  idVecX dst(6);
1855  idVecX tst(6);
1856 
1857  src[0] = 1.0f;
1858  src[1] = 2.0f;
1859  src[2] = 3.0f;
1860  src[3] = 4.0f;
1861  src[4] = 5.0f;
1862  src[5] = 6.0f;
1863 
1864  idLib::common->Printf("================= Nx6 * Nx1 ===================\n" );
1865 
1866  for ( i = 1; i <= 6; i++ ) {
1867  mat.Random( i, 6, RANDOM_SEED, -10.0f, 10.0f );
1868 
1869  bestClocksGeneric = 0;
1870  for ( j = 0; j < NUMTESTS; j++ ) {
1871  dst.Zero();
1872  StartRecordTime( start );
1873  p_generic->MatX_TransposeMultiplyAddVecX( dst, mat, src );
1874  StopRecordTime( end );
1875  GetBest( start, end, bestClocksGeneric );
1876  }
1877  tst = dst;
1878 
1879  PrintClocks( va( "generic->MatX_TransposeMulAddVecX %dx6*%dx1", i, i ), 1, bestClocksGeneric );
1880 
1881  bestClocksSIMD = 0;
1882  for ( j = 0; j < NUMTESTS; j++ ) {
1883  dst.Zero();
1884  StartRecordTime( start );
1885  p_simd->MatX_TransposeMultiplyAddVecX( dst, mat, src );
1886  StopRecordTime( end );
1887  GetBest( start, end, bestClocksSIMD );
1888  }
1889 
1890  result = dst.Compare( tst, MATX_SIMD_EPSILON ) ? "ok" : S_COLOR_RED"X";
1891  PrintClocks( va( " simd->MatX_TransposeMulAddVecX %dx6*%dx1 %s", i, i, result ), 1, bestClocksSIMD, bestClocksGeneric );
1892  }
1893 
1894  idLib::common->Printf("================= 6xN * 6x1 ===================\n" );
1895 
1896  for ( i = 1; i <= 6; i++ ) {
1897  mat.Random( 6, i, RANDOM_SEED, -10.0f, 10.0f );
1898 
1899  bestClocksGeneric = 0;
1900  for ( j = 0; j < NUMTESTS; j++ ) {
1901  dst.Zero();
1902  StartRecordTime( start );
1903  p_generic->MatX_TransposeMultiplyAddVecX( dst, mat, src );
1904  StopRecordTime( end );
1905  GetBest( start, end, bestClocksGeneric );
1906  }
1907  tst = dst;
1908 
1909  PrintClocks( va( "generic->MatX_TransposeMulAddVecX 6x%d*6x1", i ), 1, bestClocksGeneric );
1910 
1911  bestClocksSIMD = 0;
1912  for ( j = 0; j < NUMTESTS; j++ ) {
1913  dst.Zero();
1914  StartRecordTime( start );
1915  p_simd->MatX_TransposeMultiplyAddVecX( dst, mat, src );
1916  StopRecordTime( end );
1917  GetBest( start, end, bestClocksSIMD );
1918  }
1919 
1920  result = dst.Compare( tst, MATX_SIMD_EPSILON ) ? "ok" : S_COLOR_RED"X";
1921  PrintClocks( va( " simd->MatX_TransposeMulAddVecX 6x%d*6x1 %s", i, result ), 1, bestClocksSIMD, bestClocksGeneric );
1922  }
1923 }
1924 
1925 /*
1926 ============
1927 TestMatXMultiplyMatX
1928 ============
1929 */
1930 #define TEST_VALUE_RANGE 10.0f
1931 #define MATX_MATX_SIMD_EPSILON 1e-4f
1932 
1933 void TestMatXMultiplyMatX( void ) {
1934  int i, j;
1935  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
1936  const char *result;
1937  idMatX m1, m2, dst, tst;
1938 
1939  idLib::common->Printf("================= NxN * Nx6 ===================\n" );
1940 
1941  // NxN * Nx6
1942  for ( i = 1; i <= 5; i++ ) {
1945  dst.SetSize( i, 6 );
1946 
1947  bestClocksGeneric = 0;
1948  for ( j = 0; j < NUMTESTS; j++ ) {
1949  StartRecordTime( start );
1950  p_generic->MatX_MultiplyMatX( dst, m1, m2 );
1951  StopRecordTime( end );
1952  GetBest( start, end, bestClocksGeneric );
1953  }
1954  tst = dst;
1955 
1956  PrintClocks( va( "generic->MatX_MultiplyMatX %dx%d*%dx6", i, i, i ), 1, bestClocksGeneric );
1957 
1958  bestClocksSIMD = 0;
1959  for ( j = 0; j < NUMTESTS; j++ ) {
1960  StartRecordTime( start );
1961  p_simd->MatX_MultiplyMatX( dst, m1, m2 );
1962  StopRecordTime( end );
1963  GetBest( start, end, bestClocksSIMD );
1964  }
1965 
1966  result = dst.Compare( tst, MATX_MATX_SIMD_EPSILON ) ? "ok" : S_COLOR_RED"X";
1967  PrintClocks( va( " simd->MatX_MultiplyMatX %dx%d*%dx6 %s", i, i, i, result ), 1, bestClocksSIMD, bestClocksGeneric );
1968  }
1969 
1970  idLib::common->Printf("================= 6xN * Nx6 ===================\n" );
1971 
1972  // 6xN * Nx6
1973  for ( i = 1; i <= 5; i++ ) {
1976  dst.SetSize( 6, 6 );
1977 
1978  bestClocksGeneric = 0;
1979  for ( j = 0; j < NUMTESTS; j++ ) {
1980  StartRecordTime( start );
1981  p_generic->MatX_MultiplyMatX( dst, m1, m2 );
1982  StopRecordTime( end );
1983  GetBest( start, end, bestClocksGeneric );
1984  }
1985  tst = dst;
1986 
1987  PrintClocks( va( "generic->MatX_MultiplyMatX 6x%d*%dx6", i, i ), 1, bestClocksGeneric );
1988 
1989  bestClocksSIMD = 0;
1990  for ( j = 0; j < NUMTESTS; j++ ) {
1991  StartRecordTime( start );
1992  p_simd->MatX_MultiplyMatX( dst, m1, m2 );
1993  StopRecordTime( end );
1994  GetBest( start, end, bestClocksSIMD );
1995  }
1996 
1997  result = dst.Compare( tst, MATX_MATX_SIMD_EPSILON ) ? "ok" : S_COLOR_RED"X";
1998  PrintClocks( va( " simd->MatX_MultiplyMatX 6x%d*%dx6 %s", i, i, result ), 1, bestClocksSIMD, bestClocksGeneric );
1999  }
2000 
2001  idLib::common->Printf("================= Nx6 * 6xN ===================\n" );
2002 
2003  // Nx6 * 6xN
2004  for ( i = 1; i <= 5; i++ ) {
2007  dst.SetSize( i, i );
2008 
2009  bestClocksGeneric = 0;
2010  for ( j = 0; j < NUMTESTS; j++ ) {
2011  StartRecordTime( start );
2012  p_generic->MatX_MultiplyMatX( dst, m1, m2 );
2013  StopRecordTime( end );
2014  GetBest( start, end, bestClocksGeneric );
2015  }
2016  tst = dst;
2017 
2018  PrintClocks( va( "generic->MatX_MultiplyMatX %dx6*6x%d", i, i ), 1, bestClocksGeneric );
2019 
2020  bestClocksSIMD = 0;
2021  for ( j = 0; j < NUMTESTS; j++ ) {
2022  StartRecordTime( start );
2023  p_simd->MatX_MultiplyMatX( dst, m1, m2 );
2024  StopRecordTime( end );
2025  GetBest( start, end, bestClocksSIMD );
2026  }
2027 
2028  result = dst.Compare( tst, MATX_MATX_SIMD_EPSILON ) ? "ok" : S_COLOR_RED"X";
2029  PrintClocks( va( " simd->MatX_MultiplyMatX %dx6*6x%d %s", i, i, result ), 1, bestClocksSIMD, bestClocksGeneric );
2030  }
2031 
2032  idLib::common->Printf("================= 6x6 * 6xN ===================\n" );
2033 
2034  // 6x6 * 6xN
2035  for ( i = 1; i <= 6; i++ ) {
2038  dst.SetSize( 6, i );
2039 
2040  bestClocksGeneric = 0;
2041  for ( j = 0; j < NUMTESTS; j++ ) {
2042  StartRecordTime( start );
2043  p_generic->MatX_MultiplyMatX( dst, m1, m2 );
2044  StopRecordTime( end );
2045  GetBest( start, end, bestClocksGeneric );
2046  }
2047  tst = dst;
2048 
2049  PrintClocks( va( "generic->MatX_MultiplyMatX 6x6*6x%d", i ), 1, bestClocksGeneric );
2050 
2051  bestClocksSIMD = 0;
2052  for ( j = 0; j < NUMTESTS; j++ ) {
2053  StartRecordTime( start );
2054  p_simd->MatX_MultiplyMatX( dst, m1, m2 );
2055  StopRecordTime( end );
2056  GetBest( start, end, bestClocksSIMD );
2057  }
2058 
2059  result = dst.Compare( tst, MATX_MATX_SIMD_EPSILON ) ? "ok" : S_COLOR_RED"X";
2060  PrintClocks( va( " simd->MatX_MultiplyMatX 6x6*6x%d %s", i, result ), 1, bestClocksSIMD, bestClocksGeneric );
2061  }
2062 }
2063 
2064 /*
2065 ============
2066 TestMatXTransposeMultiplyMatX
2067 ============
2068 */
2070  int i, j;
2071  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
2072  const char *result;
2073  idMatX m1, m2, dst, tst;
2074 
2075  idLib::common->Printf("================= Nx6 * NxN ===================\n" );
2076 
2077  // Nx6 * NxN
2078  for ( i = 1; i <= 5; i++ ) {
2081  dst.SetSize( 6, i );
2082 
2083  bestClocksGeneric = 0;
2084  for ( j = 0; j < NUMTESTS; j++ ) {
2085  StartRecordTime( start );
2086  p_generic->MatX_TransposeMultiplyMatX( dst, m1, m2 );
2087  StopRecordTime( end );
2088  GetBest( start, end, bestClocksGeneric );
2089  }
2090  tst = dst;
2091 
2092  PrintClocks( va( "generic->MatX_TransMultiplyMatX %dx6*%dx%d", i, i, i ), 1, bestClocksGeneric );
2093 
2094  bestClocksSIMD = 0;
2095  for ( j = 0; j < NUMTESTS; j++ ) {
2096  StartRecordTime( start );
2097  p_simd->MatX_TransposeMultiplyMatX( dst, m1, m2 );
2098  StopRecordTime( end );
2099  GetBest( start, end, bestClocksSIMD );
2100  }
2101 
2102  result = dst.Compare( tst, MATX_MATX_SIMD_EPSILON ) ? "ok" : S_COLOR_RED"X";
2103  PrintClocks( va( " simd->MatX_TransMultiplyMatX %dx6*%dx%d %s", i, i, i, result ), 1, bestClocksSIMD, bestClocksGeneric );
2104  }
2105 
2106  idLib::common->Printf("================= 6xN * 6x6 ===================\n" );
2107 
2108  // 6xN * 6x6
2109  for ( i = 1; i <= 6; i++ ) {
2112  dst.SetSize( i, 6 );
2113 
2114  bestClocksGeneric = 0;
2115  for ( j = 0; j < NUMTESTS; j++ ) {
2116  StartRecordTime( start );
2117  p_generic->MatX_TransposeMultiplyMatX( dst, m1, m2 );
2118  StopRecordTime( end );
2119  GetBest( start, end, bestClocksGeneric );
2120  }
2121  tst = dst;
2122 
2123  PrintClocks( va( "generic->MatX_TransMultiplyMatX 6x%d*6x6", i ), 1, bestClocksGeneric );
2124 
2125  bestClocksSIMD = 0;
2126  for ( j = 0; j < NUMTESTS; j++ ) {
2127  StartRecordTime( start );
2128  p_simd->MatX_TransposeMultiplyMatX( dst, m1, m2 );
2129  StopRecordTime( end );
2130  GetBest( start, end, bestClocksSIMD );
2131  }
2132 
2133  result = dst.Compare( tst, MATX_MATX_SIMD_EPSILON ) ? "ok" : S_COLOR_RED"X";
2134  PrintClocks( va( " simd->MatX_TransMultiplyMatX 6x%d*6x6 %s", i, result ), 1, bestClocksSIMD, bestClocksGeneric );
2135  }
2136 }
2137 
2138 #define MATX_LTS_SIMD_EPSILON 1.0f
2139 #define MATX_LTS_SOLVE_SIZE 100
2140 
2141 /*
2142 ============
2143 TestMatXLowerTriangularSolve
2144 ============
2145 */
2147  int i, j;
2148  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
2149  const char *result;
2150  idMatX L;
2151  idVecX x, b, tst;
2152 
2153  idLib::common->Printf("====================================\n" );
2154 
2155  L.Random( MATX_LTS_SOLVE_SIZE, MATX_LTS_SOLVE_SIZE, 0, -1.0f, 1.0f );
2157  b.Random( MATX_LTS_SOLVE_SIZE, 0, -1.0f, 1.0f );
2158 
2159  for ( i = 1; i < MATX_LTS_SOLVE_SIZE; i++ ) {
2160 
2161  x.Zero( i );
2162 
2163  bestClocksGeneric = 0;
2164  for ( j = 0; j < NUMTESTS; j++ ) {
2165  StartRecordTime( start );
2166  p_generic->MatX_LowerTriangularSolve( L, x.ToFloatPtr(), b.ToFloatPtr(), i );
2167  StopRecordTime( end );
2168  GetBest( start, end, bestClocksGeneric );
2169  }
2170  tst = x;
2171  x.Zero();
2172 
2173  PrintClocks( va( "generic->MatX_LowerTriangularSolve %dx%d", i, i ), 1, bestClocksGeneric );
2174 
2175  bestClocksSIMD = 0;
2176  for ( j = 0; j < NUMTESTS; j++ ) {
2177  StartRecordTime( start );
2178  p_simd->MatX_LowerTriangularSolve( L, x.ToFloatPtr(), b.ToFloatPtr(), i );
2179  StopRecordTime( end );
2180  GetBest( start, end, bestClocksSIMD );
2181  }
2182 
2183  result = x.Compare( tst, MATX_LTS_SIMD_EPSILON ) ? "ok" : S_COLOR_RED"X";
2184  PrintClocks( va( " simd->MatX_LowerTriangularSolve %dx%d %s", i, i, result ), 1, bestClocksSIMD, bestClocksGeneric );
2185  }
2186 }
2187 
2188 /*
2189 ============
2190 TestMatXLowerTriangularSolveTranspose
2191 ============
2192 */
2194  int i, j;
2195  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
2196  const char *result;
2197  idMatX L;
2198  idVecX x, b, tst;
2199 
2200  idLib::common->Printf("====================================\n" );
2201 
2202  L.Random( MATX_LTS_SOLVE_SIZE, MATX_LTS_SOLVE_SIZE, 0, -1.0f, 1.0f );
2204  b.Random( MATX_LTS_SOLVE_SIZE, 0, -1.0f, 1.0f );
2205 
2206  for ( i = 1; i < MATX_LTS_SOLVE_SIZE; i++ ) {
2207 
2208  x.Zero( i );
2209 
2210  bestClocksGeneric = 0;
2211  for ( j = 0; j < NUMTESTS; j++ ) {
2212  StartRecordTime( start );
2213  p_generic->MatX_LowerTriangularSolveTranspose( L, x.ToFloatPtr(), b.ToFloatPtr(), i );
2214  StopRecordTime( end );
2215  GetBest( start, end, bestClocksGeneric );
2216  }
2217  tst = x;
2218  x.Zero();
2219 
2220  PrintClocks( va( "generic->MatX_LowerTriangularSolveT %dx%d", i, i ), 1, bestClocksGeneric );
2221 
2222  bestClocksSIMD = 0;
2223  for ( j = 0; j < NUMTESTS; j++ ) {
2224  StartRecordTime( start );
2226  StopRecordTime( end );
2227  GetBest( start, end, bestClocksSIMD );
2228  }
2229 
2230  result = x.Compare( tst, MATX_LTS_SIMD_EPSILON ) ? "ok" : S_COLOR_RED"X";
2231  PrintClocks( va( " simd->MatX_LowerTriangularSolveT %dx%d %s", i, i, result ), 1, bestClocksSIMD, bestClocksGeneric );
2232  }
2233 }
2234 
2235 #define MATX_LDLT_SIMD_EPSILON 0.1f
2236 #define MATX_LDLT_FACTOR_SOLVE_SIZE 64
2237 
2238 /*
2239 ============
2240 TestMatXLDLTFactor
2241 ============
2242 */
2243 void TestMatXLDLTFactor( void ) {
2244  int i, j;
2245  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
2246  const char *result;
2247  idMatX src, original, mat1, mat2;
2248  idVecX invDiag1, invDiag2;
2249 
2250  idLib::common->Printf("====================================\n" );
2251 
2254  src.TransposeMultiply( original, src );
2255 
2256  for ( i = 1; i < MATX_LDLT_FACTOR_SOLVE_SIZE; i++ ) {
2257 
2258  bestClocksGeneric = 0;
2259  for ( j = 0; j < NUMTESTS; j++ ) {
2260  mat1 = original;
2261  invDiag1.Zero( MATX_LDLT_FACTOR_SOLVE_SIZE );
2262  StartRecordTime( start );
2263  p_generic->MatX_LDLTFactor( mat1, invDiag1, i );
2264  StopRecordTime( end );
2265  GetBest( start, end, bestClocksGeneric );
2266  }
2267 
2268  PrintClocks( va( "generic->MatX_LDLTFactor %dx%d", i, i ), 1, bestClocksGeneric );
2269 
2270  bestClocksSIMD = 0;
2271  for ( j = 0; j < NUMTESTS; j++ ) {
2272  mat2 = original;
2273  invDiag2.Zero( MATX_LDLT_FACTOR_SOLVE_SIZE );
2274  StartRecordTime( start );
2275  p_simd->MatX_LDLTFactor( mat2, invDiag2, i );
2276  StopRecordTime( end );
2277  GetBest( start, end, bestClocksSIMD );
2278  }
2279 
2280  result = mat1.Compare( mat2, MATX_LDLT_SIMD_EPSILON ) && invDiag1.Compare( invDiag2, MATX_LDLT_SIMD_EPSILON ) ? "ok" : S_COLOR_RED"X";
2281  PrintClocks( va( " simd->MatX_LDLTFactor %dx%d %s", i, i, result ), 1, bestClocksSIMD, bestClocksGeneric );
2282  }
2283 }
2284 
2285 /*
2286 ============
2287 TestBlendJoints
2288 ============
2289 */
2290 void TestBlendJoints( void ) {
2291  int i, j;
2292  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
2293  ALIGN16( idJointQuat baseJoints[COUNT] );
2294  ALIGN16( idJointQuat joints1[COUNT] );
2295  ALIGN16( idJointQuat joints2[COUNT] );
2296  ALIGN16( idJointQuat blendJoints[COUNT] );
2297  ALIGN16( int index[COUNT] );
2298  float lerp = 0.3f;
2299  const char *result;
2300 
2301  idRandom srnd( RANDOM_SEED );
2302 
2303  for ( i = 0; i < COUNT; i++ ) {
2304  idAngles angles;
2305  angles[0] = srnd.CRandomFloat() * 180.0f;
2306  angles[1] = srnd.CRandomFloat() * 180.0f;
2307  angles[2] = srnd.CRandomFloat() * 180.0f;
2308  baseJoints[i].q = angles.ToQuat();
2309  baseJoints[i].t[0] = srnd.CRandomFloat() * 10.0f;
2310  baseJoints[i].t[1] = srnd.CRandomFloat() * 10.0f;
2311  baseJoints[i].t[2] = srnd.CRandomFloat() * 10.0f;
2312  angles[0] = srnd.CRandomFloat() * 180.0f;
2313  angles[1] = srnd.CRandomFloat() * 180.0f;
2314  angles[2] = srnd.CRandomFloat() * 180.0f;
2315  blendJoints[i].q = angles.ToQuat();
2316  blendJoints[i].t[0] = srnd.CRandomFloat() * 10.0f;
2317  blendJoints[i].t[1] = srnd.CRandomFloat() * 10.0f;
2318  blendJoints[i].t[2] = srnd.CRandomFloat() * 10.0f;
2319  index[i] = i;
2320  }
2321 
2322  bestClocksGeneric = 0;
2323  for ( i = 0; i < NUMTESTS; i++ ) {
2324  for ( j = 0; j < COUNT; j++ ) {
2325  joints1[j] = baseJoints[j];
2326  }
2327  StartRecordTime( start );
2328  p_generic->BlendJoints( joints1, blendJoints, lerp, index, COUNT );
2329  StopRecordTime( end );
2330  GetBest( start, end, bestClocksGeneric );
2331  }
2332  PrintClocks( "generic->BlendJoints()", COUNT, bestClocksGeneric );
2333 
2334  bestClocksSIMD = 0;
2335  for ( i = 0; i < NUMTESTS; i++ ) {
2336  for ( j = 0; j < COUNT; j++ ) {
2337  joints2[j] = baseJoints[j];
2338  }
2339  StartRecordTime( start );
2340  p_simd->BlendJoints( joints2, blendJoints, lerp, index, COUNT );
2341  StopRecordTime( end );
2342  GetBest( start, end, bestClocksSIMD );
2343  }
2344 
2345  for ( i = 0; i < COUNT; i++ ) {
2346  if ( !joints1[i].t.Compare( joints2[i].t, 1e-3f ) ) {
2347  break;
2348  }
2349  if ( !joints1[i].q.Compare( joints2[i].q, 1e-2f ) ) {
2350  break;
2351  }
2352  }
2353  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
2354  PrintClocks( va( " simd->BlendJoints() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
2355 }
2356 
2357 /*
2358 ============
2359 TestConvertJointQuatsToJointMats
2360 ============
2361 */
2363  int i;
2364  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
2365  ALIGN16( idJointQuat baseJoints[COUNT] );
2366  ALIGN16( idJointMat joints1[COUNT] );
2367  ALIGN16( idJointMat joints2[COUNT] );
2368  const char *result;
2369 
2370  idRandom srnd( RANDOM_SEED );
2371 
2372  for ( i = 0; i < COUNT; i++ ) {
2373  idAngles angles;
2374  angles[0] = srnd.CRandomFloat() * 180.0f;
2375  angles[1] = srnd.CRandomFloat() * 180.0f;
2376  angles[2] = srnd.CRandomFloat() * 180.0f;
2377  baseJoints[i].q = angles.ToQuat();
2378  baseJoints[i].t[0] = srnd.CRandomFloat() * 10.0f;
2379  baseJoints[i].t[1] = srnd.CRandomFloat() * 10.0f;
2380  baseJoints[i].t[2] = srnd.CRandomFloat() * 10.0f;
2381  }
2382 
2383  bestClocksGeneric = 0;
2384  for ( i = 0; i < NUMTESTS; i++ ) {
2385  StartRecordTime( start );
2386  p_generic->ConvertJointQuatsToJointMats( joints1, baseJoints, COUNT );
2387  StopRecordTime( end );
2388  GetBest( start, end, bestClocksGeneric );
2389  }
2390  PrintClocks( "generic->ConvertJointQuatsToJointMats()", COUNT, bestClocksGeneric );
2391 
2392  bestClocksSIMD = 0;
2393  for ( i = 0; i < NUMTESTS; i++ ) {
2394  StartRecordTime( start );
2395  p_simd->ConvertJointQuatsToJointMats( joints2, baseJoints, COUNT );
2396  StopRecordTime( end );
2397  GetBest( start, end, bestClocksSIMD );
2398  }
2399 
2400  for ( i = 0; i < COUNT; i++ ) {
2401  if ( !joints1[i].Compare( joints2[i], 1e-4f ) ) {
2402  break;
2403  }
2404  }
2405  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
2406  PrintClocks( va( " simd->ConvertJointQuatsToJointMats() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
2407 }
2408 
2409 /*
2410 ============
2411 TestConvertJointMatsToJointQuats
2412 ============
2413 */
2415  int i;
2416  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
2417  ALIGN16( idJointMat baseJoints[COUNT] );
2418  ALIGN16( idJointQuat joints1[COUNT] );
2419  ALIGN16( idJointQuat joints2[COUNT] );
2420  const char *result;
2421 
2422  idRandom srnd( RANDOM_SEED );
2423 
2424  for ( i = 0; i < COUNT; i++ ) {
2425  idAngles angles;
2426  angles[0] = srnd.CRandomFloat() * 180.0f;
2427  angles[1] = srnd.CRandomFloat() * 180.0f;
2428  angles[2] = srnd.CRandomFloat() * 180.0f;
2429  baseJoints[i].SetRotation( angles.ToMat3() );
2430  idVec3 v;
2431  v[0] = srnd.CRandomFloat() * 10.0f;
2432  v[1] = srnd.CRandomFloat() * 10.0f;
2433  v[2] = srnd.CRandomFloat() * 10.0f;
2434  baseJoints[i].SetTranslation( v );
2435  }
2436 
2437  bestClocksGeneric = 0;
2438  for ( i = 0; i < NUMTESTS; i++ ) {
2439  StartRecordTime( start );
2440  p_generic->ConvertJointMatsToJointQuats( joints1, baseJoints, COUNT );
2441  StopRecordTime( end );
2442  GetBest( start, end, bestClocksGeneric );
2443  }
2444  PrintClocks( "generic->ConvertJointMatsToJointQuats()", COUNT, bestClocksGeneric );
2445 
2446  bestClocksSIMD = 0;
2447  for ( i = 0; i < NUMTESTS; i++ ) {
2448  StartRecordTime( start );
2449  p_simd->ConvertJointMatsToJointQuats( joints2, baseJoints, COUNT );
2450  StopRecordTime( end );
2451  GetBest( start, end, bestClocksSIMD );
2452  }
2453 
2454  for ( i = 0; i < COUNT; i++ ) {
2455  if ( !joints1[i].q.Compare( joints2[i].q, 1e-4f ) ) {
2456  idLib::common->Printf("ConvertJointMatsToJointQuats: broken q %i\n", i );
2457  break;
2458  }
2459  if ( !joints1[i].t.Compare( joints2[i].t, 1e-4f ) ) {
2460  idLib::common->Printf("ConvertJointMatsToJointQuats: broken t %i\n", i );
2461  break;
2462  }
2463  }
2464  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
2465  PrintClocks( va( " simd->ConvertJointMatsToJointQuats() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
2466 }
2467 
2468 /*
2469 ============
2470 TestTransformJoints
2471 ============
2472 */
2473 void TestTransformJoints( void ) {
2474  int i, j;
2475  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
2476  ALIGN16( idJointMat joints[COUNT+1] );
2477  ALIGN16( idJointMat joints1[COUNT+1] );
2478  ALIGN16( idJointMat joints2[COUNT+1] );
2479  ALIGN16( int parents[COUNT+1] );
2480  const char *result;
2481 
2482  idRandom srnd( RANDOM_SEED );
2483 
2484  for ( i = 0; i <= COUNT; i++ ) {
2485  idAngles angles;
2486  angles[0] = srnd.CRandomFloat() * 180.0f;
2487  angles[1] = srnd.CRandomFloat() * 180.0f;
2488  angles[2] = srnd.CRandomFloat() * 180.0f;
2489  joints[i].SetRotation( angles.ToMat3() );
2490  idVec3 v;
2491  v[0] = srnd.CRandomFloat() * 2.0f;
2492  v[1] = srnd.CRandomFloat() * 2.0f;
2493  v[2] = srnd.CRandomFloat() * 2.0f;
2494  joints[i].SetTranslation( v );
2495  parents[i] = i - 1;
2496  }
2497 
2498  bestClocksGeneric = 0;
2499  for ( i = 0; i < NUMTESTS; i++ ) {
2500  for ( j = 0; j <= COUNT; j++ ) {
2501  joints1[j] = joints[j];
2502  }
2503  StartRecordTime( start );
2504  p_generic->TransformJoints( joints1, parents, 1, COUNT );
2505  StopRecordTime( end );
2506  GetBest( start, end, bestClocksGeneric );
2507  }
2508  PrintClocks( "generic->TransformJoints()", COUNT, bestClocksGeneric );
2509 
2510  bestClocksSIMD = 0;
2511  for ( i = 0; i < NUMTESTS; i++ ) {
2512  for ( j = 0; j <= COUNT; j++ ) {
2513  joints2[j] = joints[j];
2514  }
2515  StartRecordTime( start );
2516  p_simd->TransformJoints( joints2, parents, 1, COUNT );
2517  StopRecordTime( end );
2518  GetBest( start, end, bestClocksSIMD );
2519  }
2520 
2521  for ( i = 0; i < COUNT; i++ ) {
2522  if ( !joints1[i+1].Compare( joints2[i+1], 1e-4f ) ) {
2523  break;
2524  }
2525  }
2526  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
2527  PrintClocks( va( " simd->TransformJoints() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
2528 }
2529 
2530 /*
2531 ============
2532 TestUntransformJoints
2533 ============
2534 */
2536  int i, j;
2537  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
2538  ALIGN16( idJointMat joints[COUNT+1] );
2539  ALIGN16( idJointMat joints1[COUNT+1] );
2540  ALIGN16( idJointMat joints2[COUNT+1] );
2541  ALIGN16( int parents[COUNT+1] );
2542  const char *result;
2543 
2544  idRandom srnd( RANDOM_SEED );
2545 
2546  for ( i = 0; i <= COUNT; i++ ) {
2547  idAngles angles;
2548  angles[0] = srnd.CRandomFloat() * 180.0f;
2549  angles[1] = srnd.CRandomFloat() * 180.0f;
2550  angles[2] = srnd.CRandomFloat() * 180.0f;
2551  joints[i].SetRotation( angles.ToMat3() );
2552  idVec3 v;
2553  v[0] = srnd.CRandomFloat() * 2.0f;
2554  v[1] = srnd.CRandomFloat() * 2.0f;
2555  v[2] = srnd.CRandomFloat() * 2.0f;
2556  joints[i].SetTranslation( v );
2557  parents[i] = i - 1;
2558  }
2559 
2560  bestClocksGeneric = 0;
2561  for ( i = 0; i < NUMTESTS; i++ ) {
2562  for ( j = 0; j <= COUNT; j++ ) {
2563  joints1[j] = joints[j];
2564  }
2565  StartRecordTime( start );
2566  p_generic->UntransformJoints( joints1, parents, 1, COUNT );
2567  StopRecordTime( end );
2568  GetBest( start, end, bestClocksGeneric );
2569  }
2570  PrintClocks( "generic->UntransformJoints()", COUNT, bestClocksGeneric );
2571 
2572  bestClocksSIMD = 0;
2573  for ( i = 0; i < NUMTESTS; i++ ) {
2574  for ( j = 0; j <= COUNT; j++ ) {
2575  joints2[j] = joints[j];
2576  }
2577  StartRecordTime( start );
2578  p_simd->UntransformJoints( joints2, parents, 1, COUNT );
2579  StopRecordTime( end );
2580  GetBest( start, end, bestClocksSIMD );
2581  }
2582 
2583  for ( i = 0; i < COUNT; i++ ) {
2584  if ( !joints1[i+1].Compare( joints2[i+1], 1e-4f ) ) {
2585  break;
2586  }
2587  }
2588  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
2589  PrintClocks( va( " simd->UntransformJoints() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
2590 }
2591 
2592 /*
2593 ============
2594 TestTransformVerts
2595 ============
2596 */
2597 #define NUMJOINTS 64
2598 #define NUMVERTS COUNT/2
2599 void TestTransformVerts( void ) {
2600  int i;
2601  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
2602  ALIGN16( idDrawVert drawVerts1[NUMVERTS] );
2603  ALIGN16( idDrawVert drawVerts2[NUMVERTS] );
2604  ALIGN16( idJointMat joints[NUMJOINTS] );
2605  ALIGN16( idVec4 weights[COUNT] );
2606  ALIGN16( int weightIndex[COUNT*2] );
2607  const char *result;
2608 
2609  idRandom srnd( RANDOM_SEED );
2610 
2611  for ( i = 0; i < NUMJOINTS; i++ ) {
2612  idAngles angles;
2613  angles[0] = srnd.CRandomFloat() * 180.0f;
2614  angles[1] = srnd.CRandomFloat() * 180.0f;
2615  angles[2] = srnd.CRandomFloat() * 180.0f;
2616  joints[i].SetRotation( angles.ToMat3() );
2617  idVec3 v;
2618  v[0] = srnd.CRandomFloat() * 2.0f;
2619  v[1] = srnd.CRandomFloat() * 2.0f;
2620  v[2] = srnd.CRandomFloat() * 2.0f;
2621  joints[i].SetTranslation( v );
2622  }
2623 
2624  for ( i = 0; i < COUNT; i++ ) {
2625  weights[i][0] = srnd.CRandomFloat() * 2.0f;
2626  weights[i][1] = srnd.CRandomFloat() * 2.0f;
2627  weights[i][2] = srnd.CRandomFloat() * 2.0f;
2628  weights[i][3] = srnd.CRandomFloat();
2629  weightIndex[i*2+0] = ( i * NUMJOINTS / COUNT ) * sizeof( idJointMat );
2630  weightIndex[i*2+1] = i & 1;
2631  }
2632 
2633  bestClocksGeneric = 0;
2634  for ( i = 0; i < NUMTESTS; i++ ) {
2635  StartRecordTime( start );
2636  p_generic->TransformVerts( drawVerts1, NUMVERTS, joints, weights, weightIndex, COUNT );
2637  StopRecordTime( end );
2638  GetBest( start, end, bestClocksGeneric );
2639  }
2640  PrintClocks( "generic->TransformVerts()", COUNT, bestClocksGeneric );
2641 
2642  bestClocksSIMD = 0;
2643  for ( i = 0; i < NUMTESTS; i++ ) {
2644  StartRecordTime( start );
2645  p_simd->TransformVerts( drawVerts2, NUMVERTS, joints, weights, weightIndex, COUNT );
2646  StopRecordTime( end );
2647  GetBest( start, end, bestClocksSIMD );
2648  }
2649 
2650  for ( i = 0; i < NUMVERTS; i++ ) {
2651  if ( !drawVerts1[i].xyz.Compare( drawVerts2[i].xyz, 0.5f ) ) {
2652  break;
2653  }
2654  }
2655  result = ( i >= NUMVERTS ) ? "ok" : S_COLOR_RED"X";
2656  PrintClocks( va( " simd->TransformVerts() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
2657 }
2658 
2659 /*
2660 ============
2661 TestTracePointCull
2662 ============
2663 */
2664 void TestTracePointCull( void ) {
2665  int i, j;
2666  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
2667  ALIGN16( idPlane planes[4] );
2668  ALIGN16( idDrawVert drawVerts[COUNT] );
2669  ALIGN16( byte cullBits1[COUNT] );
2670  ALIGN16( byte cullBits2[COUNT] );
2671  byte totalOr1 = 0, totalOr2 = 0;
2672  const char *result;
2673 
2674  idRandom srnd( RANDOM_SEED );
2675 
2676  planes[0].SetNormal( idVec3( 1, 0, 0 ) );
2677  planes[1].SetNormal( idVec3( -1, 0, 0 ) );
2678  planes[2].SetNormal( idVec3( 0, 1, 0 ) );
2679  planes[3].SetNormal( idVec3( 0, -1, 0 ) );
2680  planes[0][3] = -5.3f;
2681  planes[1][3] = 5.3f;
2682  planes[2][3] = -3.4f;
2683  planes[3][3] = 3.4f;
2684 
2685  for ( i = 0; i < COUNT; i++ ) {
2686  for ( j = 0; j < 3; j++ ) {
2687  drawVerts[i].xyz[j] = srnd.CRandomFloat() * 10.0f;
2688  }
2689  }
2690 
2691  bestClocksGeneric = 0;
2692  for ( i = 0; i < NUMTESTS; i++ ) {
2693  StartRecordTime( start );
2694  p_generic->TracePointCull( cullBits1, totalOr1, 0.0f, planes, drawVerts, COUNT );
2695  StopRecordTime( end );
2696  GetBest( start, end, bestClocksGeneric );
2697  }
2698  PrintClocks( "generic->TracePointCull()", COUNT, bestClocksGeneric );
2699 
2700  bestClocksSIMD = 0;
2701  for ( i = 0; i < NUMTESTS; i++ ) {
2702  StartRecordTime( start );
2703  p_simd->TracePointCull( cullBits2, totalOr2, 0.0f, planes, drawVerts, COUNT );
2704  StopRecordTime( end );
2705  GetBest( start, end, bestClocksSIMD );
2706  }
2707 
2708  for ( i = 0; i < COUNT; i++ ) {
2709  if ( cullBits1[i] != cullBits2[i] ) {
2710  break;
2711  }
2712  }
2713  result = ( i >= COUNT && totalOr1 == totalOr2 ) ? "ok" : S_COLOR_RED"X";
2714  PrintClocks( va( " simd->TracePointCull() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
2715 }
2716 
2717 /*
2718 ============
2719 TestDecalPointCull
2720 ============
2721 */
2722 void TestDecalPointCull( void ) {
2723  int i, j;
2724  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
2725  ALIGN16( idPlane planes[6] );
2726  ALIGN16( idDrawVert drawVerts[COUNT] );
2727  ALIGN16( byte cullBits1[COUNT] );
2728  ALIGN16( byte cullBits2[COUNT] );
2729  const char *result;
2730 
2731  idRandom srnd( RANDOM_SEED );
2732 
2733  planes[0].SetNormal( idVec3( 1, 0, 0 ) );
2734  planes[1].SetNormal( idVec3( -1, 0, 0 ) );
2735  planes[2].SetNormal( idVec3( 0, 1, 0 ) );
2736  planes[3].SetNormal( idVec3( 0, -1, 0 ) );
2737  planes[4].SetNormal( idVec3( 0, 0, 1 ) );
2738  planes[5].SetNormal( idVec3( 0, 0, -1 ) );
2739  planes[0][3] = -5.3f;
2740  planes[1][3] = 5.3f;
2741  planes[2][3] = -4.4f;
2742  planes[3][3] = 4.4f;
2743  planes[4][3] = -3.5f;
2744  planes[5][3] = 3.5f;
2745 
2746  for ( i = 0; i < COUNT; i++ ) {
2747  for ( j = 0; j < 3; j++ ) {
2748  drawVerts[i].xyz[j] = srnd.CRandomFloat() * 10.0f;
2749  }
2750  }
2751 
2752  bestClocksGeneric = 0;
2753  for ( i = 0; i < NUMTESTS; i++ ) {
2754  StartRecordTime( start );
2755  p_generic->DecalPointCull( cullBits1, planes, drawVerts, COUNT );
2756  StopRecordTime( end );
2757  GetBest( start, end, bestClocksGeneric );
2758  }
2759  PrintClocks( "generic->DecalPointCull()", COUNT, bestClocksGeneric );
2760 
2761  bestClocksSIMD = 0;
2762  for ( i = 0; i < NUMTESTS; i++ ) {
2763  StartRecordTime( start );
2764  p_simd->DecalPointCull( cullBits2, planes, drawVerts, COUNT );
2765  StopRecordTime( end );
2766  GetBest( start, end, bestClocksSIMD );
2767  }
2768 
2769  for ( i = 0; i < COUNT; i++ ) {
2770  if ( cullBits1[i] != cullBits2[i] ) {
2771  break;
2772  }
2773  }
2774  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
2775  PrintClocks( va( " simd->DecalPointCull() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
2776 }
2777 
2778 /*
2779 ============
2780 TestOverlayPointCull
2781 ============
2782 */
2783 void TestOverlayPointCull( void ) {
2784  int i, j;
2785  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
2786  ALIGN16( idPlane planes[2] );
2787  ALIGN16( idDrawVert drawVerts[COUNT] );
2788  ALIGN16( byte cullBits1[COUNT] );
2789  ALIGN16( byte cullBits2[COUNT] );
2790  ALIGN16( idVec2 texCoords1[COUNT] );
2791  ALIGN16( idVec2 texCoords2[COUNT] );
2792  const char *result;
2793 
2794  idRandom srnd( RANDOM_SEED );
2795 
2796  planes[0].SetNormal( idVec3( 0.3f, 0.2f, 0.9f ) );
2797  planes[1].SetNormal( idVec3( 0.9f, 0.2f, 0.3f ) );
2798  planes[0][3] = -5.3f;
2799  planes[1][3] = -4.3f;
2800 
2801  for ( i = 0; i < COUNT; i++ ) {
2802  for ( j = 0; j < 3; j++ ) {
2803  drawVerts[i].xyz[j] = srnd.CRandomFloat() * 10.0f;
2804  }
2805  }
2806 
2807  bestClocksGeneric = 0;
2808  for ( i = 0; i < NUMTESTS; i++ ) {
2809  StartRecordTime( start );
2810  p_generic->OverlayPointCull( cullBits1, texCoords1, planes, drawVerts, COUNT );
2811  StopRecordTime( end );
2812  GetBest( start, end, bestClocksGeneric );
2813  }
2814  PrintClocks( "generic->OverlayPointCull()", COUNT, bestClocksGeneric );
2815 
2816  bestClocksSIMD = 0;
2817  for ( i = 0; i < NUMTESTS; i++ ) {
2818  StartRecordTime( start );
2819  p_simd->OverlayPointCull( cullBits2, texCoords2, planes, drawVerts, COUNT );
2820  StopRecordTime( end );
2821  GetBest( start, end, bestClocksSIMD );
2822  }
2823 
2824  for ( i = 0; i < COUNT; i++ ) {
2825  if ( cullBits1[i] != cullBits2[i] ) {
2826  break;
2827  }
2828  if ( !texCoords1[i].Compare( texCoords2[i], 1e-4f ) ) {
2829  break;
2830  }
2831  }
2832  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
2833  PrintClocks( va( " simd->OverlayPointCull() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
2834 }
2835 
2836 /*
2837 ============
2838 TestDeriveTriPlanes
2839 ============
2840 */
2841 void TestDeriveTriPlanes( void ) {
2842  int i, j;
2843  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
2844  ALIGN16( idDrawVert drawVerts1[COUNT] );
2845  ALIGN16( idDrawVert drawVerts2[COUNT] );
2846  ALIGN16( idPlane planes1[COUNT] );
2847  ALIGN16( idPlane planes2[COUNT] );
2848  ALIGN16( int indexes[COUNT*3] );
2849  const char *result;
2850 
2851  idRandom srnd( RANDOM_SEED );
2852 
2853  for ( i = 0; i < COUNT; i++ ) {
2854  for ( j = 0; j < 3; j++ ) {
2855  drawVerts1[i].xyz[j] = srnd.CRandomFloat() * 10.0f;
2856  }
2857  for ( j = 0; j < 2; j++ ) {
2858  drawVerts1[i].st[j] = srnd.CRandomFloat();
2859  }
2860  drawVerts2[i] = drawVerts1[i];
2861  }
2862 
2863  for ( i = 0; i < COUNT; i++ ) {
2864  indexes[i*3+0] = ( i + 0 ) % COUNT;
2865  indexes[i*3+1] = ( i + 1 ) % COUNT;
2866  indexes[i*3+2] = ( i + 2 ) % COUNT;
2867  }
2868 
2869  bestClocksGeneric = 0;
2870  for ( i = 0; i < NUMTESTS; i++ ) {
2871  StartRecordTime( start );
2872  p_generic->DeriveTriPlanes( planes1, drawVerts1, COUNT, indexes, COUNT*3 );
2873  StopRecordTime( end );
2874  GetBest( start, end, bestClocksGeneric );
2875  }
2876  PrintClocks( "generic->DeriveTriPlanes()", COUNT, bestClocksGeneric );
2877 
2878  bestClocksSIMD = 0;
2879  for ( i = 0; i < NUMTESTS; i++ ) {
2880  StartRecordTime( start );
2881  p_simd->DeriveTriPlanes( planes2, drawVerts2, COUNT, indexes, COUNT*3 );
2882  StopRecordTime( end );
2883  GetBest( start, end, bestClocksSIMD );
2884  }
2885 
2886  for ( i = 0; i < COUNT; i++ ) {
2887  if ( !planes1[i].Compare( planes2[i], 1e-1f, 1e-1f ) ) {
2888  break;
2889  }
2890  }
2891  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
2892  PrintClocks( va( " simd->DeriveTriPlanes() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
2893 }
2894 
2895 /*
2896 ============
2897 TestDeriveTangents
2898 ============
2899 */
2900 void TestDeriveTangents( void ) {
2901  int i, j;
2902  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
2903  ALIGN16( idDrawVert drawVerts1[COUNT] );
2904  ALIGN16( idDrawVert drawVerts2[COUNT] );
2905  ALIGN16( idPlane planes1[COUNT] );
2906  ALIGN16( idPlane planes2[COUNT] );
2907  ALIGN16( int indexes[COUNT*3] );
2908  const char *result;
2909 
2910  idRandom srnd( RANDOM_SEED );
2911 
2912  for ( i = 0; i < COUNT; i++ ) {
2913  for ( j = 0; j < 3; j++ ) {
2914  drawVerts1[i].xyz[j] = srnd.CRandomFloat() * 10.0f;
2915  }
2916  for ( j = 0; j < 2; j++ ) {
2917  drawVerts1[i].st[j] = srnd.CRandomFloat();
2918  }
2919  drawVerts2[i] = drawVerts1[i];
2920  }
2921 
2922  for ( i = 0; i < COUNT; i++ ) {
2923  indexes[i*3+0] = ( i + 0 ) % COUNT;
2924  indexes[i*3+1] = ( i + 1 ) % COUNT;
2925  indexes[i*3+2] = ( i + 2 ) % COUNT;
2926  }
2927 
2928  bestClocksGeneric = 0;
2929  for ( i = 0; i < NUMTESTS; i++ ) {
2930  StartRecordTime( start );
2931  p_generic->DeriveTangents( planes1, drawVerts1, COUNT, indexes, COUNT*3 );
2932  StopRecordTime( end );
2933  GetBest( start, end, bestClocksGeneric );
2934  }
2935  PrintClocks( "generic->DeriveTangents()", COUNT, bestClocksGeneric );
2936 
2937  bestClocksSIMD = 0;
2938  for ( i = 0; i < NUMTESTS; i++ ) {
2939  StartRecordTime( start );
2940  p_simd->DeriveTangents( planes2, drawVerts2, COUNT, indexes, COUNT*3 );
2941  StopRecordTime( end );
2942  GetBest( start, end, bestClocksSIMD );
2943  }
2944 
2945  for ( i = 0; i < COUNT; i++ ) {
2946  idVec3 v1, v2;
2947 
2948  v1 = drawVerts1[i].normal;
2949  v1.Normalize();
2950  v2 = drawVerts2[i].normal;
2951  v2.Normalize();
2952  if ( !v1.Compare( v2, 1e-1f ) ) {
2953  idLib::common->Printf("DeriveTangents: broken at normal %i\n -- expecting %s got %s", i, v1.ToString(), v2.ToString());
2954  break;
2955  }
2956  v1 = drawVerts1[i].tangents[0];
2957  v1.Normalize();
2958  v2 = drawVerts2[i].tangents[0];
2959  v2.Normalize();
2960  if ( !v1.Compare( v2, 1e-1f ) ) {
2961  idLib::common->Printf("DeriveTangents: broken at tangent0 %i -- expecting %s got %s\n", i, v1.ToString(), v2.ToString() );
2962  break;
2963  }
2964  v1 = drawVerts1[i].tangents[1];
2965  v1.Normalize();
2966  v2 = drawVerts2[i].tangents[1];
2967  v2.Normalize();
2968  if ( !v1.Compare( v2, 1e-1f ) ) {
2969  idLib::common->Printf("DeriveTangents: broken at tangent1 %i -- expecting %s got %s\n", i, v1.ToString(), v2.ToString() );
2970  break;
2971  }
2972  if ( !planes1[i].Compare( planes2[i], 1e-1f, 1e-1f ) ) {
2973  break;
2974  }
2975  }
2976  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
2977  PrintClocks( va( " simd->DeriveTangents() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
2978 }
2979 
2980 /*
2981 ============
2982 TestDeriveUnsmoothedTangents
2983 ============
2984 */
2986  int i, j;
2987  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
2988  ALIGN16( idDrawVert drawVerts1[COUNT] );
2989  ALIGN16( idDrawVert drawVerts2[COUNT] );
2990  ALIGN16( dominantTri_s dominantTris[COUNT] );
2991  const char *result;
2992 
2993  idRandom srnd( RANDOM_SEED );
2994 
2995  for ( i = 0; i < COUNT; i++ ) {
2996  for ( j = 0; j < 3; j++ ) {
2997  drawVerts1[i].xyz[j] = srnd.CRandomFloat() * 10.0f;
2998  }
2999  for ( j = 0; j < 2; j++ ) {
3000  drawVerts1[i].st[j] = srnd.CRandomFloat();
3001  }
3002  drawVerts2[i] = drawVerts1[i];
3003 
3004  dominantTris[i].v2 = ( i + 1 + srnd.RandomInt( 8 ) ) % COUNT;
3005  dominantTris[i].v3 = ( i + 9 + srnd.RandomInt( 8 ) ) % COUNT;
3006  dominantTris[i].normalizationScale[0] = srnd.CRandomFloat();
3007  dominantTris[i].normalizationScale[1] = srnd.CRandomFloat();
3008  dominantTris[i].normalizationScale[2] = srnd.CRandomFloat();
3009  }
3010 
3011  bestClocksGeneric = 0;
3012  for ( i = 0; i < NUMTESTS; i++ ) {
3013  StartRecordTime( start );
3014  p_generic->DeriveUnsmoothedTangents( drawVerts1, dominantTris, COUNT );
3015  StopRecordTime( end );
3016  GetBest( start, end, bestClocksGeneric );
3017  }
3018  PrintClocks( "generic->DeriveUnsmoothedTangents()", COUNT, bestClocksGeneric );
3019 
3020  bestClocksSIMD = 0;
3021  for ( i = 0; i < NUMTESTS; i++ ) {
3022  StartRecordTime( start );
3023  p_simd->DeriveUnsmoothedTangents( drawVerts2, dominantTris, COUNT );
3024  StopRecordTime( end );
3025  GetBest( start, end, bestClocksSIMD );
3026  }
3027 
3028  for ( i = 0; i < COUNT; i++ ) {
3029  idVec3 v1, v2;
3030 
3031  v1 = drawVerts1[i].normal;
3032  v1.Normalize();
3033  v2 = drawVerts2[i].normal;
3034  v2.Normalize();
3035  if ( !v1.Compare( v2, 1e-1f ) ) {
3036  break;
3037  }
3038  v1 = drawVerts1[i].tangents[0];
3039  v1.Normalize();
3040  v2 = drawVerts2[i].tangents[0];
3041  v2.Normalize();
3042  if ( !v1.Compare( v2, 1e-1f ) ) {
3043  break;
3044  }
3045  v1 = drawVerts1[i].tangents[1];
3046  v1.Normalize();
3047  v2 = drawVerts2[i].tangents[1];
3048  v2.Normalize();
3049  if ( !v1.Compare( v2, 1e-1f ) ) {
3050  break;
3051  }
3052  }
3053  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
3054  PrintClocks( va( " simd->DeriveUnsmoothedTangents() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
3055 }
3056 
3057 /*
3058 ============
3059 TestNormalizeTangents
3060 ============
3061 */
3063  int i, j;
3064  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
3065  ALIGN16( idDrawVert drawVerts1[COUNT] );
3066  ALIGN16( idDrawVert drawVerts2[COUNT] );
3067  const char *result;
3068 
3069  idRandom srnd( RANDOM_SEED );
3070 
3071  for ( i = 0; i < COUNT; i++ ) {
3072  for ( j = 0; j < 3; j++ ) {
3073  drawVerts1[i].normal[j] = srnd.CRandomFloat() * 10.0f;
3074  drawVerts1[i].tangents[0][j] = srnd.CRandomFloat() * 10.0f;
3075  drawVerts1[i].tangents[1][j] = srnd.CRandomFloat() * 10.0f;
3076  }
3077  drawVerts2[i] = drawVerts1[i];
3078  }
3079 
3080  bestClocksGeneric = 0;
3081  for ( i = 0; i < NUMTESTS; i++ ) {
3082  StartRecordTime( start );
3083  p_generic->NormalizeTangents( drawVerts1, COUNT );
3084  StopRecordTime( end );
3085  GetBest( start, end, bestClocksGeneric );
3086  }
3087  PrintClocks( "generic->NormalizeTangents()", COUNT, bestClocksGeneric );
3088 
3089  bestClocksSIMD = 0;
3090  for ( i = 0; i < NUMTESTS; i++ ) {
3091  StartRecordTime( start );
3092  p_simd->NormalizeTangents( drawVerts2, COUNT );
3093  StopRecordTime( end );
3094  GetBest( start, end, bestClocksSIMD );
3095  }
3096 
3097  for ( i = 0; i < COUNT; i++ ) {
3098  if ( !drawVerts1[i].normal.Compare( drawVerts2[i].normal, 1e-2f ) ) {
3099  break;
3100  }
3101  if ( !drawVerts1[i].tangents[0].Compare( drawVerts2[i].tangents[0], 1e-2f ) ) {
3102  break;
3103  }
3104  if ( !drawVerts1[i].tangents[1].Compare( drawVerts2[i].tangents[1], 1e-2f ) ) {
3105  break;
3106  }
3107 
3108  // since we're doing a lot of unaligned work, added this check to
3109  // make sure xyz wasn't getting overwritten
3110  if ( !drawVerts1[i].xyz.Compare( drawVerts2[i].xyz, 1e-2f ) ) {
3111  break;
3112  }
3113  }
3114  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
3115  PrintClocks( va( " simd->NormalizeTangents() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
3116 }
3117 
3118 /*
3119 ============
3120 TestGetTextureSpaceLightVectors
3121 ============
3122 */
3124  int i, j;
3125  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
3126  ALIGN16( idDrawVert drawVerts[COUNT] );
3127  ALIGN16( idVec4 texCoords1[COUNT] );
3128  ALIGN16( idVec4 texCoords2[COUNT] );
3129  ALIGN16( int indexes[COUNT*3] );
3130  ALIGN16( idVec3 lightVectors1[COUNT] );
3131  ALIGN16( idVec3 lightVectors2[COUNT] );
3132  idVec3 lightOrigin;
3133  const char *result;
3134 
3135  idRandom srnd( RANDOM_SEED );
3136 
3137  for ( i = 0; i < COUNT; i++ ) {
3138  for ( j = 0; j < 3; j++ ) {
3139  drawVerts[i].xyz[j] = srnd.CRandomFloat() * 100.0f;
3140  drawVerts[i].normal[j] = srnd.CRandomFloat();
3141  drawVerts[i].tangents[0][j] = srnd.CRandomFloat();
3142  drawVerts[i].tangents[1][j] = srnd.CRandomFloat();
3143  }
3144  }
3145 
3146  for ( i = 0; i < COUNT; i++ ) {
3147  indexes[i*3+0] = ( i + 0 ) % COUNT;
3148  indexes[i*3+1] = ( i + 1 ) % COUNT;
3149  indexes[i*3+2] = ( i + 2 ) % COUNT;
3150  }
3151 
3152  lightOrigin[0] = srnd.CRandomFloat() * 100.0f;
3153  lightOrigin[1] = srnd.CRandomFloat() * 100.0f;
3154  lightOrigin[2] = srnd.CRandomFloat() * 100.0f;
3155 
3156  bestClocksGeneric = 0;
3157  for ( i = 0; i < NUMTESTS; i++ ) {
3158  StartRecordTime( start );
3159  p_generic->CreateTextureSpaceLightVectors( lightVectors1, lightOrigin, drawVerts, COUNT, indexes, COUNT*3 );
3160  StopRecordTime( end );
3161  GetBest( start, end, bestClocksGeneric );
3162  }
3163  PrintClocks( "generic->CreateTextureSpaceLightVectors()", COUNT, bestClocksGeneric );
3164 
3165  bestClocksSIMD = 0;
3166  for ( i = 0; i < NUMTESTS; i++ ) {
3167  StartRecordTime( start );
3168  p_simd->CreateTextureSpaceLightVectors( lightVectors2, lightOrigin, drawVerts, COUNT, indexes, COUNT*3 );
3169  StopRecordTime( end );
3170  GetBest( start, end, bestClocksSIMD );
3171  }
3172 
3173  for ( i = 0; i < COUNT; i++ ) {
3174  if ( !lightVectors1[i].Compare( lightVectors2[i], 1e-4f ) ) {
3175  break;
3176  }
3177  }
3178  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
3179  PrintClocks( va( " simd->CreateTextureSpaceLightVectors() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
3180 }
3181 
3182 /*
3183 ============
3184 TestGetSpecularTextureCoords
3185 ============
3186 */
3188  int i, j;
3189  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
3190  ALIGN16( idDrawVert drawVerts[COUNT] );
3191  ALIGN16( idVec4 texCoords1[COUNT] );
3192  ALIGN16( idVec4 texCoords2[COUNT] );
3193  ALIGN16( int indexes[COUNT*3] );
3194  ALIGN16( idVec3 lightVectors1[COUNT] );
3195  ALIGN16( idVec3 lightVectors2[COUNT] );
3196  idVec3 lightOrigin, viewOrigin;
3197  const char *result;
3198 
3199  idRandom srnd( RANDOM_SEED );
3200 
3201  for ( i = 0; i < COUNT; i++ ) {
3202  for ( j = 0; j < 3; j++ ) {
3203  drawVerts[i].xyz[j] = srnd.CRandomFloat() * 100.0f;
3204  drawVerts[i].normal[j] = srnd.CRandomFloat();
3205  drawVerts[i].tangents[0][j] = srnd.CRandomFloat();
3206  drawVerts[i].tangents[1][j] = srnd.CRandomFloat();
3207  }
3208  }
3209 
3210  for ( i = 0; i < COUNT; i++ ) {
3211  indexes[i*3+0] = ( i + 0 ) % COUNT;
3212  indexes[i*3+1] = ( i + 1 ) % COUNT;
3213  indexes[i*3+2] = ( i + 2 ) % COUNT;
3214  }
3215 
3216  lightOrigin[0] = srnd.CRandomFloat() * 100.0f;
3217  lightOrigin[1] = srnd.CRandomFloat() * 100.0f;
3218  lightOrigin[2] = srnd.CRandomFloat() * 100.0f;
3219  viewOrigin[0] = srnd.CRandomFloat() * 100.0f;
3220  viewOrigin[1] = srnd.CRandomFloat() * 100.0f;
3221  viewOrigin[2] = srnd.CRandomFloat() * 100.0f;
3222 
3223  bestClocksGeneric = 0;
3224  for ( i = 0; i < NUMTESTS; i++ ) {
3225  StartRecordTime( start );
3226  p_generic->CreateSpecularTextureCoords( texCoords1, lightOrigin, viewOrigin, drawVerts, COUNT, indexes, COUNT*3 );
3227  StopRecordTime( end );
3228  GetBest( start, end, bestClocksGeneric );
3229  }
3230  PrintClocks( "generic->CreateSpecularTextureCoords()", COUNT, bestClocksGeneric );
3231 
3232  bestClocksSIMD = 0;
3233  for ( i = 0; i < NUMTESTS; i++ ) {
3234  StartRecordTime( start );
3235  p_simd->CreateSpecularTextureCoords( texCoords2, lightOrigin, viewOrigin, drawVerts, COUNT, indexes, COUNT*3 );
3236  StopRecordTime( end );
3237  GetBest( start, end, bestClocksSIMD );
3238  }
3239 
3240  for ( i = 0; i < COUNT; i++ ) {
3241  if ( !texCoords1[i].Compare( texCoords2[i], 1e-2f ) ) {
3242  break;
3243  }
3244  }
3245  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
3246  PrintClocks( va( " simd->CreateSpecularTextureCoords() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
3247 }
3248 
3249 /*
3250 ============
3251 TestCreateShadowCache
3252 ============
3253 */
3255  int i, j;
3256  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
3257  ALIGN16( idDrawVert drawVerts[COUNT] );
3258  ALIGN16( idVec4 vertexCache1[COUNT*2] );
3259  ALIGN16( idVec4 vertexCache2[COUNT*2] );
3260  ALIGN16( int originalVertRemap[COUNT] );
3261  ALIGN16( int vertRemap1[COUNT] );
3262  ALIGN16( int vertRemap2[COUNT] );
3263  ALIGN16( idVec3 lightOrigin );
3264  int numVerts1 = 0, numVerts2 = 0;
3265  const char *result;
3266 
3267  idRandom srnd( RANDOM_SEED );
3268 
3269  for ( i = 0; i < COUNT; i++ ) {
3270  drawVerts[i].xyz[0] = srnd.CRandomFloat() * 100.0f;
3271  drawVerts[i].xyz[1] = srnd.CRandomFloat() * 100.0f;
3272  drawVerts[i].xyz[2] = srnd.CRandomFloat() * 100.0f;
3273  originalVertRemap[i] = ( srnd.CRandomFloat() > 0.0f ) ? -1 : 0;
3274  }
3275  lightOrigin[0] = srnd.CRandomFloat() * 100.0f;
3276  lightOrigin[1] = srnd.CRandomFloat() * 100.0f;
3277  lightOrigin[2] = srnd.CRandomFloat() * 100.0f;
3278 
3279  bestClocksGeneric = 0;
3280  for ( i = 0; i < NUMTESTS; i++ ) {
3281  for ( j = 0; j < COUNT; j++ ) {
3282  vertRemap1[j] = originalVertRemap[j];
3283  }
3284  StartRecordTime( start );
3285  numVerts1 =p_generic->CreateShadowCache( vertexCache1, vertRemap1, lightOrigin, drawVerts, COUNT );
3286  StopRecordTime( end );
3287  GetBest( start, end, bestClocksGeneric );
3288  }
3289  PrintClocks( "generic->CreateShadowCache()", COUNT, bestClocksGeneric );
3290 
3291  bestClocksSIMD = 0;
3292  for ( i = 0; i < NUMTESTS; i++ ) {
3293  for ( j = 0; j < COUNT; j++ ) {
3294  vertRemap2[j] = originalVertRemap[j];
3295  }
3296  StartRecordTime( start );
3297  numVerts2 = p_simd->CreateShadowCache( vertexCache2, vertRemap2, lightOrigin, drawVerts, COUNT );
3298  StopRecordTime( end );
3299  GetBest( start, end, bestClocksSIMD );
3300  }
3301 
3302  for ( i = 0; i < COUNT; i++ ) {
3303  if ( i < ( numVerts1 / 2 ) ) {
3304  if ( !vertexCache1[i*2+0].Compare( vertexCache2[i*2+0], 1e-2f ) ) {
3305  break;
3306  }
3307  if ( !vertexCache1[i*2+1].Compare( vertexCache2[i*2+1], 1e-2f ) ) {
3308  break;
3309  }
3310  }
3311  if ( vertRemap1[i] != vertRemap2[i] ) {
3312  break;
3313  }
3314  }
3315 
3316  result = ( i >= COUNT && numVerts1 == numVerts2 ) ? "ok" : S_COLOR_RED"X";
3317  PrintClocks( va( " simd->CreateShadowCache() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
3318 
3319  bestClocksGeneric = 0;
3320  for ( i = 0; i < NUMTESTS; i++ ) {
3321  StartRecordTime( start );
3322  p_generic->CreateVertexProgramShadowCache( vertexCache1, drawVerts, COUNT );
3323  StopRecordTime( end );
3324  GetBest( start, end, bestClocksGeneric );
3325  }
3326  PrintClocks( "generic->CreateVertexProgramShadowCache()", COUNT, bestClocksGeneric );
3327 
3328  bestClocksSIMD = 0;
3329  for ( i = 0; i < NUMTESTS; i++ ) {
3330  StartRecordTime( start );
3331  p_simd->CreateVertexProgramShadowCache( vertexCache2, drawVerts, COUNT );
3332  StopRecordTime( end );
3333  GetBest( start, end, bestClocksSIMD );
3334  }
3335 
3336  for ( i = 0; i < COUNT; i++ ) {
3337  if ( !vertexCache1[i*2+0].Compare( vertexCache2[i*2+0], 1e-2f ) ) {
3338  break;
3339  }
3340  if ( !vertexCache1[i*2+1].Compare( vertexCache2[i*2+1], 1e-2f ) ) {
3341  break;
3342  }
3343  }
3344  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
3345  PrintClocks( va( " simd->CreateVertexProgramShadowCache() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
3346 }
3347 
3348 /*
3349 ============
3350 TestSoundUpSampling
3351 ============
3352 */
3353 #define SOUND_UPSAMPLE_EPSILON 1.0f
3354 
3355 void TestSoundUpSampling( void ) {
3356  int i;
3357  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
3358  ALIGN16( short pcm[MIXBUFFER_SAMPLES*2] );
3359  ALIGN16( float ogg0[MIXBUFFER_SAMPLES*2] );
3360  ALIGN16( float ogg1[MIXBUFFER_SAMPLES*2] );
3361  ALIGN16( float samples1[MIXBUFFER_SAMPLES*2] );
3362  ALIGN16( float samples2[MIXBUFFER_SAMPLES*2] );
3363  float *ogg[2];
3364  int kHz, numSpeakers;
3365  const char *result;
3366 
3367  idRandom srnd( RANDOM_SEED );
3368 
3369  for ( i = 0; i < MIXBUFFER_SAMPLES*2; i++ ) {
3370  pcm[i] = srnd.RandomInt( (1<<16) ) - (1<<15);
3371  ogg0[i] = srnd.RandomFloat();
3372  ogg1[i] = srnd.RandomFloat();
3373  }
3374 
3375  ogg[0] = ogg0;
3376  ogg[1] = ogg1;
3377 
3378  for ( numSpeakers = 1; numSpeakers <= 2; numSpeakers++ ) {
3379 
3380  for ( kHz = 11025; kHz <= 44100; kHz *= 2 ) {
3381  bestClocksGeneric = 0;
3382  for ( i = 0; i < NUMTESTS; i++ ) {
3383  StartRecordTime( start );
3384  p_generic->UpSamplePCMTo44kHz( samples1, pcm, MIXBUFFER_SAMPLES*numSpeakers*kHz/44100, kHz, numSpeakers );
3385  StopRecordTime( end );
3386  GetBest( start, end, bestClocksGeneric );
3387  }
3388  PrintClocks( va( "generic->UpSamplePCMTo44kHz( %d, %d )", kHz, numSpeakers ), MIXBUFFER_SAMPLES*numSpeakers*kHz/44100, bestClocksGeneric );
3389 
3390  bestClocksSIMD = 0;
3391  for ( i = 0; i < NUMTESTS; i++ ) {
3392  StartRecordTime( start );
3393  p_simd->UpSamplePCMTo44kHz( samples2, pcm, MIXBUFFER_SAMPLES*numSpeakers*kHz/44100, kHz, numSpeakers );
3394  StopRecordTime( end );
3395  GetBest( start, end, bestClocksSIMD );
3396  }
3397 
3398  for ( i = 0; i < MIXBUFFER_SAMPLES*numSpeakers; i++ ) {
3399  if ( idMath::Fabs( samples1[i] - samples2[i] ) > SOUND_UPSAMPLE_EPSILON ) {
3400  break;
3401  }
3402  }
3403  result = ( i >= MIXBUFFER_SAMPLES*numSpeakers ) ? "ok" : S_COLOR_RED"X";
3404  PrintClocks( va( " simd->UpSamplePCMTo44kHz( %d, %d ) %s", kHz, numSpeakers, result ), MIXBUFFER_SAMPLES*numSpeakers*kHz/44100, bestClocksSIMD, bestClocksGeneric );
3405  }
3406  }
3407 
3408  for ( numSpeakers = 1; numSpeakers <= 2; numSpeakers++ ) {
3409 
3410  for ( kHz = 11025; kHz <= 44100; kHz *= 2 ) {
3411  bestClocksGeneric = 0;
3412  for ( i = 0; i < NUMTESTS; i++ ) {
3413  StartRecordTime( start );
3414  p_generic->UpSampleOGGTo44kHz( samples1, ogg, MIXBUFFER_SAMPLES*numSpeakers*kHz/44100, kHz, numSpeakers );
3415  StopRecordTime( end );
3416  GetBest( start, end, bestClocksGeneric );
3417  }
3418  PrintClocks( va( "generic->UpSampleOGGTo44kHz( %d, %d )", kHz, numSpeakers ), MIXBUFFER_SAMPLES*numSpeakers*kHz/44100, bestClocksGeneric );
3419 
3420  bestClocksSIMD = 0;
3421  for ( i = 0; i < NUMTESTS; i++ ) {
3422  StartRecordTime( start );
3423  p_simd->UpSampleOGGTo44kHz( samples2, ogg, MIXBUFFER_SAMPLES*numSpeakers*kHz/44100, kHz, numSpeakers );
3424  StopRecordTime( end );
3425  GetBest( start, end, bestClocksSIMD );
3426  }
3427 
3428  for ( i = 0; i < MIXBUFFER_SAMPLES*numSpeakers; i++ ) {
3429  if ( idMath::Fabs( samples1[i] - samples2[i] ) > SOUND_UPSAMPLE_EPSILON ) {
3430  break;
3431  }
3432  }
3433  result = ( i >= MIXBUFFER_SAMPLES ) ? "ok" : S_COLOR_RED"X";
3434  PrintClocks( va( " simd->UpSampleOGGTo44kHz( %d, %d ) %s", kHz, numSpeakers, result ), MIXBUFFER_SAMPLES*numSpeakers*kHz/44100, bestClocksSIMD, bestClocksGeneric );
3435  }
3436  }
3437 }
3438 
3439 /*
3440 ============
3441 TestSoundMixing
3442 ============
3443 */
3444 #define SOUND_MIX_EPSILON 2.0f
3445 
3446 void TestSoundMixing( void ) {
3447  int i, j;
3448  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
3449  ALIGN16( float origMixBuffer[MIXBUFFER_SAMPLES*6] );
3450  ALIGN16( float mixBuffer1[MIXBUFFER_SAMPLES*6] );
3451  ALIGN16( float mixBuffer2[MIXBUFFER_SAMPLES*6] );
3452  ALIGN16( float samples[MIXBUFFER_SAMPLES*6] );
3453  ALIGN16( short outSamples1[MIXBUFFER_SAMPLES*6] );
3454  ALIGN16( short outSamples2[MIXBUFFER_SAMPLES*6] );
3455  float lastV[6];
3456  float currentV[6];
3457  const char *result;
3458 
3459  idRandom srnd( RANDOM_SEED );
3460 
3461  for ( i = 0; i < 6; i++ ) {
3462  lastV[i] = srnd.CRandomFloat();
3463  currentV[i] = srnd.CRandomFloat();
3464  }
3465 
3466  for ( i = 0; i < MIXBUFFER_SAMPLES*6; i++ ) {
3467  origMixBuffer[i] = srnd.CRandomFloat();
3468  samples[i] = srnd.RandomInt( (1<<16) ) - (1<<15);
3469  }
3470 
3471  bestClocksGeneric = 0;
3472  for ( i = 0; i < NUMTESTS; i++ ) {
3473  for ( j = 0; j < MIXBUFFER_SAMPLES*6; j++ ) {
3474  mixBuffer1[j] = origMixBuffer[j];
3475  }
3476  StartRecordTime( start );
3477  p_generic->MixSoundTwoSpeakerMono( mixBuffer1, samples, MIXBUFFER_SAMPLES, lastV, currentV );
3478  StopRecordTime( end );
3479  GetBest( start, end, bestClocksGeneric );
3480  }
3481  PrintClocks( "generic->MixSoundTwoSpeakerMono()", MIXBUFFER_SAMPLES, bestClocksGeneric );
3482 
3483 
3484  bestClocksSIMD = 0;
3485  for ( i = 0; i < NUMTESTS; i++ ) {
3486  for ( j = 0; j < MIXBUFFER_SAMPLES*6; j++ ) {
3487  mixBuffer2[j] = origMixBuffer[j];
3488  }
3489  StartRecordTime( start );
3490  p_simd->MixSoundTwoSpeakerMono( mixBuffer2, samples, MIXBUFFER_SAMPLES, lastV, currentV );
3491  StopRecordTime( end );
3492  GetBest( start, end, bestClocksSIMD );
3493  }
3494 
3495  for ( i = 0; i < MIXBUFFER_SAMPLES*6; i++ ) {
3496  if ( idMath::Fabs( mixBuffer1[i] - mixBuffer2[i] ) > SOUND_MIX_EPSILON ) {
3497  break;
3498  }
3499  }
3500  result = ( i >= MIXBUFFER_SAMPLES*6 ) ? "ok" : S_COLOR_RED"X";
3501  PrintClocks( va( " simd->MixSoundTwoSpeakerMono() %s", result ), MIXBUFFER_SAMPLES, bestClocksSIMD, bestClocksGeneric );
3502 
3503  bestClocksGeneric = 0;
3504  for ( i = 0; i < NUMTESTS; i++ ) {
3505  for ( j = 0; j < MIXBUFFER_SAMPLES*6; j++ ) {
3506  mixBuffer1[j] = origMixBuffer[j];
3507  }
3508  StartRecordTime( start );
3509  p_generic->MixSoundTwoSpeakerStereo( mixBuffer1, samples, MIXBUFFER_SAMPLES, lastV, currentV );
3510  StopRecordTime( end );
3511  GetBest( start, end, bestClocksGeneric );
3512  }
3513  PrintClocks( "generic->MixSoundTwoSpeakerStereo()", MIXBUFFER_SAMPLES, bestClocksGeneric );
3514 
3515 
3516  bestClocksSIMD = 0;
3517  for ( i = 0; i < NUMTESTS; i++ ) {
3518  for ( j = 0; j < MIXBUFFER_SAMPLES*6; j++ ) {
3519  mixBuffer2[j] = origMixBuffer[j];
3520  }
3521  StartRecordTime( start );
3522  p_simd->MixSoundTwoSpeakerStereo( mixBuffer2, samples, MIXBUFFER_SAMPLES, lastV, currentV );
3523  StopRecordTime( end );
3524  GetBest( start, end, bestClocksSIMD );
3525  }
3526 
3527  for ( i = 0; i < MIXBUFFER_SAMPLES*6; i++ ) {
3528  if ( idMath::Fabs( mixBuffer1[i] - mixBuffer2[i] ) > SOUND_MIX_EPSILON ) {
3529  break;
3530  }
3531  }
3532  result = ( i >= MIXBUFFER_SAMPLES*6 ) ? "ok" : S_COLOR_RED"X";
3533  PrintClocks( va( " simd->MixSoundTwoSpeakerStereo() %s", result ), MIXBUFFER_SAMPLES, bestClocksSIMD, bestClocksGeneric );
3534 
3535 
3536  bestClocksGeneric = 0;
3537  for ( i = 0; i < NUMTESTS; i++ ) {
3538  for ( j = 0; j < MIXBUFFER_SAMPLES*6; j++ ) {
3539  mixBuffer1[j] = origMixBuffer[j];
3540  }
3541  StartRecordTime( start );
3542  p_generic->MixSoundSixSpeakerMono( mixBuffer1, samples, MIXBUFFER_SAMPLES, lastV, currentV );
3543  StopRecordTime( end );
3544  GetBest( start, end, bestClocksGeneric );
3545  }
3546  PrintClocks( "generic->MixSoundSixSpeakerMono()", MIXBUFFER_SAMPLES, bestClocksGeneric );
3547 
3548 
3549  bestClocksSIMD = 0;
3550  for ( i = 0; i < NUMTESTS; i++ ) {
3551  for ( j = 0; j < MIXBUFFER_SAMPLES*6; j++ ) {
3552  mixBuffer2[j] = origMixBuffer[j];
3553  }
3554  StartRecordTime( start );
3555  p_simd->MixSoundSixSpeakerMono( mixBuffer2, samples, MIXBUFFER_SAMPLES, lastV, currentV );
3556  StopRecordTime( end );
3557  GetBest( start, end, bestClocksSIMD );
3558  }
3559 
3560  for ( i = 0; i < MIXBUFFER_SAMPLES*6; i++ ) {
3561  if ( idMath::Fabs( mixBuffer1[i] - mixBuffer2[i] ) > SOUND_MIX_EPSILON ) {
3562  break;
3563  }
3564  }
3565  result = ( i >= MIXBUFFER_SAMPLES*6 ) ? "ok" : S_COLOR_RED"X";
3566  PrintClocks( va( " simd->MixSoundSixSpeakerMono() %s", result ), MIXBUFFER_SAMPLES, bestClocksSIMD, bestClocksGeneric );
3567 
3568  bestClocksGeneric = 0;
3569  for ( i = 0; i < NUMTESTS; i++ ) {
3570  for ( j = 0; j < MIXBUFFER_SAMPLES*6; j++ ) {
3571  mixBuffer1[j] = origMixBuffer[j];
3572  }
3573  StartRecordTime( start );
3574  p_generic->MixSoundSixSpeakerStereo( mixBuffer1, samples, MIXBUFFER_SAMPLES, lastV, currentV );
3575  StopRecordTime( end );
3576  GetBest( start, end, bestClocksGeneric );
3577  }
3578  PrintClocks( "generic->MixSoundSixSpeakerStereo()", MIXBUFFER_SAMPLES, bestClocksGeneric );
3579 
3580 
3581  bestClocksSIMD = 0;
3582  for ( i = 0; i < NUMTESTS; i++ ) {
3583  for ( j = 0; j < MIXBUFFER_SAMPLES*6; j++ ) {
3584  mixBuffer2[j] = origMixBuffer[j];
3585  }
3586  StartRecordTime( start );
3587  p_simd->MixSoundSixSpeakerStereo( mixBuffer2, samples, MIXBUFFER_SAMPLES, lastV, currentV );
3588  StopRecordTime( end );
3589  GetBest( start, end, bestClocksSIMD );
3590  }
3591 
3592  for ( i = 0; i < MIXBUFFER_SAMPLES*6; i++ ) {
3593  if ( idMath::Fabs( mixBuffer1[i] - mixBuffer2[i] ) > SOUND_MIX_EPSILON ) {
3594  break;
3595  }
3596  }
3597  result = ( i >= MIXBUFFER_SAMPLES*6 ) ? "ok" : S_COLOR_RED"X";
3598  PrintClocks( va( " simd->MixSoundSixSpeakerStereo() %s", result ), MIXBUFFER_SAMPLES, bestClocksSIMD, bestClocksGeneric );
3599 
3600 
3601  for ( i = 0; i < MIXBUFFER_SAMPLES*6; i++ ) {
3602  origMixBuffer[i] = srnd.RandomInt( (1<<17) ) - (1<<16);
3603  }
3604 
3605  bestClocksGeneric = 0;
3606  for ( i = 0; i < NUMTESTS; i++ ) {
3607  for ( j = 0; j < MIXBUFFER_SAMPLES*6; j++ ) {
3608  mixBuffer1[j] = origMixBuffer[j];
3609  }
3610  StartRecordTime( start );
3611  p_generic->MixedSoundToSamples( outSamples1, mixBuffer1, MIXBUFFER_SAMPLES*6 );
3612  StopRecordTime( end );
3613  GetBest( start, end, bestClocksGeneric );
3614  }
3615  PrintClocks( "generic->MixedSoundToSamples()", MIXBUFFER_SAMPLES, bestClocksGeneric );
3616 
3617  bestClocksSIMD = 0;
3618  for ( i = 0; i < NUMTESTS; i++ ) {
3619  for ( j = 0; j < MIXBUFFER_SAMPLES*6; j++ ) {
3620  mixBuffer2[j] = origMixBuffer[j];
3621  }
3622  StartRecordTime( start );
3623  p_simd->MixedSoundToSamples( outSamples2, mixBuffer2, MIXBUFFER_SAMPLES*6 );
3624  StopRecordTime( end );
3625  GetBest( start, end, bestClocksSIMD );
3626  }
3627 
3628  for ( i = 0; i < MIXBUFFER_SAMPLES*6; i++ ) {
3629  if ( outSamples1[i] != outSamples2[i] ) {
3630  break;
3631  }
3632  }
3633  result = ( i >= MIXBUFFER_SAMPLES*6 ) ? "ok" : S_COLOR_RED"X";
3634  PrintClocks( va( " simd->MixedSoundToSamples() %s", result ), MIXBUFFER_SAMPLES, bestClocksSIMD, bestClocksGeneric );
3635 }
3636 
3637 /*
3638 ============
3639 TestMath
3640 ============
3641 */
3642 void TestMath( void ) {
3643  int i;
3644  TIME_TYPE start, end, bestClocks;
3645 
3646  idLib::common->Printf("====================================\n" );
3647 
3648  float tst = -1.0f;
3649  float tst2 = 1.0f;
3650  float testvar = 1.0f;
3651  idRandom rnd;
3652 
3653  bestClocks = 0;
3654  tst = rnd.CRandomFloat();
3655  for ( i = 0; i < NUMTESTS; i++ ) {
3656  StartRecordTime( start );
3657  tst = fabs( tst );
3658  StopRecordTime( end );
3659  GetBest( start, end, bestClocks );
3660  testvar = ( testvar + tst ) * tst;
3661  tst = rnd.CRandomFloat();
3662  }
3663  PrintClocks( " fabs( tst )", 1, bestClocks );
3664 
3665  bestClocks = 0;
3666  tst = rnd.CRandomFloat();
3667  for ( i = 0; i < NUMTESTS; i++ ) {
3668  StartRecordTime( start );
3669  int tmp = * ( int * ) &tst;
3670  tmp &= 0x7FFFFFFF;
3671  tst = * ( float * ) &tmp;
3672  StopRecordTime( end );
3673  GetBest( start, end, bestClocks );
3674  testvar = ( testvar + tst ) * tst;
3675  tst = rnd.CRandomFloat();
3676  }
3677  PrintClocks( " idMath::Fabs( tst )", 1, bestClocks );
3678 
3679  bestClocks = 0;
3680  tst = 10.0f + 100.0f * rnd.RandomFloat();
3681  for ( i = 0; i < NUMTESTS; i++ ) {
3682  StartRecordTime( start );
3683  tst = sqrt( tst );
3684  StopRecordTime( end );
3685  GetBest( start, end, bestClocks );
3686  testvar = ( testvar + tst ) * tst * 0.01f;
3687  tst = 10.0f + 100.0f * rnd.RandomFloat();
3688  }
3689  PrintClocks( " sqrt( tst )", 1, bestClocks );
3690 
3691  bestClocks = 0;
3692  tst = rnd.RandomFloat();
3693  for ( i = 0; i < NUMTESTS; i++ ) {
3694  StartRecordTime( start );
3695  tst = idMath::Sqrt( tst );
3696  StopRecordTime( end );
3697  GetBest( start, end, bestClocks );
3698  testvar = ( testvar + tst ) * tst;
3699  tst = rnd.RandomFloat();
3700  }
3701  PrintClocks( " idMath::Sqrt( tst )", 1, bestClocks );
3702 
3703  bestClocks = 0;
3704  tst = rnd.RandomFloat();
3705  for ( i = 0; i < NUMTESTS; i++ ) {
3706  StartRecordTime( start );
3707  tst = idMath::Sqrt16( tst );
3708  StopRecordTime( end );
3709  GetBest( start, end, bestClocks );
3710  testvar = ( testvar + tst ) * tst;
3711  tst = rnd.RandomFloat();
3712  }
3713  PrintClocks( " idMath::Sqrt16( tst )", 1, bestClocks );
3714 
3715  bestClocks = 0;
3716  tst = rnd.RandomFloat();
3717  for ( i = 0; i < NUMTESTS; i++ ) {
3718  StartRecordTime( start );
3719  tst = idMath::Sqrt64( tst );
3720  StopRecordTime( end );
3721  GetBest( start, end, bestClocks );
3722  testvar = ( testvar + tst ) * tst;
3723  tst = rnd.RandomFloat();
3724  }
3725  PrintClocks( " idMath::Sqrt64( tst )", 1, bestClocks );
3726 
3727  bestClocks = 0;
3728  tst = rnd.RandomFloat();
3729  for ( i = 0; i < NUMTESTS; i++ ) {
3730  StartRecordTime( start );
3731  tst = tst * idMath::RSqrt( tst );
3732  StopRecordTime( end );
3733  GetBest( start, end, bestClocks );
3734  testvar = ( testvar + tst ) * tst;
3735  tst = rnd.RandomFloat();
3736  }
3737  PrintClocks( " idMath::RSqrt( tst )", 1, bestClocks );
3738 
3739  bestClocks = 0;
3740  tst = rnd.CRandomFloat();
3741  for ( i = 0; i < NUMTESTS; i++ ) {
3742  StartRecordTime( start );
3743  tst = idMath::Sin( tst );
3744  StopRecordTime( end );
3745  GetBest( start, end, bestClocks );
3746  testvar = ( testvar + tst ) * tst;
3747  tst = rnd.CRandomFloat();
3748  }
3749  PrintClocks( " idMath::Sin( tst )", 1, bestClocks );
3750 
3751  bestClocks = 0;
3752  tst = rnd.CRandomFloat();
3753  for ( i = 0; i < NUMTESTS; i++ ) {
3754  StartRecordTime( start );
3755  tst = idMath::Sin16( tst );
3756  StopRecordTime( end );
3757  GetBest( start, end, bestClocks );
3758  testvar = ( testvar + tst ) * tst;
3759  tst = rnd.CRandomFloat();
3760  }
3761  PrintClocks( " idMath::Sin16( tst )", 1, bestClocks );
3762 
3763  bestClocks = 0;
3764  tst = rnd.CRandomFloat();
3765  for ( i = 0; i < NUMTESTS; i++ ) {
3766  StartRecordTime( start );
3767  tst = idMath::Cos( tst );
3768  StopRecordTime( end );
3769  GetBest( start, end, bestClocks );
3770  testvar = ( testvar + tst ) * tst;
3771  tst = rnd.CRandomFloat();
3772  }
3773  PrintClocks( " idMath::Cos( tst )", 1, bestClocks );
3774 
3775  bestClocks = 0;
3776  tst = rnd.CRandomFloat();
3777  for ( i = 0; i < NUMTESTS; i++ ) {
3778  StartRecordTime( start );
3779  tst = idMath::Cos16( tst );
3780  StopRecordTime( end );
3781  GetBest( start, end, bestClocks );
3782  testvar = ( testvar + tst ) * tst;
3783  tst = rnd.CRandomFloat();
3784  }
3785  PrintClocks( " idMath::Cos16( tst )", 1, bestClocks );
3786 
3787  bestClocks = 0;
3788  tst = rnd.CRandomFloat();
3789  for ( i = 0; i < NUMTESTS; i++ ) {
3790  StartRecordTime( start );
3791  idMath::SinCos( tst, tst, tst2 );
3792  StopRecordTime( end );
3793  GetBest( start, end, bestClocks );
3794  testvar = ( testvar + tst ) * tst;
3795  tst = rnd.CRandomFloat();
3796  }
3797  PrintClocks( " idMath::SinCos( tst )", 1, bestClocks );
3798 
3799  bestClocks = 0;
3800  tst = rnd.CRandomFloat();
3801  for ( i = 0; i < NUMTESTS; i++ ) {
3802  StartRecordTime( start );
3803  idMath::SinCos16( tst, tst, tst2 );
3804  StopRecordTime( end );
3805  GetBest( start, end, bestClocks );
3806  testvar = ( testvar + tst ) * tst;
3807  tst = rnd.CRandomFloat();
3808  }
3809  PrintClocks( "idMath::SinCos16( tst )", 1, bestClocks );
3810 
3811  bestClocks = 0;
3812  tst = rnd.CRandomFloat();
3813  for ( i = 0; i < NUMTESTS; i++ ) {
3814  StartRecordTime( start );
3815  tst = idMath::Tan( tst );
3816  StopRecordTime( end );
3817  GetBest( start, end, bestClocks );
3818  testvar = ( testvar + tst ) * tst;
3819  tst = rnd.CRandomFloat();
3820  }
3821  PrintClocks( " idMath::Tan( tst )", 1, bestClocks );
3822 
3823  bestClocks = 0;
3824  tst = rnd.CRandomFloat();
3825  for ( i = 0; i < NUMTESTS; i++ ) {
3826  StartRecordTime( start );
3827  tst = idMath::Tan16( tst );
3828  StopRecordTime( end );
3829  GetBest( start, end, bestClocks );
3830  testvar = ( testvar + tst ) * tst;
3831  tst = rnd.CRandomFloat();
3832  }
3833  PrintClocks( " idMath::Tan16( tst )", 1, bestClocks );
3834 
3835  bestClocks = 0;
3836  tst = rnd.CRandomFloat();
3837  for ( i = 0; i < NUMTESTS; i++ ) {
3838  StartRecordTime( start );
3839  tst = idMath::ASin( tst );
3840  StopRecordTime( end );
3841  GetBest( start, end, bestClocks );
3842  testvar = ( testvar + tst ) * tst * ( 1.0f / idMath::PI );
3843  tst = rnd.CRandomFloat();
3844  }
3845  PrintClocks( " idMath::ASin( tst )", 1, bestClocks );
3846 
3847  bestClocks = 0;
3848  tst = rnd.CRandomFloat();
3849  for ( i = 0; i < NUMTESTS; i++ ) {
3850  StartRecordTime( start );
3851  tst = idMath::ASin16( tst );
3852  StopRecordTime( end );
3853  GetBest( start, end, bestClocks );
3854  testvar = ( testvar + tst ) * tst * ( 1.0f / idMath::PI );
3855  tst = rnd.CRandomFloat();
3856  }
3857  PrintClocks( " idMath::ASin16( tst )", 1, bestClocks );
3858 
3859  bestClocks = 0;
3860  tst = rnd.CRandomFloat();
3861  for ( i = 0; i < NUMTESTS; i++ ) {
3862  StartRecordTime( start );
3863  tst = idMath::ACos( tst );
3864  StopRecordTime( end );
3865  GetBest( start, end, bestClocks );
3866  testvar = ( testvar + tst ) * tst * ( 1.0f / idMath::PI );
3867  tst = rnd.CRandomFloat();
3868  }
3869  PrintClocks( " idMath::ACos( tst )", 1, bestClocks );
3870 
3871  bestClocks = 0;
3872  tst = rnd.CRandomFloat();
3873  for ( i = 0; i < NUMTESTS; i++ ) {
3874  StartRecordTime( start );
3875  tst = idMath::ACos16( tst );
3876  StopRecordTime( end );
3877  GetBest( start, end, bestClocks );
3878  testvar = ( testvar + tst ) * tst * ( 1.0f / idMath::PI );
3879  tst = rnd.CRandomFloat();
3880  }
3881  PrintClocks( " idMath::ACos16( tst )", 1, bestClocks );
3882 
3883  bestClocks = 0;
3884  tst = rnd.CRandomFloat();
3885  for ( i = 0; i < NUMTESTS; i++ ) {
3886  StartRecordTime( start );
3887  tst = idMath::ATan( tst );
3888  StopRecordTime( end );
3889  GetBest( start, end, bestClocks );
3890  testvar = ( testvar + tst ) * tst;
3891  tst = rnd.CRandomFloat();
3892  }
3893  PrintClocks( " idMath::ATan( tst )", 1, bestClocks );
3894 
3895  bestClocks = 0;
3896  tst = rnd.CRandomFloat();
3897  for ( i = 0; i < NUMTESTS; i++ ) {
3898  StartRecordTime( start );
3899  tst = idMath::ATan16( tst );
3900  StopRecordTime( end );
3901  GetBest( start, end, bestClocks );
3902  testvar = ( testvar + tst ) * tst;
3903  tst = rnd.CRandomFloat();
3904  }
3905  PrintClocks( " idMath::ATan16( tst )", 1, bestClocks );
3906 
3907  bestClocks = 0;
3908  tst = rnd.CRandomFloat();
3909  for ( i = 0; i < NUMTESTS; i++ ) {
3910  StartRecordTime( start );
3911  tst = idMath::Pow( 2.7f, tst );
3912  StopRecordTime( end );
3913  GetBest( start, end, bestClocks );
3914  testvar = ( testvar + tst ) * tst * 0.1f;
3915  tst = rnd.CRandomFloat();
3916  }
3917  PrintClocks( " idMath::Pow( tst )", 1, bestClocks );
3918 
3919  bestClocks = 0;
3920  tst = rnd.CRandomFloat();
3921  for ( i = 0; i < NUMTESTS; i++ ) {
3922  StartRecordTime( start );
3923  tst = idMath::Pow16( 2.7f, tst );
3924  StopRecordTime( end );
3925  GetBest( start, end, bestClocks );
3926  testvar = ( testvar + tst ) * tst * 0.1f;
3927  tst = rnd.CRandomFloat();
3928  }
3929  PrintClocks( " idMath::Pow16( tst )", 1, bestClocks );
3930 
3931  bestClocks = 0;
3932  tst = rnd.CRandomFloat();
3933  for ( i = 0; i < NUMTESTS; i++ ) {
3934  StartRecordTime( start );
3935  tst = idMath::Exp( tst );
3936  StopRecordTime( end );
3937  GetBest( start, end, bestClocks );
3938  testvar = ( testvar + tst ) * tst * 0.1f;
3939  tst = rnd.CRandomFloat();
3940  }
3941  PrintClocks( " idMath::Exp( tst )", 1, bestClocks );
3942 
3943  bestClocks = 0;
3944  tst = rnd.CRandomFloat();
3945  for ( i = 0; i < NUMTESTS; i++ ) {
3946  StartRecordTime( start );
3947  tst = idMath::Exp16( tst );
3948  StopRecordTime( end );
3949  GetBest( start, end, bestClocks );
3950  testvar = ( testvar + tst ) * tst * 0.1f;
3951  tst = rnd.CRandomFloat();
3952  }
3953  PrintClocks( " idMath::Exp16( tst )", 1, bestClocks );
3954 
3955  bestClocks = 0;
3956  tst = rnd.CRandomFloat();
3957  for ( i = 0; i < NUMTESTS; i++ ) {
3958  tst = fabs( tst ) + 1.0f;
3959  StartRecordTime( start );
3960  tst = idMath::Log( tst );
3961  StopRecordTime( end );
3962  GetBest( start, end, bestClocks );
3963  testvar = ( testvar + tst ) * tst;
3964  tst = rnd.CRandomFloat();
3965  }
3966  PrintClocks( " idMath::Log( tst )", 1, bestClocks );
3967 
3968  bestClocks = 0;
3969  tst = rnd.CRandomFloat();
3970  for ( i = 0; i < NUMTESTS; i++ ) {
3971  tst = fabs( tst ) + 1.0f;
3972  StartRecordTime( start );
3973  tst = idMath::Log16( tst );
3974  StopRecordTime( end );
3975  GetBest( start, end, bestClocks );
3976  testvar = ( testvar + tst ) * tst;
3977  tst = rnd.CRandomFloat();
3978  }
3979  PrintClocks( " idMath::Log16( tst )", 1, bestClocks );
3980 
3981  idLib::common->Printf( "testvar = %f\n", testvar );
3982 
3983  idMat3 resultMat3;
3984  idQuat fromQuat, toQuat, resultQuat;
3985  idCQuat cq;
3986  idAngles ang;
3987 
3988  fromQuat = idAngles( 30, 45, 0 ).ToQuat();
3989  toQuat = idAngles( 45, 0, 0 ).ToQuat();
3990  cq = idAngles( 30, 45, 0 ).ToQuat().ToCQuat();
3991  ang = idAngles( 30, 40, 50 );
3992 
3993  bestClocks = 0;
3994  for ( i = 0; i < NUMTESTS; i++ ) {
3995  StartRecordTime( start );
3996  resultMat3 = fromQuat.ToMat3();
3997  StopRecordTime( end );
3998  GetBest( start, end, bestClocks );
3999  }
4000  PrintClocks( " idQuat::ToMat3()", 1, bestClocks );
4001 
4002  bestClocks = 0;
4003  for ( i = 0; i < NUMTESTS; i++ ) {
4004  StartRecordTime( start );
4005  resultQuat.Slerp( fromQuat, toQuat, 0.3f );
4006  StopRecordTime( end );
4007  GetBest( start, end, bestClocks );
4008  }
4009  PrintClocks( " idQuat::Slerp()", 1, bestClocks );
4010 
4011  bestClocks = 0;
4012  for ( i = 0; i < NUMTESTS; i++ ) {
4013  StartRecordTime( start );
4014  resultQuat = cq.ToQuat();
4015  StopRecordTime( end );
4016  GetBest( start, end, bestClocks );
4017  }
4018  PrintClocks( " idCQuat::ToQuat()", 1, bestClocks );
4019 
4020  bestClocks = 0;
4021  for ( i = 0; i < NUMTESTS; i++ ) {
4022  StartRecordTime( start );
4023  resultQuat = ang.ToQuat();
4024  StopRecordTime( end );
4025  GetBest( start, end, bestClocks );
4026  }
4027  PrintClocks( " idAngles::ToQuat()", 1, bestClocks );
4028 
4029  bestClocks = 0;
4030  for ( i = 0; i < NUMTESTS; i++ ) {
4031  StartRecordTime( start );
4032  resultMat3 = ang.ToMat3();
4033  StopRecordTime( end );
4034  GetBest( start, end, bestClocks );
4035  }
4036  PrintClocks( " idAngles::ToMat3()", 1, bestClocks );
4037 }
4038 
4039 /*
4040 ============
4041 TestNegate
4042 ============
4043 */
4044 
4045 // this wasn't previously in the test
4046 void TestNegate( void ) {
4047  int i;
4048  TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
4049  ALIGN16( float fsrc0[COUNT] );
4050  ALIGN16( float fsrc1[COUNT] );
4051  ALIGN16( float fsrc2[COUNT] );
4052 
4053  const char *result;
4054 
4055  idRandom srnd( RANDOM_SEED );
4056 
4057  for ( i = 0; i < COUNT; i++ ) {
4058  fsrc0[i] = fsrc1[i] = fsrc2[i] = srnd.CRandomFloat() * 10.0f;
4059  //fsrc1[i] = srnd.CRandomFloat() * 10.0f;
4060  }
4061 
4062  idLib::common->Printf("====================================\n" );
4063 
4064  bestClocksGeneric = 0;
4065  for ( i = 0; i < NUMTESTS; i++ ) {
4066 
4067  memcpy( &fsrc1[0], &fsrc0[0], COUNT * sizeof(float) );
4068 
4069  StartRecordTime( start );
4070  p_generic->Negate16( fsrc1, COUNT );
4071  StopRecordTime( end );
4072  GetBest( start, end, bestClocksGeneric );
4073  }
4074  PrintClocks( "generic->Negate16( float[] )", COUNT, bestClocksGeneric );
4075 
4076  bestClocksSIMD = 0;
4077  for ( i = 0; i < NUMTESTS; i++ ) {
4078 
4079  memcpy( &fsrc2[0], &fsrc0[0], COUNT * sizeof(float) );
4080 
4081  StartRecordTime( start );
4082  p_simd->Negate16( fsrc2, COUNT );
4083  StopRecordTime( end );
4084  GetBest( start, end, bestClocksSIMD );
4085  }
4086 
4087  for ( i = 0; i < COUNT; i++ ) {
4088  if ( fsrc1[i] != fsrc2[i] ) {
4089  break;
4090  }
4091  }
4092  result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
4093  PrintClocks( va( " simd->Negate16( float[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
4094 }
4095 
4096 
4097 /*
4098 ============
4099 idSIMD::Test_f
4100 ============
4101 */
4102 void idSIMD::Test_f( const idCmdArgs &args ) {
4103 
4104 #ifdef _WIN32
4105  SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL );
4106 #endif /* _WIN32 */
4107 
4108  p_simd = processor;
4109  p_generic = generic;
4110 
4111  if ( idStr::Length( args.Argv( 1 ) ) != 0 ) {
4112  cpuid_t cpuid = idLib::sys->GetProcessorId();
4113  idStr argString = args.Args();
4114 
4115  argString.Replace( " ", "" );
4116 
4117  if ( idStr::Icmp( argString, "MMX" ) == 0 ) {
4118  if ( !( cpuid & CPUID_MMX ) ) {
4119  common->Printf( "CPU does not support MMX\n" );
4120  return;
4121  }
4122  p_simd = new idSIMD_MMX;
4123  } else if ( idStr::Icmp( argString, "3DNow" ) == 0 ) {
4124  if ( !( cpuid & CPUID_MMX ) || !( cpuid & CPUID_3DNOW ) ) {
4125  common->Printf( "CPU does not support MMX & 3DNow\n" );
4126  return;
4127  }
4128  p_simd = new idSIMD_3DNow;
4129  } else if ( idStr::Icmp( argString, "SSE" ) == 0 ) {
4130  if ( !( cpuid & CPUID_MMX ) || !( cpuid & CPUID_SSE ) ) {
4131  common->Printf( "CPU does not support MMX & SSE\n" );
4132  return;
4133  }
4134  p_simd = new idSIMD_SSE;
4135  } else if ( idStr::Icmp( argString, "SSE2" ) == 0 ) {
4136  if ( !( cpuid & CPUID_MMX ) || !( cpuid & CPUID_SSE ) || !( cpuid & CPUID_SSE2 ) ) {
4137  common->Printf( "CPU does not support MMX & SSE & SSE2\n" );
4138  return;
4139  }
4140  p_simd = new idSIMD_SSE2;
4141  } else if ( idStr::Icmp( argString, "SSE3" ) == 0 ) {
4142  if ( !( cpuid & CPUID_MMX ) || !( cpuid & CPUID_SSE ) || !( cpuid & CPUID_SSE2 ) || !( cpuid & CPUID_SSE3 ) ) {
4143  common->Printf( "CPU does not support MMX & SSE & SSE2 & SSE3\n" );
4144  return;
4145  }
4146  p_simd = new idSIMD_SSE3();
4147  } else if ( idStr::Icmp( argString, "AltiVec" ) == 0 ) {
4148  if ( !( cpuid & CPUID_ALTIVEC ) ) {
4149  common->Printf( "CPU does not support AltiVec\n" );
4150  return;
4151  }
4152  p_simd = new idSIMD_AltiVec();
4153  } else {
4154  common->Printf( "invalid argument, use: MMX, 3DNow, SSE, SSE2, SSE3, AltiVec\n" );
4155  return;
4156  }
4157  }
4158 
4160 
4161  idLib::common->Printf( "using %s for SIMD processing\n", p_simd->GetName() );
4162 
4163  GetBaseClocks();
4164 
4165  TestMath();
4166  TestAdd();
4167  TestSub();
4168  TestMul();
4169  TestDiv();
4170  TestMulAdd();
4171  TestMulSub();
4172  TestDot();
4173  TestCompare();
4174  TestMinMax();
4175  TestClamp();
4176  TestMemcpy();
4177  TestMemset();
4178  TestNegate();
4179 
4189 
4190  idLib::common->Printf("====================================\n" );
4191 
4192  TestBlendJoints();
4208 
4209  idLib::common->Printf("====================================\n" );
4210 
4212  TestSoundMixing();
4213 
4214  idLib::common->SetRefreshOnPrint( false );
4215 
4216  if ( p_simd != processor ) {
4217  delete p_simd;
4218  }
4219  p_simd = NULL;
4220  p_generic = NULL;
4221 
4222 #ifdef _WIN32
4223  SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_NORMAL );
4224 #endif /* _WIN32 */
4225 }
GLdouble GLdouble GLdouble GLdouble q
Definition: glext.h:2959
#define MATX_LDLT_SIMD_EPSILON
Definition: Simd.cpp:2235
static float ATan16(float a)
Definition: Math.h:583
#define COUNT
Definition: Simd.cpp:135
void TestMatXLowerTriangularSolve(void)
Definition: Simd.cpp:2146
idVecX TransposeMultiply(const idVecX &vec) const
Definition: Matrix.h:2683
#define NUMTESTS
Definition: Simd.cpp:136
void SetSize(int size)
Definition: Vector.h:1707
void TestGetTextureSpaceLightVectors(void)
Definition: Simd.cpp:3123
void TestMatXTransposeMultiplyMatX(void)
Definition: Simd.cpp:2069
void TestDiv(void)
Definition: Simd.cpp:577
virtual void VPCALL ConvertJointQuatsToJointMats(idJointMat *jointMats, const idJointQuat *jointQuats, const int numJoints)=0
static float Log16(float f)
Definition: Math.h:691
static const float INFINITY
Definition: Math.h:218
void TestBlendJoints(void)
Definition: Simd.cpp:2290
#define min(a, b)
bool Compare(const idVec3 &a) const
Definition: Vector.h:496
float Normalize(void)
Definition: Vector.h:646
virtual void VPCALL MatX_TransposeMultiplyAddVecX(idVecX &dst, const idMatX &mat, const idVecX &vec)=0
static double Sqrt64(float x)
Definition: Math.h:306
void TestAdd(void)
Definition: Simd.cpp:354
void TestConvertJointQuatsToJointMats(void)
Definition: Simd.cpp:2362
const GLbyte * weights
Definition: glext.h:3273
void TestMemcpy(void)
Definition: Simd.cpp:1485
static float Log(float f)
Definition: Math.h:687
void TestSoundUpSampling(void)
Definition: Simd.cpp:3355
void TestMulSub(void)
Definition: Simd.cpp:711
void TestMulAdd(void)
Definition: Simd.cpp:655
Definition: Quat.h:306
void TestMath(void)
Definition: Simd.cpp:3642
void TestSub(void)
Definition: Simd.cpp:428
virtual void VPCALL Sub(float *dst, const float constant, const float *src, const int count)=0
bool Compare(const idMat3 &a) const
Definition: Matrix.h:561
virtual void VPCALL UpSamplePCMTo44kHz(float *dest, const short *pcm, const int numSamples, const int kHz, const int numChannels)=0
static float Exp16(float f)
Definition: Math.h:652
void TestNegate(void)
Definition: Simd.cpp:4046
static float Tan16(float a)
Definition: Math.h:463
static float ACos16(float a)
Definition: Math.h:554
const GLdouble * v
Definition: glext.h:2936
virtual void VPCALL DeriveTangents(idPlane *planes, idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes)=0
idSIMDProcessor * p_simd
Definition: Simd.cpp:140
idMat3 ToMat3(void) const
Definition: Quat.cpp:70
void Zero(void)
Definition: Vector.h:1767
void TestDeriveTangents(void)
Definition: Simd.cpp:2900
const int MIXBUFFER_SAMPLES
Definition: Simd.h:84
static void Test_f(const class idCmdArgs &args)
Definition: Simd.cpp:4102
#define SOUND_UPSAMPLE_EPSILON
Definition: Simd.cpp:3353
virtual void VPCALL MatX_TransposeMultiplyMatX(idMatX &dst, const idMatX &m1, const idMatX &m2)=0
int Length(void) const
Definition: Str.h:702
void TestCompare(void)
Definition: Simd.cpp:1014
#define StartRecordTime(start)
Definition: Simd.cpp:296
virtual const char *VPCALL GetName(void) const =0
static const float PI
Definition: Math.h:205
virtual void VPCALL CreateTextureSpaceLightVectors(idVec3 *lightVectors, const idVec3 &lightOrigin, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes)=0
#define SOUND_MIX_EPSILON
Definition: Simd.cpp:3444
virtual void VPCALL Div(float *dst, const float constant, const float *src, const int count)=0
case const int
Definition: Callbacks.cpp:52
virtual void VPCALL Clamp(float *dst, const float *src, const float min, const float max, const int count)=0
virtual void VPCALL CmpGE(byte *dst, const float *src0, const float constant, const int count)=0
void TestMatXLowerTriangularSolveTranspose(void)
Definition: Simd.cpp:2193
const char * Args(int start=1, int end=-1, bool escapeArgs=false) const
Definition: CmdArgs.cpp:52
virtual void VPCALL MixSoundTwoSpeakerMono(float *mixBuffer, const float *samples, const int numSamples, const float lastV[2], const float currentV[2])=0
virtual void VPCALL MatX_MultiplyMatX(idMatX &dst, const idMatX &m1, const idMatX &m2)=0
Definition: Vector.h:316
static float Sqrt(float x)
Definition: Math.h:302
void TestUntransformJoints(void)
Definition: Simd.cpp:2535
virtual void VPCALL OverlayPointCull(byte *cullBits, idVec2 *texCoords, const idPlane *planes, const idDrawVert *verts, const int numVerts)=0
static class idSys * sys
Definition: Lib.h:52
virtual void VPCALL Memset(void *dst, const int val, const int count)=0
GLuint src
Definition: glext.h:5390
void SetNormal(const idVec3 &normal)
Definition: Plane.h:233
static float Sqrt16(float x)
Definition: Math.h:298
#define MATX_LTS_SOLVE_SIZE
Definition: Simd.cpp:2139
#define MATX_MATX_SIMD_EPSILON
Definition: Simd.cpp:1931
idQuat & Slerp(const idQuat &from, const idQuat &to, float t)
Definition: Quat.cpp:160
GLenum GLint x
Definition: glext.h:2849
virtual void VPCALL MatX_LowerTriangularSolveTranspose(const idMatX &L, float *x, const float *b, const int n)=0
int i
Definition: process.py:33
Boolean result
int test(char *url)
Definition: lib500.c:3
static void SinCos(float a, float &s, float &c)
Definition: Math.h:390
virtual void VPCALL MatX_TransposeMultiplyVecX(idVecX &dst, const idMatX &mat, const idVecX &vec)=0
int Icmp(const char *text) const
Definition: Str.h:667
static float ASin(float a)
Definition: Math.h:509
static float Cos16(float a)
Definition: Math.h:350
virtual void VPCALL ConvertJointMatsToJointQuats(idJointQuat *jointQuats, const idJointMat *jointMats, const int numJoints)=0
static float Sin16(float a)
Definition: Math.h:314
static void Shutdown(void)
Definition: Simd.cpp:118
void TestCreateShadowCache(void)
Definition: Simd.cpp:3254
virtual void VPCALL CmpLE(byte *dst, const float *src0, const float constant, const int count)=0
virtual void FPU_SetDAZ(bool enable)=0
GLfloat GLfloat GLfloat v2
Definition: glext.h:3608
#define StopRecordTime(end)
Definition: Simd.cpp:299
int RandomInt(void)
Definition: Random.h:70
static float Pow16(float x, float y)
Definition: Math.h:640
GLuint dst
Definition: glext.h:5285
virtual void VPCALL CreateSpecularTextureCoords(idVec4 *texCoords, const idVec3 &lightOrigin, const idVec3 &viewOrigin, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes)=0
#define MATX_LTS_SIMD_EPSILON
Definition: Simd.cpp:2138
void Random(int seed, float l=0.0f, float u=1.0f)
Definition: Vector.h:1784
virtual void VPCALL MinMax(float &min, float &max, const float *src, const int count)=0
Definition: Vector.h:52
static float ASin16(float a)
Definition: Math.h:519
virtual void VPCALL CmpLT(byte *dst, const float *src0, const float constant, const int count)=0
GLuint index
Definition: glext.h:3476
Definition: Vector.h:808
void TestMul(void)
Definition: Simd.cpp:502
float RandomFloat(void)
Definition: Random.h:82
virtual void VPCALL BlendJoints(idJointQuat *joints, const idJointQuat *blendJoints, const float lerp, const int *index, const int numJoints)=0
virtual void VPCALL TransformVerts(idDrawVert *verts, const int numVerts, const idJointMat *joints, const idVec4 *weights, const int *index, const int numWeights)=0
static float ATan(float a)
Definition: Math.h:579
#define NUMJOINTS
Definition: Simd.cpp:2597
virtual void VPCALL MatX_MultiplyVecX(idVecX &dst, const idMatX &mat, const idVecX &vec)=0
virtual void VPCALL UpSampleOGGTo44kHz(float *dest, const float *const *ogg, const int numSamples, const int kHz, const int numChannels)=0
GLuint GLuint end
Definition: glext.h:2845
static float Sin(float a)
Definition: Math.h:310
void TestMatXMultiplyAddVecX(void)
Definition: Simd.cpp:1654
virtual void VPCALL MulAdd(float *dst, const float constant, const float *src, const int count)=0
static float Fabs(float f)
Definition: Math.h:779
virtual void VPCALL DeriveTriPlanes(idPlane *planes, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes)=0
virtual cpuid_t GetProcessorId(void)=0
void TestDeriveUnsmoothedTangents(void)
Definition: Simd.cpp:2985
idCommon * common
Definition: Common.cpp:206
virtual void VPCALL Add(float *dst, const float constant, const float *src, const int count)=0
void TestMatXMultiplyVecX(void)
Definition: Simd.cpp:1543
#define TEST_VALUE_RANGE
Definition: Simd.cpp:1930
#define NULL
Definition: Lib.h:88
idQuat ToQuat(void) const
Definition: Angles.cpp:131
cpuid_t
Definition: sys_public.h:142
virtual void VPCALL MixSoundSixSpeakerStereo(float *mixBuffer, const float *samples, const int numSamples, const float lastV[6], const float currentV[6])=0
void TestTransformVerts(void)
Definition: Simd.cpp:2599
const float * ToFloatPtr(void) const
Definition: Vector.h:1910
virtual void VPCALL MixSoundSixSpeakerMono(float *mixBuffer, const float *samples, const int numSamples, const float lastV[6], const float currentV[6])=0
virtual void VPCALL MatX_LowerTriangularSolve(const idMatX &L, float *x, const float *b, const int n, int skip=0)=0
Definition: Plane.h:71
virtual void VPCALL Dot(float *dst, const idVec3 &constant, const idVec3 *src, const int count)=0
Definition: eax4.h:1413
virtual void VPCALL MixSoundTwoSpeakerStereo(float *mixBuffer, const float *samples, const int numSamples, const float lastV[2], const float currentV[2])=0
void TestNormalizeTangents(void)
Definition: Simd.cpp:3062
void Random(int seed, float l=0.0f, float u=1.0f)
Definition: Matrix.h:2333
static void InitProcessor(const char *module, bool forceGeneric)
Definition: Simd.cpp:63
virtual void VPCALL Mul(float *dst, const float constant, const float *src, const int count)=0
#define S_COLOR_RED
Definition: Str.h:96
idCQuat ToCQuat(void) const
Definition: Quat.cpp:122
static float Tan(float a)
Definition: Math.h:459
void TestMatXTransposeMultiplyAddVecX(void)
Definition: Simd.cpp:1848
virtual void Printf(const char *fmt,...) id_attribute((format(printf
static float Exp(float f)
Definition: Math.h:648
static void SinCos16(float a, float &s, float &c)
Definition: Math.h:406
void TestMatXLDLTFactor(void)
Definition: Simd.cpp:2243
static void Init(void)
Definition: Simd.cpp:51
#define MATX_LDLT_FACTOR_SOLVE_SIZE
Definition: Simd.cpp:2236
void TestClamp(void)
Definition: Simd.cpp:1386
virtual bool VPCALL MatX_LDLTFactor(idMatX &mat, idVecX &invDiag, const int n)=0
GLfloat GLfloat v1
Definition: glext.h:3607
const char * ToString(int precision=2) const
Definition: Vector.cpp:221
virtual void VPCALL DeriveUnsmoothedTangents(idDrawVert *verts, const dominantTri_s *dominantTris, const int numVerts)=0
virtual void VPCALL Memcpy(void *dst, const void *src, const int count)=0
idVec3 mat[3]
Definition: Matrix.h:408
virtual void VPCALL UntransformJoints(idJointMat *jointMats, const int *parents, const int firstJoint, const int lastJoint)=0
GLubyte GLubyte b
Definition: glext.h:4662
Definition: Quat.h:48
static float Pow(float x, float y)
Definition: Math.h:636
virtual void VPCALL TransformJoints(idJointMat *jointMats, const int *parents, const int firstJoint, const int lastJoint)=0
void TestDecalPointCull(void)
Definition: Simd.cpp:2722
#define TIME_TYPE
Definition: Simd.cpp:294
virtual void VPCALL TracePointCull(byte *cullBits, byte &totalOr, const float radius, const idPlane *planes, const idDrawVert *verts, const int numVerts)=0
Definition: Matrix.h:333
void TestMinMax(void)
Definition: Simd.cpp:1250
void SetSize(int rows, int columns)
Definition: Matrix.h:2247
virtual void VPCALL MatX_MultiplyAddVecX(idVecX &dst, const idMatX &mat, const idVecX &vec)=0
void TestMatXTransposeMultiplyVecX(void)
Definition: Simd.cpp:1766
tuple f
Definition: idal.py:89
virtual void VPCALL MulSub(float *dst, const float constant, const float *src, const int count)=0
void TestTracePointCull(void)
Definition: Simd.cpp:2664
GLuint in
Definition: glext.h:5388
void PrintClocks(const char *string, int dataCount, int clocks, int otherClocks=0)
Definition: Simd.cpp:315
idMat3 ToMat3(void) const
Definition: Angles.cpp:199
unsigned char byte
Definition: Lib.h:75
virtual void FPU_SetFTZ(bool enable)=0
void TestTransformJoints(void)
Definition: Simd.cpp:2473
void TestConvertJointMatsToJointQuats(void)
Definition: Simd.cpp:2414
void GetBaseClocks(void)
Definition: Simd.cpp:337
idQuat ToQuat(void) const
Definition: Quat.h:391
Definition: Str.h:116
void TestDeriveTriPlanes(void)
Definition: Simd.cpp:2841
#define RANDOM_SEED
Definition: Simd.cpp:138
void TestDot(void)
Definition: Simd.cpp:767
virtual void VPCALL NormalizeTangents(idDrawVert *verts, const int numVerts)=0
idSIMDProcessor * p_generic
Definition: Simd.cpp:141
virtual void VPCALL MixedSoundToSamples(short *samples, const float *mixBuffer, const int numSamples)=0
idSIMDProcessor * processor
Definition: Simd.cpp:41
long baseClocks
Definition: Simd.cpp:142
bool Compare(const idVecX &a) const
Definition: Vector.h:1675
virtual void VPCALL CmpGT(byte *dst, const float *src0, const float constant, const int count)=0
void TestMatXMultiplyMatX(void)
Definition: Simd.cpp:1933
void TestOverlayPointCull(void)
Definition: Simd.cpp:2783
const char * Argv(int arg) const
Definition: CmdArgs.h:50
GLint j
Definition: qgl.h:264
void TestGetSpecularTextureCoords(void)
Definition: Simd.cpp:3187
virtual void SetRefreshOnPrint(bool set)=0
#define GetBest(start, end, best)
Definition: Simd.cpp:304
char * va(const char *fmt,...)
Definition: Str.cpp:1568
static float ACos(float a)
Definition: Math.h:544
#define max(x, y)
Definition: os.h:70
void TestSoundMixing(void)
Definition: Simd.cpp:3446
virtual void VPCALL Negate16(float *dst, const int count)=0
void Replace(const char *old, const char *nw)
Definition: Str.cpp:563
GLfloat GLfloat p
Definition: glext.h:4674
virtual void VPCALL ClampMin(float *dst, const float *src, const float min, const int count)=0
#define MATX_SIMD_EPSILON
Definition: Simd.cpp:1536
cpuid_t cpuid
Definition: Simd.h:100
float CRandomFloat(void)
Definition: Random.h:86
int LengthWithoutColors(void) const
Definition: Str.h:855
#define NUMVERTS
Definition: Simd.cpp:2598
GLuint start
Definition: glext.h:2845
virtual void VPCALL ClampMax(float *dst, const float *src, const float max, const int count)=0
static float RSqrt(float x)
Definition: Math.h:241
void TestMemset(void)
Definition: Simd.cpp:1514
bool Compare(const idMatX &a) const
Definition: Matrix.h:2211
virtual void VPCALL DecalPointCull(byte *cullBits, const idPlane *planes, const idDrawVert *verts, const int numVerts)=0
static class idCommon * common
Definition: Lib.h:53
virtual int VPCALL CreateShadowCache(idVec4 *vertexCache, int *vertRemap, const idVec3 &lightOrigin, const idDrawVert *verts, const int numVerts)=0
GLdouble GLdouble t
Definition: glext.h:2943
virtual int VPCALL CreateVertexProgramShadowCache(idVec4 *vertexCache, const idDrawVert *verts, const int numVerts)=0
static float Cos(float a)
Definition: Math.h:346
idSIMDProcessor * SIMDProcessor
Definition: Simd.cpp:43