doom3-gpl
Doom 3 GPL source release
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Lcp.cpp
Go to the documentation of this file.
1 /*
2 ===========================================================================
3 
4 Doom 3 GPL Source Code
5 Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.
6 
7 This file is part of the Doom 3 GPL Source Code (?Doom 3 Source Code?).
8 
9 Doom 3 Source Code is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13 
14 Doom 3 Source Code is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with Doom 3 Source Code. If not, see <http://www.gnu.org/licenses/>.
21 
22 In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code. If not, please request a copy in writing from id Software at the address below.
23 
24 If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
25 
26 ===========================================================================
27 */
28 
29 #include "../precompiled.h"
30 #pragma hdrstop
31 
32 static idCVar lcp_showFailures( "lcp_showFailures", "0", CVAR_SYSTEM | CVAR_BOOL, "show LCP solver failures" );
33 
34 const float LCP_BOUND_EPSILON = 1e-5f;
35 const float LCP_ACCEL_EPSILON = 1e-5f;
36 const float LCP_DELTA_ACCEL_EPSILON = 1e-9f;
37 const float LCP_DELTA_FORCE_EPSILON = 1e-9f;
38 
39 #define IGNORE_UNSATISFIABLE_VARIABLES
40 
41 //===============================================================
42 // M
43 // idLCP_Square MrE
44 // E
45 //===============================================================
46 
47 class idLCP_Square : public idLCP {
48 public:
49  virtual bool Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex );
50 
51 private:
52  idMatX m; // original matrix
53  idVecX b; // right hand side
54  idVecX lo, hi; // low and high bounds
55  idVecX f, a; // force and acceleration
56  idVecX delta_f, delta_a; // delta force and delta acceleration
57  idMatX clamped; // LU factored sub matrix for clamped variables
58  idVecX diagonal; // reciprocal of diagonal of U of the LU factored sub matrix for clamped variables
59  int numUnbounded; // number of unbounded variables
60  int numClamped; // number of clamped variables
61  float ** rowPtrs; // pointers to the rows of m
62  int * boxIndex; // box index
63  int * side; // tells if a variable is at the low boundary = -1, high boundary = 1 or inbetween = 0
64  int * permuted; // index to keep track of the permutation
65  bool padded; // set to true if the rows of the initial matrix are 16 byte padded
66 
67 private:
68  bool FactorClamped( void );
69  void SolveClamped( idVecX &x, const float *b );
70  void Swap( int i, int j );
71  void AddClamped( int r );
72  void RemoveClamped( int r );
73  void CalcForceDelta( int d, float dir );
74  void CalcAccelDelta( int d );
75  void ChangeForce( int d, float step );
76  void ChangeAccel( int d, float step );
77  void GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const;
78 };
79 
80 /*
81 ============
82 idLCP_Square::FactorClamped
83 ============
84 */
86  int i, j, k;
87  float s, d;
88 
89  for ( i = 0; i < numClamped; i++ ) {
90  memcpy( clamped[i], rowPtrs[i], numClamped * sizeof( float ) );
91  }
92 
93  for ( i = 0; i < numClamped; i++ ) {
94 
95  s = idMath::Fabs( clamped[i][i] );
96 
97  if ( s == 0.0f ) {
98  return false;
99  }
100 
101  diagonal[i] = d = 1.0f / clamped[i][i];
102  for ( j = i + 1; j < numClamped; j++ ) {
103  clamped[j][i] *= d;
104  }
105 
106  for ( j = i + 1; j < numClamped; j++ ) {
107  d = clamped[j][i];
108  for ( k = i + 1; k < numClamped; k++ ) {
109  clamped[j][k] -= d * clamped[i][k];
110  }
111  }
112  }
113 
114  return true;
115 }
116 
117 /*
118 ============
119 idLCP_Square::SolveClamped
120 ============
121 */
122 void idLCP_Square::SolveClamped( idVecX &x, const float *b ) {
123  int i, j;
124  float sum;
125 
126  // solve L
127  for ( i = 0; i < numClamped; i++ ) {
128  sum = b[i];
129  for ( j = 0; j < i; j++ ) {
130  sum -= clamped[i][j] * x[j];
131  }
132  x[i] = sum;
133  }
134 
135  // solve U
136  for ( i = numClamped - 1; i >= 0; i-- ) {
137  sum = x[i];
138  for ( j = i + 1; j < numClamped; j++ ) {
139  sum -= clamped[i][j] * x[j];
140  }
141  x[i] = sum * diagonal[i];
142  }
143 }
144 
145 /*
146 ============
147 idLCP_Square::Swap
148 ============
149 */
150 void idLCP_Square::Swap( int i, int j ) {
151 
152  if ( i == j ) {
153  return;
154  }
155 
156  idSwap( rowPtrs[i], rowPtrs[j] );
157  m.SwapColumns( i, j );
158  b.SwapElements( i, j );
159  lo.SwapElements( i, j );
160  hi.SwapElements( i, j );
161  a.SwapElements( i, j );
162  f.SwapElements( i, j );
163  if ( boxIndex ) {
164  idSwap( boxIndex[i], boxIndex[j] );
165  }
166  idSwap( side[i], side[j] );
167  idSwap( permuted[i], permuted[j] );
168 }
169 
170 /*
171 ============
172 idLCP_Square::AddClamped
173 ============
174 */
176  int i, j;
177  float sum;
178 
179  assert( r >= numClamped );
180 
181  // add a row at the bottom and a column at the right of the factored
182  // matrix for the clamped variables
183 
184  Swap( numClamped, r );
185 
186  // add row to L
187  for ( i = 0; i < numClamped; i++ ) {
188  sum = rowPtrs[numClamped][i];
189  for ( j = 0; j < i; j++ ) {
190  sum -= clamped[numClamped][j] * clamped[j][i];
191  }
192  clamped[numClamped][i] = sum * diagonal[i];
193  }
194 
195  // add column to U
196  for ( i = 0; i <= numClamped; i++ ) {
197  sum = rowPtrs[i][numClamped];
198  for ( j = 0; j < i; j++ ) {
199  sum -= clamped[i][j] * clamped[j][numClamped];
200  }
201  clamped[i][numClamped] = sum;
202  }
203 
205 
206  numClamped++;
207 }
208 
209 /*
210 ============
211 idLCP_Square::RemoveClamped
212 ============
213 */
215  int i, j;
216  float *y0, *y1, *z0, *z1;
217  double diag, beta0, beta1, p0, p1, q0, q1, d;
218 
219  assert( r < numClamped );
220 
221  numClamped--;
222 
223  // no need to swap and update the factored matrix when the last row and column are removed
224  if ( r == numClamped ) {
225  return;
226  }
227 
228  y0 = (float *) _alloca16( numClamped * sizeof( float ) );
229  z0 = (float *) _alloca16( numClamped * sizeof( float ) );
230  y1 = (float *) _alloca16( numClamped * sizeof( float ) );
231  z1 = (float *) _alloca16( numClamped * sizeof( float ) );
232 
233  // the row/column need to be subtracted from the factorization
234  for ( i = 0; i < numClamped; i++ ) {
235  y0[i] = -rowPtrs[i][r];
236  }
237 
238  memset( y1, 0, numClamped * sizeof( float ) );
239  y1[r] = 1.0f;
240 
241  memset( z0, 0, numClamped * sizeof( float ) );
242  z0[r] = 1.0f;
243 
244  for ( i = 0; i < numClamped; i++ ) {
245  z1[i] = -rowPtrs[r][i];
246  }
247 
248  // swap the to be removed row/column with the last row/column
249  Swap( r, numClamped );
250 
251  // the swapped last row/column need to be added to the factorization
252  for ( i = 0; i < numClamped; i++ ) {
253  y0[i] += rowPtrs[i][r];
254  }
255 
256  for ( i = 0; i < numClamped; i++ ) {
257  z1[i] += rowPtrs[r][i];
258  }
259  z1[r] = 0.0f;
260 
261  // update the beginning of the to be updated row and column
262  for ( i = 0; i < r; i++ ) {
263  p0 = y0[i];
264  beta1 = z1[i] * diagonal[i];
265 
266  clamped[i][r] += p0;
267  for ( j = i+1; j < numClamped; j++ ) {
268  z1[j] -= beta1 * clamped[i][j];
269  }
270  for ( j = i+1; j < numClamped; j++ ) {
271  y0[j] -= p0 * clamped[j][i];
272  }
273  clamped[r][i] += beta1;
274  }
275 
276  // update the lower right corner starting at r,r
277  for ( i = r; i < numClamped; i++ ) {
278  diag = clamped[i][i];
279 
280  p0 = y0[i];
281  p1 = z0[i];
282  diag += p0 * p1;
283 
284  if ( diag == 0.0f ) {
285  idLib::common->Printf( "idLCP_Square::RemoveClamped: updating factorization failed\n" );
286  return;
287  }
288 
289  beta0 = p1 / diag;
290 
291  q0 = y1[i];
292  q1 = z1[i];
293  diag += q0 * q1;
294 
295  if ( diag == 0.0f ) {
296  idLib::common->Printf( "idLCP_Square::RemoveClamped: updating factorization failed\n" );
297  return;
298  }
299 
300  d = 1.0f / diag;
301  beta1 = q1 * d;
302 
303  clamped[i][i] = diag;
304  diagonal[i] = d;
305 
306  for ( j = i+1; j < numClamped; j++ ) {
307 
308  d = clamped[i][j];
309 
310  d += p0 * z0[j];
311  z0[j] -= beta0 * d;
312 
313  d += q0 * z1[j];
314  z1[j] -= beta1 * d;
315 
316  clamped[i][j] = d;
317  }
318 
319  for ( j = i+1; j < numClamped; j++ ) {
320 
321  d = clamped[j][i];
322 
323  y0[j] -= p0 * d;
324  d += beta0 * y0[j];
325 
326  y1[j] -= q0 * d;
327  d += beta1 * y1[j];
328 
329  clamped[j][i] = d;
330  }
331  }
332  return;
333 }
334 
335 /*
336 ============
337 idLCP_Square::CalcForceDelta
338 
339  modifies this->delta_f
340 ============
341 */
342 ID_INLINE void idLCP_Square::CalcForceDelta( int d, float dir ) {
343  int i;
344  float *ptr;
345 
346  delta_f[d] = dir;
347 
348  if ( numClamped == 0 ) {
349  return;
350  }
351 
352  // get column d of matrix
353  ptr = (float *) _alloca16( numClamped * sizeof( float ) );
354  for ( i = 0; i < numClamped; i++ ) {
355  ptr[i] = rowPtrs[i][d];
356  }
357 
358  // solve force delta
359  SolveClamped( delta_f, ptr );
360 
361  // flip force delta based on direction
362  if ( dir > 0.0f ) {
363  ptr = delta_f.ToFloatPtr();
364  for ( i = 0; i < numClamped; i++ ) {
365  ptr[i] = - ptr[i];
366  }
367  }
368 }
369 
370 /*
371 ============
372 idLCP_Square::CalcAccelDelta
373 
374  modifies this->delta_a and uses this->delta_f
375 ============
376 */
377 ID_INLINE void idLCP_Square::CalcAccelDelta( int d ) {
378  int j;
379  float dot;
380 
381  // only the not clamped variables, including the current variable, can have a change in acceleration
382  for ( j = numClamped; j <= d; j++ ) {
383  // only the clamped variables and the current variable have a force delta unequal zero
385  delta_a[j] = dot + rowPtrs[j][d] * delta_f[d];
386  }
387 }
388 
389 /*
390 ============
391 idLCP_Square::ChangeForce
392 
393  modifies this->f and uses this->delta_f
394 ============
395 */
396 ID_INLINE void idLCP_Square::ChangeForce( int d, float step ) {
397  // only the clamped variables and current variable have a force delta unequal zero
399  f[d] += step * delta_f[d];
400 }
401 
402 /*
403 ============
404 idLCP_Square::ChangeAccel
405 
406  modifies this->a and uses this->delta_a
407 ============
408 */
409 ID_INLINE void idLCP_Square::ChangeAccel( int d, float step ) {
410  // only the not clamped variables, including the current variable, can have an acceleration unequal zero
411  SIMDProcessor->MulAdd( a.ToFloatPtr() + numClamped, step, delta_a.ToFloatPtr() + numClamped, d - numClamped + 1 );
412 }
413 
414 /*
415 ============
416 idLCP_Square::GetMaxStep
417 ============
418 */
419 void idLCP_Square::GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const {
420  int i;
421  float s;
422 
423  // default to a full step for the current variable
425  maxStep = -a[d] / delta_a[d];
426  } else {
427  maxStep = 0.0f;
428  }
429  limit = d;
430  limitSide = 0;
431 
432  // test the current variable
433  if ( dir < 0.0f ) {
434  if ( lo[d] != -idMath::INFINITY ) {
435  s = ( lo[d] - f[d] ) / dir;
436  if ( s < maxStep ) {
437  maxStep = s;
438  limitSide = -1;
439  }
440  }
441  } else {
442  if ( hi[d] != idMath::INFINITY ) {
443  s = ( hi[d] - f[d] ) / dir;
444  if ( s < maxStep ) {
445  maxStep = s;
446  limitSide = 1;
447  }
448  }
449  }
450 
451  // test the clamped bounded variables
452  for ( i = numUnbounded; i < numClamped; i++ ) {
453  if ( delta_f[i] < -LCP_DELTA_FORCE_EPSILON ) {
454  // if there is a low boundary
455  if ( lo[i] != -idMath::INFINITY ) {
456  s = ( lo[i] - f[i] ) / delta_f[i];
457  if ( s < maxStep ) {
458  maxStep = s;
459  limit = i;
460  limitSide = -1;
461  }
462  }
463  } else if ( delta_f[i] > LCP_DELTA_FORCE_EPSILON ) {
464  // if there is a high boundary
465  if ( hi[i] != idMath::INFINITY ) {
466  s = ( hi[i] - f[i] ) / delta_f[i];
467  if ( s < maxStep ) {
468  maxStep = s;
469  limit = i;
470  limitSide = 1;
471  }
472  }
473  }
474  }
475 
476  // test the not clamped bounded variables
477  for ( i = numClamped; i < d; i++ ) {
478  if ( side[i] == -1 ) {
479  if ( delta_a[i] >= -LCP_DELTA_ACCEL_EPSILON ) {
480  continue;
481  }
482  } else if ( side[i] == 1 ) {
483  if ( delta_a[i] <= LCP_DELTA_ACCEL_EPSILON ) {
484  continue;
485  }
486  } else {
487  continue;
488  }
489  // ignore variables for which the force is not allowed to take any substantial value
490  if ( lo[i] >= -LCP_BOUND_EPSILON && hi[i] <= LCP_BOUND_EPSILON ) {
491  continue;
492  }
493  s = -a[i] / delta_a[i];
494  if ( s < maxStep ) {
495  maxStep = s;
496  limit = i;
497  limitSide = 0;
498  }
499  }
500 }
501 
502 /*
503 ============
504 idLCP_Square::Solve
505 ============
506 */
507 bool idLCP_Square::Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex ) {
508  int i, j, n, limit, limitSide, boxStartIndex;
509  float dir, maxStep, dot, s;
510  char *failed;
511 
512  // true when the matrix rows are 16 byte padded
513  padded = ((o_m.GetNumRows()+3)&~3) == o_m.GetNumColumns();
514 
515  assert( padded || o_m.GetNumRows() == o_m.GetNumColumns() );
516  assert( o_x.GetSize() == o_m.GetNumRows() );
517  assert( o_b.GetSize() == o_m.GetNumRows() );
518  assert( o_lo.GetSize() == o_m.GetNumRows() );
519  assert( o_hi.GetSize() == o_m.GetNumRows() );
520 
521  // allocate memory for permuted input
522  f.SetData( o_m.GetNumRows(), VECX_ALLOCA( o_m.GetNumRows() ) );
523  a.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
524  b.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
525  lo.SetData( o_lo.GetSize(), VECX_ALLOCA( o_lo.GetSize() ) );
526  hi.SetData( o_hi.GetSize(), VECX_ALLOCA( o_hi.GetSize() ) );
527  if ( o_boxIndex ) {
528  boxIndex = (int *)_alloca16( o_x.GetSize() * sizeof( int ) );
529  memcpy( boxIndex, o_boxIndex, o_x.GetSize() * sizeof( int ) );
530  } else {
531  boxIndex = NULL;
532  }
533 
534  // we override the const on o_m here but on exit the matrix is unchanged
535  m.SetData( o_m.GetNumRows(), o_m.GetNumColumns(), const_cast<float *>(o_m[0]) );
536  f.Zero();
537  a.Zero();
538  b = o_b;
539  lo = o_lo;
540  hi = o_hi;
541 
542  // pointers to the rows of m
543  rowPtrs = (float **) _alloca16( m.GetNumRows() * sizeof( float * ) );
544  for ( i = 0; i < m.GetNumRows(); i++ ) {
545  rowPtrs[i] = m[i];
546  }
547 
548  // tells if a variable is at the low boundary, high boundary or inbetween
549  side = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
550 
551  // index to keep track of the permutation
552  permuted = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
553  for ( i = 0; i < m.GetNumRows(); i++ ) {
554  permuted[i] = i;
555  }
556 
557  // permute input so all unbounded variables come first
558  numUnbounded = 0;
559  for ( i = 0; i < m.GetNumRows(); i++ ) {
560  if ( lo[i] == -idMath::INFINITY && hi[i] == idMath::INFINITY ) {
561  if ( numUnbounded != i ) {
562  Swap( numUnbounded, i );
563  }
564  numUnbounded++;
565  }
566  }
567 
568  // permute input so all variables using the boxIndex come last
569  boxStartIndex = m.GetNumRows();
570  if ( boxIndex ) {
571  for ( i = m.GetNumRows() - 1; i >= numUnbounded; i-- ) {
572  if ( boxIndex[i] >= 0 && ( lo[i] != -idMath::INFINITY || hi[i] != idMath::INFINITY ) ) {
573  boxStartIndex--;
574  if ( boxStartIndex != i ) {
575  Swap( boxStartIndex, i );
576  }
577  }
578  }
579  }
580 
581  // sub matrix for factorization
584 
585  // all unbounded variables are clamped
587 
588  // if there are unbounded variables
589  if ( numUnbounded ) {
590 
591  // factor and solve for unbounded variables
592  if ( !FactorClamped() ) {
593  idLib::common->Printf( "idLCP_Square::Solve: unbounded factorization failed\n" );
594  return false;
595  }
596  SolveClamped( f, b.ToFloatPtr() );
597 
598  // if there are no bounded variables we are done
599  if ( numUnbounded == m.GetNumRows() ) {
600  o_x = f; // the vector is not permuted
601  return true;
602  }
603  }
604 
605 #ifdef IGNORE_UNSATISFIABLE_VARIABLES
606  int numIgnored = 0;
607 #endif
608 
609  // allocate for delta force and delta acceleration
612 
613  // solve for bounded variables
614  failed = NULL;
615  for ( i = numUnbounded; i < m.GetNumRows(); i++ ) {
616 
617  // once we hit the box start index we can initialize the low and high boundaries of the variables using the box index
618  if ( i == boxStartIndex ) {
619  for ( j = 0; j < boxStartIndex; j++ ) {
620  o_x[permuted[j]] = f[j];
621  }
622  for ( j = boxStartIndex; j < m.GetNumRows(); j++ ) {
623  s = o_x[boxIndex[j]];
624  if ( lo[j] != -idMath::INFINITY ) {
625  lo[j] = - idMath::Fabs( lo[j] * s );
626  }
627  if ( hi[j] != idMath::INFINITY ) {
628  hi[j] = idMath::Fabs( hi[j] * s );
629  }
630  }
631  }
632 
633  // calculate acceleration for current variable
634  SIMDProcessor->Dot( dot, rowPtrs[i], f.ToFloatPtr(), i );
635  a[i] = dot - b[i];
636 
637  // if already at the low boundary
638  if ( lo[i] >= -LCP_BOUND_EPSILON && a[i] >= -LCP_ACCEL_EPSILON ) {
639  side[i] = -1;
640  continue;
641  }
642 
643  // if already at the high boundary
644  if ( hi[i] <= LCP_BOUND_EPSILON && a[i] <= LCP_ACCEL_EPSILON ) {
645  side[i] = 1;
646  continue;
647  }
648 
649  // if inside the clamped region
650  if ( idMath::Fabs( a[i] ) <= LCP_ACCEL_EPSILON ) {
651  side[i] = 0;
652  AddClamped( i );
653  continue;
654  }
655 
656  // drive the current variable into a valid region
657  for ( n = 0; n < maxIterations; n++ ) {
658 
659  // direction to move
660  if ( a[i] <= 0.0f ) {
661  dir = 1.0f;
662  } else {
663  dir = -1.0f;
664  }
665 
666  // calculate force delta
667  CalcForceDelta( i, dir );
668 
669  // calculate acceleration delta: delta_a = m * delta_f;
670  CalcAccelDelta( i );
671 
672  // maximum step we can take
673  GetMaxStep( i, dir, maxStep, limit, limitSide );
674 
675  if ( maxStep <= 0.0f ) {
676 #ifdef IGNORE_UNSATISFIABLE_VARIABLES
677  // ignore the current variable completely
678  lo[i] = hi[i] = 0.0f;
679  f[i] = 0.0f;
680  side[i] = -1;
681  numIgnored++;
682 #else
683  failed = va( "invalid step size %.4f", maxStep );
684 #endif
685  break;
686  }
687 
688  // change force
689  ChangeForce( i, maxStep );
690 
691  // change acceleration
692  ChangeAccel( i, maxStep );
693 
694  // clamp/unclamp the variable that limited this step
695  side[limit] = limitSide;
696  switch( limitSide ) {
697  case 0: {
698  a[limit] = 0.0f;
699  AddClamped( limit );
700  break;
701  }
702  case -1: {
703  f[limit] = lo[limit];
704  if ( limit != i ) {
705  RemoveClamped( limit );
706  }
707  break;
708  }
709  case 1: {
710  f[limit] = hi[limit];
711  if ( limit != i ) {
712  RemoveClamped( limit );
713  }
714  break;
715  }
716  }
717 
718  // if the current variable limited the step we can continue with the next variable
719  if ( limit == i ) {
720  break;
721  }
722  }
723 
724  if ( n >= maxIterations ) {
725  failed = va( "max iterations %d", maxIterations );
726  break;
727  }
728 
729  if ( failed ) {
730  break;
731  }
732  }
733 
734 #ifdef IGNORE_UNSATISFIABLE_VARIABLES
735  if ( numIgnored ) {
736  if ( lcp_showFailures.GetBool() ) {
737  idLib::common->Printf( "idLCP_Symmetric::Solve: %d of %d bounded variables ignored\n", numIgnored, m.GetNumRows() - numUnbounded );
738  }
739  }
740 #endif
741 
742  // if failed clear remaining forces
743  if ( failed ) {
744  if ( lcp_showFailures.GetBool() ) {
745  idLib::common->Printf( "idLCP_Square::Solve: %s (%d of %d bounded variables ignored)\n", failed, m.GetNumRows() - i, m.GetNumRows() - numUnbounded );
746  }
747  for ( j = i; j < m.GetNumRows(); j++ ) {
748  f[j] = 0.0f;
749  }
750  }
751 
752 #if defined(_DEBUG) && 0
753  if ( !failed ) {
754  // test whether or not the solution satisfies the complementarity conditions
755  for ( i = 0; i < m.GetNumRows(); i++ ) {
756  a[i] = -b[i];
757  for ( j = 0; j < m.GetNumRows(); j++ ) {
758  a[i] += rowPtrs[i][j] * f[j];
759  }
760 
761  if ( f[i] == lo[i] ) {
762  if ( lo[i] != hi[i] && a[i] < -LCP_ACCEL_EPSILON ) {
763  int bah1 = 1;
764  }
765  } else if ( f[i] == hi[i] ) {
766  if ( lo[i] != hi[i] && a[i] > LCP_ACCEL_EPSILON ) {
767  int bah2 = 1;
768  }
769  } else if ( f[i] < lo[i] || f[i] > hi[i] || idMath::Fabs( a[i] ) > 1.0f ) {
770  int bah3 = 1;
771  }
772  }
773  }
774 #endif
775 
776  // unpermute result
777  for ( i = 0; i < f.GetSize(); i++ ) {
778  o_x[permuted[i]] = f[i];
779  }
780 
781  // unpermute original matrix
782  for ( i = 0; i < m.GetNumRows(); i++ ) {
783  for ( j = 0; j < m.GetNumRows(); j++ ) {
784  if ( permuted[j] == i ) {
785  break;
786  }
787  }
788  if ( i != j ) {
789  m.SwapColumns( i, j );
790  idSwap( permuted[i], permuted[j] );
791  }
792  }
793 
794  return true;
795 }
796 
797 
798 //===============================================================
799 // M
800 // idLCP_Symmetric MrE
801 // E
802 //===============================================================
803 
804 class idLCP_Symmetric : public idLCP {
805 public:
806  virtual bool Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex );
807 
808 private:
809  idMatX m; // original matrix
810  idVecX b; // right hand side
811  idVecX lo, hi; // low and high bounds
812  idVecX f, a; // force and acceleration
813  idVecX delta_f, delta_a; // delta force and delta acceleration
814  idMatX clamped; // LDLt factored sub matrix for clamped variables
815  idVecX diagonal; // reciprocal of diagonal of LDLt factored sub matrix for clamped variables
816  idVecX solveCache1; // intermediate result cached in SolveClamped
818  int numUnbounded; // number of unbounded variables
819  int numClamped; // number of clamped variables
820  int clampedChangeStart; // lowest row/column changed in the clamped matrix during an iteration
821  float ** rowPtrs; // pointers to the rows of m
822  int * boxIndex; // box index
823  int * side; // tells if a variable is at the low boundary = -1, high boundary = 1 or inbetween = 0
824  int * permuted; // index to keep track of the permutation
825  bool padded; // set to true if the rows of the initial matrix are 16 byte padded
826 
827 private:
828  bool FactorClamped( void );
829  void SolveClamped( idVecX &x, const float *b );
830  void Swap( int i, int j );
831  void AddClamped( int r, bool useSolveCache );
832  void RemoveClamped( int r );
833  void CalcForceDelta( int d, float dir );
834  void CalcAccelDelta( int d );
835  void ChangeForce( int d, float step );
836  void ChangeAccel( int d, float step );
837  void GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const;
838 };
839 
840 /*
841 ============
842 idLCP_Symmetric::FactorClamped
843 ============
844 */
846 
847  clampedChangeStart = 0;
848 
849  for ( int i = 0; i < numClamped; i++ ) {
850  memcpy( clamped[i], rowPtrs[i], numClamped * sizeof( float ) );
851  }
852  return SIMDProcessor->MatX_LDLTFactor( clamped, diagonal, numClamped );
853 }
854 
855 /*
856 ============
857 idLCP_Symmetric::SolveClamped
858 ============
859 */
860 void idLCP_Symmetric::SolveClamped( idVecX &x, const float *b ) {
861 
862  // solve L
864 
865  // solve D
867 
868  // solve Lt
870 
872 }
873 
874 /*
875 ============
876 idLCP_Symmetric::Swap
877 ============
878 */
879 void idLCP_Symmetric::Swap( int i, int j ) {
880 
881  if ( i == j ) {
882  return;
883  }
884 
885  idSwap( rowPtrs[i], rowPtrs[j] );
886  m.SwapColumns( i, j );
887  b.SwapElements( i, j );
888  lo.SwapElements( i, j );
889  hi.SwapElements( i, j );
890  a.SwapElements( i, j );
891  f.SwapElements( i, j );
892  if ( boxIndex ) {
893  idSwap( boxIndex[i], boxIndex[j] );
894  }
895  idSwap( side[i], side[j] );
896  idSwap( permuted[i], permuted[j] );
897 }
898 
899 /*
900 ============
901 idLCP_Symmetric::AddClamped
902 ============
903 */
904 void idLCP_Symmetric::AddClamped( int r, bool useSolveCache ) {
905  float d, dot;
906 
907  assert( r >= numClamped );
908 
909  if ( numClamped < clampedChangeStart ) {
911  }
912 
913  // add a row at the bottom and a column at the right of the factored
914  // matrix for the clamped variables
915 
916  Swap( numClamped, r );
917 
918  // solve for v in L * v = rowPtr[numClamped]
919  if ( useSolveCache ) {
920 
921  // the lower triangular solve was cached in SolveClamped called by CalcForceDelta
922  memcpy( clamped[numClamped], solveCache2.ToFloatPtr(), numClamped * sizeof( float ) );
923  // calculate row dot product
925 
926  } else {
927 
928  float *v = (float *) _alloca16( numClamped * sizeof( float ) );
929 
931  // add bottom row to L
932  SIMDProcessor->Mul( clamped[numClamped], v, diagonal.ToFloatPtr(), numClamped );
933  // calculate row dot product
934  SIMDProcessor->Dot( dot, clamped[numClamped], v, numClamped );
935  }
936 
937  // update diagonal[numClamped]
939 
940  if ( d == 0.0f ) {
941  idLib::common->Printf( "idLCP_Symmetric::AddClamped: updating factorization failed\n" );
942  numClamped++;
943  return;
944  }
945 
947  diagonal[numClamped] = 1.0f / d;
948 
949  numClamped++;
950 }
951 
952 /*
953 ============
954 idLCP_Symmetric::RemoveClamped
955 ============
956 */
958  int i, j, n;
959  float *addSub, *original, *v, *ptr, *v1, *v2, dot;
960  double sum, diag, newDiag, invNewDiag, p1, p2, alpha1, alpha2, beta1, beta2;
961 
962  assert( r < numClamped );
963 
964  if ( r < clampedChangeStart ) {
966  }
967 
968  numClamped--;
969 
970  // no need to swap and update the factored matrix when the last row and column are removed
971  if ( r == numClamped ) {
972  return;
973  }
974 
975  // swap the to be removed row/column with the last row/column
976  Swap( r, numClamped );
977 
978  // update the factored matrix
979  addSub = (float *) _alloca16( numClamped * sizeof( float ) );
980 
981  if ( r == 0 ) {
982 
983  if ( numClamped == 1 ) {
984  diag = rowPtrs[0][0];
985  if ( diag == 0.0f ) {
986  idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
987  return;
988  }
989  clamped[0][0] = diag;
990  diagonal[0] = 1.0f / diag;
991  return;
992  }
993 
994  // calculate the row/column to be added to the lower right sub matrix starting at (r, r)
995  original = rowPtrs[numClamped];
996  ptr = rowPtrs[r];
997  addSub[0] = ptr[0] - original[numClamped];
998  for ( i = 1; i < numClamped; i++ ) {
999  addSub[i] = ptr[i] - original[i];
1000  }
1001 
1002  } else {
1003 
1004  v = (float *) _alloca16( numClamped * sizeof( float ) );
1005 
1006  // solve for v in L * v = rowPtr[r]
1008 
1009  // update removed row
1010  SIMDProcessor->Mul( clamped[r], v, diagonal.ToFloatPtr(), r );
1011 
1012  // if the last row/column of the matrix is updated
1013  if ( r == numClamped - 1 ) {
1014  // only calculate new diagonal
1015  SIMDProcessor->Dot( dot, clamped[r], v, r );
1016  diag = rowPtrs[r][r] - dot;
1017  if ( diag == 0.0f ) {
1018  idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
1019  return;
1020  }
1021  clamped[r][r] = diag;
1022  diagonal[r] = 1.0f / diag;
1023  return;
1024  }
1025 
1026  // calculate the row/column to be added to the lower right sub matrix starting at (r, r)
1027  for ( i = 0; i < r; i++ ) {
1028  v[i] = clamped[r][i] * clamped[i][i];
1029  }
1030  for ( i = r; i < numClamped; i++ ) {
1031  if ( i == r ) {
1032  sum = clamped[r][r];
1033  } else {
1034  sum = clamped[r][r] * clamped[i][r];
1035  }
1036  ptr = clamped[i];
1037  for ( j = 0; j < r; j++ ) {
1038  sum += ptr[j] * v[j];
1039  }
1040  addSub[i] = rowPtrs[r][i] - sum;
1041  }
1042  }
1043 
1044  // add row/column to the lower right sub matrix starting at (r, r)
1045 
1046  v1 = (float *) _alloca16( numClamped * sizeof( float ) );
1047  v2 = (float *) _alloca16( numClamped * sizeof( float ) );
1048 
1049  diag = idMath::SQRT_1OVER2;
1050  v1[r] = ( 0.5f * addSub[r] + 1.0f ) * diag;
1051  v2[r] = ( 0.5f * addSub[r] - 1.0f ) * diag;
1052  for ( i = r+1; i < numClamped; i++ ) {
1053  v1[i] = v2[i] = addSub[i] * diag;
1054  }
1055 
1056  alpha1 = 1.0f;
1057  alpha2 = -1.0f;
1058 
1059  // simultaneous update/downdate of the sub matrix starting at (r, r)
1060  n = clamped.GetNumColumns();
1061  for ( i = r; i < numClamped; i++ ) {
1062 
1063  diag = clamped[i][i];
1064  p1 = v1[i];
1065  newDiag = diag + alpha1 * p1 * p1;
1066 
1067  if ( newDiag == 0.0f ) {
1068  idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
1069  return;
1070  }
1071 
1072  alpha1 /= newDiag;
1073  beta1 = p1 * alpha1;
1074  alpha1 *= diag;
1075 
1076  diag = newDiag;
1077  p2 = v2[i];
1078  newDiag = diag + alpha2 * p2 * p2;
1079 
1080  if ( newDiag == 0.0f ) {
1081  idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
1082  return;
1083  }
1084 
1085  clamped[i][i] = newDiag;
1086  diagonal[i] = invNewDiag = 1.0f / newDiag;
1087 
1088  alpha2 *= invNewDiag;
1089  beta2 = p2 * alpha2;
1090  alpha2 *= diag;
1091 
1092  // update column below diagonal (i,i)
1093  ptr = clamped.ToFloatPtr() + i;
1094 
1095  for ( j = i+1; j < numClamped - 1; j += 2 ) {
1096 
1097  float sum0 = ptr[(j+0)*n];
1098  float sum1 = ptr[(j+1)*n];
1099 
1100  v1[j+0] -= p1 * sum0;
1101  v1[j+1] -= p1 * sum1;
1102 
1103  sum0 += beta1 * v1[j+0];
1104  sum1 += beta1 * v1[j+1];
1105 
1106  v2[j+0] -= p2 * sum0;
1107  v2[j+1] -= p2 * sum1;
1108 
1109  sum0 += beta2 * v2[j+0];
1110  sum1 += beta2 * v2[j+1];
1111 
1112  ptr[(j+0)*n] = sum0;
1113  ptr[(j+1)*n] = sum1;
1114  }
1115 
1116  for ( ; j < numClamped; j++ ) {
1117 
1118  sum = ptr[j*n];
1119 
1120  v1[j] -= p1 * sum;
1121  sum += beta1 * v1[j];
1122 
1123  v2[j] -= p2 * sum;
1124  sum += beta2 * v2[j];
1125 
1126  ptr[j*n] = sum;
1127  }
1128  }
1129 }
1130 
1131 /*
1132 ============
1133 idLCP_Symmetric::CalcForceDelta
1134 
1135  modifies this->delta_f
1136 ============
1137 */
1138 ID_INLINE void idLCP_Symmetric::CalcForceDelta( int d, float dir ) {
1139  int i;
1140  float *ptr;
1141 
1142  delta_f[d] = dir;
1143 
1144  if ( numClamped == 0 ) {
1145  return;
1146  }
1147 
1148  // solve force delta
1149  SolveClamped( delta_f, rowPtrs[d] );
1150 
1151  // flip force delta based on direction
1152  if ( dir > 0.0f ) {
1153  ptr = delta_f.ToFloatPtr();
1154  for ( i = 0; i < numClamped; i++ ) {
1155  ptr[i] = - ptr[i];
1156  }
1157  }
1158 }
1159 
1160 /*
1161 ============
1162 idLCP_Symmetric::CalcAccelDelta
1163 
1164  modifies this->delta_a and uses this->delta_f
1165 ============
1166 */
1167 ID_INLINE void idLCP_Symmetric::CalcAccelDelta( int d ) {
1168  int j;
1169  float dot;
1170 
1171  // only the not clamped variables, including the current variable, can have a change in acceleration
1172  for ( j = numClamped; j <= d; j++ ) {
1173  // only the clamped variables and the current variable have a force delta unequal zero
1175  delta_a[j] = dot + rowPtrs[j][d] * delta_f[d];
1176  }
1177 }
1178 
1179 /*
1180 ============
1181 idLCP_Symmetric::ChangeForce
1182 
1183  modifies this->f and uses this->delta_f
1184 ============
1185 */
1186 ID_INLINE void idLCP_Symmetric::ChangeForce( int d, float step ) {
1187  // only the clamped variables and current variable have a force delta unequal zero
1189  f[d] += step * delta_f[d];
1190 }
1191 
1192 /*
1193 ============
1194 idLCP_Symmetric::ChangeAccel
1195 
1196  modifies this->a and uses this->delta_a
1197 ============
1198 */
1199 ID_INLINE void idLCP_Symmetric::ChangeAccel( int d, float step ) {
1200  // only the not clamped variables, including the current variable, can have an acceleration unequal zero
1201  SIMDProcessor->MulAdd( a.ToFloatPtr() + numClamped, step, delta_a.ToFloatPtr() + numClamped, d - numClamped + 1 );
1202 }
1203 
1204 /*
1205 ============
1206 idLCP_Symmetric::GetMaxStep
1207 ============
1208 */
1209 void idLCP_Symmetric::GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const {
1210  int i;
1211  float s;
1212 
1213  // default to a full step for the current variable
1215  maxStep = -a[d] / delta_a[d];
1216  } else {
1217  maxStep = 0.0f;
1218  }
1219  limit = d;
1220  limitSide = 0;
1221 
1222  // test the current variable
1223  if ( dir < 0.0f ) {
1224  if ( lo[d] != -idMath::INFINITY ) {
1225  s = ( lo[d] - f[d] ) / dir;
1226  if ( s < maxStep ) {
1227  maxStep = s;
1228  limitSide = -1;
1229  }
1230  }
1231  } else {
1232  if ( hi[d] != idMath::INFINITY ) {
1233  s = ( hi[d] - f[d] ) / dir;
1234  if ( s < maxStep ) {
1235  maxStep = s;
1236  limitSide = 1;
1237  }
1238  }
1239  }
1240 
1241  // test the clamped bounded variables
1242  for ( i = numUnbounded; i < numClamped; i++ ) {
1243  if ( delta_f[i] < -LCP_DELTA_FORCE_EPSILON ) {
1244  // if there is a low boundary
1245  if ( lo[i] != -idMath::INFINITY ) {
1246  s = ( lo[i] - f[i] ) / delta_f[i];
1247  if ( s < maxStep ) {
1248  maxStep = s;
1249  limit = i;
1250  limitSide = -1;
1251  }
1252  }
1253  } else if ( delta_f[i] > LCP_DELTA_FORCE_EPSILON ) {
1254  // if there is a high boundary
1255  if ( hi[i] != idMath::INFINITY ) {
1256  s = ( hi[i] - f[i] ) / delta_f[i];
1257  if ( s < maxStep ) {
1258  maxStep = s;
1259  limit = i;
1260  limitSide = 1;
1261  }
1262  }
1263  }
1264  }
1265 
1266  // test the not clamped bounded variables
1267  for ( i = numClamped; i < d; i++ ) {
1268  if ( side[i] == -1 ) {
1269  if ( delta_a[i] >= -LCP_DELTA_ACCEL_EPSILON ) {
1270  continue;
1271  }
1272  } else if ( side[i] == 1 ) {
1273  if ( delta_a[i] <= LCP_DELTA_ACCEL_EPSILON ) {
1274  continue;
1275  }
1276  } else {
1277  continue;
1278  }
1279  // ignore variables for which the force is not allowed to take any substantial value
1280  if ( lo[i] >= -LCP_BOUND_EPSILON && hi[i] <= LCP_BOUND_EPSILON ) {
1281  continue;
1282  }
1283  s = -a[i] / delta_a[i];
1284  if ( s < maxStep ) {
1285  maxStep = s;
1286  limit = i;
1287  limitSide = 0;
1288  }
1289  }
1290 }
1291 
1292 /*
1293 ============
1294 idLCP_Symmetric::Solve
1295 ============
1296 */
1297 bool idLCP_Symmetric::Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex ) {
1298  int i, j, n, limit, limitSide, boxStartIndex;
1299  float dir, maxStep, dot, s;
1300  char *failed;
1301 
1302  // true when the matrix rows are 16 byte padded
1303  padded = ((o_m.GetNumRows()+3)&~3) == o_m.GetNumColumns();
1304 
1305  assert( padded || o_m.GetNumRows() == o_m.GetNumColumns() );
1306  assert( o_x.GetSize() == o_m.GetNumRows() );
1307  assert( o_b.GetSize() == o_m.GetNumRows() );
1308  assert( o_lo.GetSize() == o_m.GetNumRows() );
1309  assert( o_hi.GetSize() == o_m.GetNumRows() );
1310 
1311  // allocate memory for permuted input
1312  f.SetData( o_m.GetNumRows(), VECX_ALLOCA( o_m.GetNumRows() ) );
1313  a.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
1314  b.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
1315  lo.SetData( o_lo.GetSize(), VECX_ALLOCA( o_lo.GetSize() ) );
1316  hi.SetData( o_hi.GetSize(), VECX_ALLOCA( o_hi.GetSize() ) );
1317  if ( o_boxIndex ) {
1318  boxIndex = (int *)_alloca16( o_x.GetSize() * sizeof( int ) );
1319  memcpy( boxIndex, o_boxIndex, o_x.GetSize() * sizeof( int ) );
1320  } else {
1321  boxIndex = NULL;
1322  }
1323 
1324  // we override the const on o_m here but on exit the matrix is unchanged
1325  m.SetData( o_m.GetNumRows(), o_m.GetNumColumns(), const_cast<float *>(o_m[0]) );
1326  f.Zero();
1327  a.Zero();
1328  b = o_b;
1329  lo = o_lo;
1330  hi = o_hi;
1331 
1332  // pointers to the rows of m
1333  rowPtrs = (float **) _alloca16( m.GetNumRows() * sizeof( float * ) );
1334  for ( i = 0; i < m.GetNumRows(); i++ ) {
1335  rowPtrs[i] = m[i];
1336  }
1337 
1338  // tells if a variable is at the low boundary, high boundary or inbetween
1339  side = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
1340 
1341  // index to keep track of the permutation
1342  permuted = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
1343  for ( i = 0; i < m.GetNumRows(); i++ ) {
1344  permuted[i] = i;
1345  }
1346 
1347  // permute input so all unbounded variables come first
1348  numUnbounded = 0;
1349  for ( i = 0; i < m.GetNumRows(); i++ ) {
1350  if ( lo[i] == -idMath::INFINITY && hi[i] == idMath::INFINITY ) {
1351  if ( numUnbounded != i ) {
1352  Swap( numUnbounded, i );
1353  }
1354  numUnbounded++;
1355  }
1356  }
1357 
1358  // permute input so all variables using the boxIndex come last
1359  boxStartIndex = m.GetNumRows();
1360  if ( boxIndex ) {
1361  for ( i = m.GetNumRows() - 1; i >= numUnbounded; i-- ) {
1362  if ( boxIndex[i] >= 0 && ( lo[i] != -idMath::INFINITY || hi[i] != idMath::INFINITY ) ) {
1363  boxStartIndex--;
1364  if ( boxStartIndex != i ) {
1365  Swap( boxStartIndex, i );
1366  }
1367  }
1368  }
1369  }
1370 
1371  // sub matrix for factorization
1376 
1377  // all unbounded variables are clamped
1379 
1380  // if there are unbounded variables
1381  if ( numUnbounded ) {
1382 
1383  // factor and solve for unbounded variables
1384  if ( !FactorClamped() ) {
1385  idLib::common->Printf( "idLCP_Symmetric::Solve: unbounded factorization failed\n" );
1386  return false;
1387  }
1388  SolveClamped( f, b.ToFloatPtr() );
1389 
1390  // if there are no bounded variables we are done
1391  if ( numUnbounded == m.GetNumRows() ) {
1392  o_x = f; // the vector is not permuted
1393  return true;
1394  }
1395  }
1396 
1397 #ifdef IGNORE_UNSATISFIABLE_VARIABLES
1398  int numIgnored = 0;
1399 #endif
1400 
1401  // allocate for delta force and delta acceleration
1404 
1405  // solve for bounded variables
1406  failed = NULL;
1407  for ( i = numUnbounded; i < m.GetNumRows(); i++ ) {
1408 
1409  clampedChangeStart = 0;
1410 
1411  // once we hit the box start index we can initialize the low and high boundaries of the variables using the box index
1412  if ( i == boxStartIndex ) {
1413  for ( j = 0; j < boxStartIndex; j++ ) {
1414  o_x[permuted[j]] = f[j];
1415  }
1416  for ( j = boxStartIndex; j < m.GetNumRows(); j++ ) {
1417  s = o_x[boxIndex[j]];
1418  if ( lo[j] != -idMath::INFINITY ) {
1419  lo[j] = - idMath::Fabs( lo[j] * s );
1420  }
1421  if ( hi[j] != idMath::INFINITY ) {
1422  hi[j] = idMath::Fabs( hi[j] * s );
1423  }
1424  }
1425  }
1426 
1427  // calculate acceleration for current variable
1428  SIMDProcessor->Dot( dot, rowPtrs[i], f.ToFloatPtr(), i );
1429  a[i] = dot - b[i];
1430 
1431  // if already at the low boundary
1432  if ( lo[i] >= -LCP_BOUND_EPSILON && a[i] >= -LCP_ACCEL_EPSILON ) {
1433  side[i] = -1;
1434  continue;
1435  }
1436 
1437  // if already at the high boundary
1438  if ( hi[i] <= LCP_BOUND_EPSILON && a[i] <= LCP_ACCEL_EPSILON ) {
1439  side[i] = 1;
1440  continue;
1441  }
1442 
1443  // if inside the clamped region
1444  if ( idMath::Fabs( a[i] ) <= LCP_ACCEL_EPSILON ) {
1445  side[i] = 0;
1446  AddClamped( i, false );
1447  continue;
1448  }
1449 
1450  // drive the current variable into a valid region
1451  for ( n = 0; n < maxIterations; n++ ) {
1452 
1453  // direction to move
1454  if ( a[i] <= 0.0f ) {
1455  dir = 1.0f;
1456  } else {
1457  dir = -1.0f;
1458  }
1459 
1460  // calculate force delta
1461  CalcForceDelta( i, dir );
1462 
1463  // calculate acceleration delta: delta_a = m * delta_f;
1464  CalcAccelDelta( i );
1465 
1466  // maximum step we can take
1467  GetMaxStep( i, dir, maxStep, limit, limitSide );
1468 
1469  if ( maxStep <= 0.0f ) {
1470 #ifdef IGNORE_UNSATISFIABLE_VARIABLES
1471  // ignore the current variable completely
1472  lo[i] = hi[i] = 0.0f;
1473  f[i] = 0.0f;
1474  side[i] = -1;
1475  numIgnored++;
1476 #else
1477  failed = va( "invalid step size %.4f", maxStep );
1478 #endif
1479  break;
1480  }
1481 
1482  // change force
1483  ChangeForce( i, maxStep );
1484 
1485  // change acceleration
1486  ChangeAccel( i, maxStep );
1487 
1488  // clamp/unclamp the variable that limited this step
1489  side[limit] = limitSide;
1490  switch( limitSide ) {
1491  case 0: {
1492  a[limit] = 0.0f;
1493  AddClamped( limit, ( limit == i ) );
1494  break;
1495  }
1496  case -1: {
1497  f[limit] = lo[limit];
1498  if ( limit != i ) {
1499  RemoveClamped( limit );
1500  }
1501  break;
1502  }
1503  case 1: {
1504  f[limit] = hi[limit];
1505  if ( limit != i ) {
1506  RemoveClamped( limit );
1507  }
1508  break;
1509  }
1510  }
1511 
1512  // if the current variable limited the step we can continue with the next variable
1513  if ( limit == i ) {
1514  break;
1515  }
1516  }
1517 
1518  if ( n >= maxIterations ) {
1519  failed = va( "max iterations %d", maxIterations );
1520  break;
1521  }
1522 
1523  if ( failed ) {
1524  break;
1525  }
1526  }
1527 
1528 #ifdef IGNORE_UNSATISFIABLE_VARIABLES
1529  if ( numIgnored ) {
1530  if ( lcp_showFailures.GetBool() ) {
1531  idLib::common->Printf( "idLCP_Symmetric::Solve: %d of %d bounded variables ignored\n", numIgnored, m.GetNumRows() - numUnbounded );
1532  }
1533  }
1534 #endif
1535 
1536  // if failed clear remaining forces
1537  if ( failed ) {
1538  if ( lcp_showFailures.GetBool() ) {
1539  idLib::common->Printf( "idLCP_Symmetric::Solve: %s (%d of %d bounded variables ignored)\n", failed, m.GetNumRows() - i, m.GetNumRows() - numUnbounded );
1540  }
1541  for ( j = i; j < m.GetNumRows(); j++ ) {
1542  f[j] = 0.0f;
1543  }
1544  }
1545 
1546 #if defined(_DEBUG) && 0
1547  if ( !failed ) {
1548  // test whether or not the solution satisfies the complementarity conditions
1549  for ( i = 0; i < m.GetNumRows(); i++ ) {
1550  a[i] = -b[i];
1551  for ( j = 0; j < m.GetNumRows(); j++ ) {
1552  a[i] += rowPtrs[i][j] * f[j];
1553  }
1554 
1555  if ( f[i] == lo[i] ) {
1556  if ( lo[i] != hi[i] && a[i] < -LCP_ACCEL_EPSILON ) {
1557  int bah1 = 1;
1558  }
1559  } else if ( f[i] == hi[i] ) {
1560  if ( lo[i] != hi[i] && a[i] > LCP_ACCEL_EPSILON ) {
1561  int bah2 = 1;
1562  }
1563  } else if ( f[i] < lo[i] || f[i] > hi[i] || idMath::Fabs( a[i] ) > 1.0f ) {
1564  int bah3 = 1;
1565  }
1566  }
1567  }
1568 #endif
1569 
1570  // unpermute result
1571  for ( i = 0; i < f.GetSize(); i++ ) {
1572  o_x[permuted[i]] = f[i];
1573  }
1574 
1575  // unpermute original matrix
1576  for ( i = 0; i < m.GetNumRows(); i++ ) {
1577  for ( j = 0; j < m.GetNumRows(); j++ ) {
1578  if ( permuted[j] == i ) {
1579  break;
1580  }
1581  }
1582  if ( i != j ) {
1583  m.SwapColumns( i, j );
1584  idSwap( permuted[i], permuted[j] );
1585  }
1586  }
1587 
1588  return true;
1589 }
1590 
1591 
1592 //===============================================================
1593 //
1594 // idLCP
1595 //
1596 //===============================================================
1597 
1598 /*
1599 ============
1600 idLCP::AllocSquare
1601 ============
1602 */
1604  idLCP *lcp = new idLCP_Square;
1605  lcp->SetMaxIterations( 32 );
1606  return lcp;
1607 }
1608 
1609 /*
1610 ============
1611 idLCP::AllocSymmetric
1612 ============
1613 */
1615  idLCP *lcp = new idLCP_Symmetric;
1616  lcp->SetMaxIterations( 32 );
1617  return lcp;
1618 }
1619 
1620 /*
1621 ============
1622 idLCP::~idLCP
1623 ============
1624 */
1625 idLCP::~idLCP( void ) {
1626 }
1627 
1628 /*
1629 ============
1630 idLCP::SetMaxIterations
1631 ============
1632 */
1634  maxIterations = max;
1635 }
1636 
1637 /*
1638 ============
1639 idLCP::GetMaxIterations
1640 ============
1641 */
1643  return maxIterations;
1644 }
void ChangeAccel(int d, float step)
Definition: Lcp.cpp:1199
static const float INFINITY
Definition: Math.h:218
int clampedChangeStart
Definition: Lcp.cpp:820
assert(prefInfo.fullscreenBtn)
bool FactorClamped(void)
Definition: Lcp.cpp:85
int GetSize(void) const
Definition: Vector.h:1467
void Swap(int i, int j)
Definition: Lcp.cpp:150
void SolveClamped(idVecX &x, const float *b)
Definition: Lcp.cpp:122
idVecX delta_a
Definition: Lcp.cpp:813
void RemoveClamped(int r)
Definition: Lcp.cpp:214
const GLdouble * v
Definition: glext.h:2936
Definition: Lcp.h:62
void Zero(void)
Definition: Vector.h:1767
const float LCP_ACCEL_EPSILON
Definition: Lcp.cpp:35
GLenum GLsizei n
Definition: glext.h:3705
void Swap(int i, int j)
Definition: Lcp.cpp:879
idVecX f
Definition: Lcp.cpp:812
case const int
Definition: Callbacks.cpp:52
#define VECX_ALLOCA(n)
Definition: Vector.h:1432
float ** rowPtrs
Definition: Lcp.cpp:61
const float LCP_DELTA_FORCE_EPSILON
Definition: Lcp.cpp:37
int numClamped
Definition: Lcp.cpp:819
idVecX & SwapElements(int e1, int e2)
Definition: Vector.h:1829
case const float
Definition: Callbacks.cpp:62
idMatX clamped
Definition: Lcp.cpp:57
virtual ~idLCP(void)
Definition: Lcp.cpp:1625
void AddClamped(int r)
Definition: Lcp.cpp:175
const float LCP_BOUND_EPSILON
Definition: Lcp.cpp:34
void ChangeForce(int d, float step)
Definition: Lcp.cpp:396
GLdouble s
Definition: glext.h:2935
virtual bool Solve(const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex)
Definition: Lcp.cpp:507
GLenum GLint x
Definition: glext.h:2849
virtual void VPCALL MatX_LowerTriangularSolveTranspose(const idMatX &L, float *x, const float *b, const int n)=0
int i
Definition: process.py:33
bool FactorClamped(void)
Definition: Lcp.cpp:845
static idLCP * AllocSymmetric(void)
Definition: Lcp.cpp:1614
static idLCP * AllocSquare(void)
Definition: Lcp.cpp:1603
idVecX lo
Definition: Lcp.cpp:811
virtual bool Solve(const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex)
Definition: Lcp.cpp:1297
GLfloat GLfloat GLfloat v2
Definition: glext.h:3608
int GetNumColumns(void) const
Definition: Matrix.h:1822
int * side
Definition: Lcp.cpp:823
idMatX m
Definition: Lcp.cpp:52
idVecX f
Definition: Lcp.cpp:55
void GetMaxStep(int d, float dir, float &maxStep, int &limit, int &limitSide) const
Definition: Lcp.cpp:1209
idMatX & SwapColumns(int r1, int r2)
Definition: Matrix.h:2393
idVecX delta_f
Definition: Lcp.cpp:56
int numUnbounded
Definition: Lcp.cpp:818
virtual void VPCALL MulAdd(float *dst, const float constant, const float *src, const int count)=0
static float Fabs(float f)
Definition: Math.h:779
#define NULL
Definition: Lib.h:88
static const float SQRT_1OVER2
Definition: Math.h:212
#define MATX_ALLOCA(n)
Definition: Matrix.h:1783
const float * ToFloatPtr(void) const
Definition: Vector.h:1910
void GetMaxStep(int d, float dir, float &maxStep, int &limit, int &limitSide) const
Definition: Lcp.cpp:419
void AddClamped(int r, bool useSolveCache)
Definition: Lcp.cpp:904
virtual void VPCALL MatX_LowerTriangularSolve(const idMatX &L, float *x, const float *b, const int n, int skip=0)=0
virtual void VPCALL Dot(float *dst, const idVec3 &constant, const idVec3 *src, const int count)=0
float ** rowPtrs
Definition: Lcp.cpp:821
int GetNumRows(void) const
Definition: Matrix.h:1821
int * side
Definition: Lcp.cpp:63
int * boxIndex
Definition: Lcp.cpp:62
virtual int GetMaxIterations(void)
Definition: Lcp.cpp:1642
virtual void VPCALL Mul(float *dst, const float constant, const float *src, const int count)=0
GLubyte GLubyte GLubyte a
Definition: glext.h:4662
virtual void Printf(const char *fmt,...) id_attribute((format(printf
void CalcForceDelta(int d, float dir)
Definition: Lcp.cpp:1138
int * boxIndex
Definition: Lcp.cpp:822
ID_INLINE void idSwap(type &a, type &b)
Definition: List.h:77
idVecX diagonal
Definition: Lcp.cpp:815
int * permuted
Definition: Lcp.cpp:64
virtual bool VPCALL MatX_LDLTFactor(idMatX &mat, idVecX &invDiag, const int n)=0
GLfloat GLfloat v1
Definition: glext.h:3607
void CalcForceDelta(int d, float dir)
Definition: Lcp.cpp:342
idVecX a
Definition: Lcp.cpp:812
idVecX b
Definition: Lcp.cpp:810
GLubyte GLubyte b
Definition: glext.h:4662
idVecX solveCache2
Definition: Lcp.cpp:817
GLdouble GLdouble GLdouble r
Definition: glext.h:2951
void RemoveClamped(int r)
Definition: Lcp.cpp:957
idVecX delta_f
Definition: Lcp.cpp:813
void SolveClamped(idVecX &x, const float *b)
Definition: Lcp.cpp:860
virtual void SetMaxIterations(int max)
Definition: Lcp.cpp:1633
idVecX hi
Definition: Lcp.cpp:811
idVecX hi
Definition: Lcp.cpp:54
bool GetBool(void) const
Definition: CVarSystem.h:142
tuple f
Definition: idal.py:89
const float LCP_DELTA_ACCEL_EPSILON
Definition: Lcp.cpp:36
idVecX diagonal
Definition: Lcp.cpp:58
idVecX delta_a
Definition: Lcp.cpp:56
int numUnbounded
Definition: Lcp.cpp:59
void CalcAccelDelta(int d)
Definition: Lcp.cpp:377
GLdouble y1
Definition: qgl.h:415
void SetData(int rows, int columns, float *data)
Definition: Matrix.h:2278
int maxIterations
Definition: Lcp.h:74
idVecX solveCache1
Definition: Lcp.cpp:816
bool padded
Definition: Lcp.cpp:65
int * permuted
Definition: Lcp.cpp:824
void SetData(int length, float *data)
Definition: Vector.h:1756
idMatX m
Definition: Lcp.cpp:809
idVecX b
Definition: Lcp.cpp:53
void CalcAccelDelta(int d)
Definition: Lcp.cpp:1167
bool padded
Definition: Lcp.cpp:825
int numClamped
Definition: Lcp.cpp:60
GLint j
Definition: qgl.h:264
float dot(float a[], float b[])
Definition: Model_lwo.cpp:3883
char * va(const char *fmt,...)
Definition: Str.cpp:1568
void ChangeForce(int d, float step)
Definition: Lcp.cpp:1186
#define max(x, y)
Definition: os.h:70
idVecX a
Definition: Lcp.cpp:55
idVecX lo
Definition: Lcp.cpp:54
const float * ToFloatPtr(void) const
Definition: Matrix.h:2935
static class idCommon * common
Definition: Lib.h:53
void ChangeAccel(int d, float step)
Definition: Lcp.cpp:409
idMatX clamped
Definition: Lcp.cpp:814
idSIMDProcessor * SIMDProcessor
Definition: Simd.cpp:43