Planet
navi homePPSaboutscreenshotsdownloaddevelopmentforum

source: code/branches/ode/ode-0.9/ode/src/lcp.cpp @ 216

Last change on this file since 216 was 216, checked in by mathiask, 16 years ago

[Physik] add ode-0.9

File size: 52.0 KB
Line 
1/*************************************************************************
2 *                                                                       *
3 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
4 * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
5 *                                                                       *
6 * This library is free software; you can redistribute it and/or         *
7 * modify it under the terms of EITHER:                                  *
8 *   (1) The GNU Lesser General Public License as published by the Free  *
9 *       Software Foundation; either version 2.1 of the License, or (at  *
10 *       your option) any later version. The text of the GNU Lesser      *
11 *       General Public License is included with this library in the     *
12 *       file LICENSE.TXT.                                               *
13 *   (2) The BSD-style license that is included with this library in     *
14 *       the file LICENSE-BSD.TXT.                                       *
15 *                                                                       *
16 * This library is distributed in the hope that it will be useful,       *
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
20 *                                                                       *
21 *************************************************************************/
22
23/*
24
25
26THE ALGORITHM
27-------------
28
29solve A*x = b+w, with x and w subject to certain LCP conditions.
30each x(i),w(i) must lie on one of the three line segments in the following
31diagram. each line segment corresponds to one index set :
32
33     w(i)
34     /|\      |           :
35      |       |           :
36      |       |i in N     :
37  w>0 |       |state[i]=0 :
38      |       |           :
39      |       |           :  i in C
40  w=0 +       +-----------------------+
41      |                   :           |
42      |                   :           |
43  w<0 |                   :           |i in N
44      |                   :           |state[i]=1
45      |                   :           |
46      |                   :           |
47      +-------|-----------|-----------|----------> x(i)
48             lo           0           hi
49
50the Dantzig algorithm proceeds as follows:
51  for i=1:n
52    * if (x(i),w(i)) is not on the line, push x(i) and w(i) positive or
53      negative towards the line. as this is done, the other (x(j),w(j))
54      for j<i are constrained to be on the line. if any (x,w) reaches the
55      end of a line segment then it is switched between index sets.
56    * i is added to the appropriate index set depending on what line segment
57      it hits.
58
59we restrict lo(i) <= 0 and hi(i) >= 0. this makes the algorithm a bit
60simpler, because the starting point for x(i),w(i) is always on the dotted
61line x=0 and x will only ever increase in one direction, so it can only hit
62two out of the three line segments.
63
64
65NOTES
66-----
67
68this is an implementation of "lcp_dantzig2_ldlt.m" and "lcp_dantzig_lohi.m".
69the implementation is split into an LCP problem object (dLCP) and an LCP
70driver function. most optimization occurs in the dLCP object.
71
72a naive implementation of the algorithm requires either a lot of data motion
73or a lot of permutation-array lookup, because we are constantly re-ordering
74rows and columns. to avoid this and make a more optimized algorithm, a
75non-trivial data structure is used to represent the matrix A (this is
76implemented in the fast version of the dLCP object).
77
78during execution of this algorithm, some indexes in A are clamped (set C),
79some are non-clamped (set N), and some are "don't care" (where x=0).
80A,x,b,w (and other problem vectors) are permuted such that the clamped
81indexes are first, the unclamped indexes are next, and the don't-care
82indexes are last. this permutation is recorded in the array `p'.
83initially p = 0..n-1, and as the rows and columns of A,x,b,w are swapped,
84the corresponding elements of p are swapped.
85
86because the C and N elements are grouped together in the rows of A, we can do
87lots of work with a fast dot product function. if A,x,etc were not permuted
88and we only had a permutation array, then those dot products would be much
89slower as we would have a permutation array lookup in some inner loops.
90
91A is accessed through an array of row pointers, so that element (i,j) of the
92permuted matrix is A[i][j]. this makes row swapping fast. for column swapping
93we still have to actually move the data.
94
95during execution of this algorithm we maintain an L*D*L' factorization of
96the clamped submatrix of A (call it `AC') which is the top left nC*nC
97submatrix of A. there are two ways we could arrange the rows/columns in AC.
98
99(1) AC is always permuted such that L*D*L' = AC. this causes a problem
100    when a row/column is removed from C, because then all the rows/columns of A
101    between the deleted index and the end of C need to be rotated downward.
102    this results in a lot of data motion and slows things down.
103(2) L*D*L' is actually a factorization of a *permutation* of AC (which is
104    itself a permutation of the underlying A). this is what we do - the
105    permutation is recorded in the vector C. call this permutation A[C,C].
106    when a row/column is removed from C, all we have to do is swap two
107    rows/columns and manipulate C.
108
109*/
110
111#include <ode/common.h>
112#include "lcp.h"
113#include <ode/matrix.h>
114#include <ode/misc.h>
115#include "mat.h"                // for testing
116#include <ode/timer.h>          // for testing
117
118//***************************************************************************
119// code generation parameters
120
121// LCP debugging (mosty for fast dLCP) - this slows things down a lot
122//#define DEBUG_LCP
123
124//#define dLCP_SLOW             // use slow dLCP object
125#define dLCP_FAST               // use fast dLCP object
126
127// option 1 : matrix row pointers (less data copying)
128#define ROWPTRS
129#define ATYPE dReal **
130#define AROW(i) (A[i])
131
132// option 2 : no matrix row pointers (slightly faster inner loops)
133//#define NOROWPTRS
134//#define ATYPE dReal *
135//#define AROW(i) (A+(i)*nskip)
136
137// use protected, non-stack memory allocation system
138
139#ifdef dUSE_MALLOC_FOR_ALLOCA
140extern unsigned int dMemoryFlag;
141
142#define ALLOCA(t,v,s) t* v = (t*) malloc(s)
143#define UNALLOCA(t)  free(t)
144
145#else
146
147#define ALLOCA(t,v,s) t* v =(t*)dALLOCA16(s)
148#define UNALLOCA(t)  /* nothing */
149
150#endif
151
152#define NUB_OPTIMIZATIONS
153
154//***************************************************************************
155
156// swap row/column i1 with i2 in the n*n matrix A. the leading dimension of
157// A is nskip. this only references and swaps the lower triangle.
158// if `do_fast_row_swaps' is nonzero and row pointers are being used, then
159// rows will be swapped by exchanging row pointers. otherwise the data will
160// be copied.
161
162static void swapRowsAndCols (ATYPE A, int n, int i1, int i2, int nskip,
163                             int do_fast_row_swaps)
164{
165  int i;
166  dAASSERT (A && n > 0 && i1 >= 0 && i2 >= 0 && i1 < n && i2 < n &&
167            nskip >= n && i1 < i2);
168
169# ifdef ROWPTRS
170  for (i=i1+1; i<i2; i++) A[i1][i] = A[i][i1];
171  for (i=i1+1; i<i2; i++) A[i][i1] = A[i2][i];
172  A[i1][i2] = A[i1][i1];
173  A[i1][i1] = A[i2][i1];
174  A[i2][i1] = A[i2][i2];
175  // swap rows, by swapping row pointers
176  if (do_fast_row_swaps) {
177    dReal *tmpp;
178    tmpp = A[i1];
179    A[i1] = A[i2];
180    A[i2] = tmpp;
181  }
182  else {
183    ALLOCA (dReal,tmprow,n * sizeof(dReal));
184
185#ifdef dUSE_MALLOC_FOR_ALLOCA
186    if (tmprow == NULL) {
187      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;     
188      return;
189    }
190#endif
191
192    memcpy (tmprow,A[i1],n * sizeof(dReal));
193    memcpy (A[i1],A[i2],n * sizeof(dReal));
194    memcpy (A[i2],tmprow,n * sizeof(dReal));
195    UNALLOCA(tmprow);
196  }
197  // swap columns the hard way
198  for (i=i2+1; i<n; i++) {
199    dReal tmp = A[i][i1];
200    A[i][i1] = A[i][i2];
201    A[i][i2] = tmp;
202  }
203# else
204  dReal tmp;
205  ALLOCA (dReal,tmprow,n * sizeof(dReal));
206
207#ifdef dUSE_MALLOC_FOR_ALLOCA
208  if (tmprow == NULL) {
209    return;
210  }
211#endif
212
213  if (i1 > 0) {
214    memcpy (tmprow,A+i1*nskip,i1*sizeof(dReal));
215    memcpy (A+i1*nskip,A+i2*nskip,i1*sizeof(dReal));
216    memcpy (A+i2*nskip,tmprow,i1*sizeof(dReal));
217  }
218  for (i=i1+1; i<i2; i++) {
219    tmp = A[i2*nskip+i];
220    A[i2*nskip+i] = A[i*nskip+i1];
221    A[i*nskip+i1] = tmp;
222  }
223  tmp = A[i1*nskip+i1];
224  A[i1*nskip+i1] = A[i2*nskip+i2];
225  A[i2*nskip+i2] = tmp;
226  for (i=i2+1; i<n; i++) {
227    tmp = A[i*nskip+i1];
228    A[i*nskip+i1] = A[i*nskip+i2];
229    A[i*nskip+i2] = tmp;
230  }
231  UNALLOCA(tmprow);
232# endif
233
234}
235
236
237// swap two indexes in the n*n LCP problem. i1 must be <= i2.
238
239static void swapProblem (ATYPE A, dReal *x, dReal *b, dReal *w, dReal *lo,
240                         dReal *hi, int *p, int *state, int *findex,
241                         int n, int i1, int i2, int nskip,
242                         int do_fast_row_swaps)
243{
244  dReal tmp;
245  int tmpi;
246  dIASSERT (n>0 && i1 >=0 && i2 >= 0 && i1 < n && i2 < n && nskip >= n &&
247            i1 <= i2);
248  if (i1==i2) return;
249  swapRowsAndCols (A,n,i1,i2,nskip,do_fast_row_swaps);
250#ifdef dUSE_MALLOC_FOR_ALLOCA
251  if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY)
252    return;
253#endif
254  tmp = x[i1];
255  x[i1] = x[i2];
256  x[i2] = tmp;
257  tmp = b[i1];
258  b[i1] = b[i2];
259  b[i2] = tmp;
260  tmp = w[i1];
261  w[i1] = w[i2];
262  w[i2] = tmp;
263  tmp = lo[i1];
264  lo[i1] = lo[i2];
265  lo[i2] = tmp;
266  tmp = hi[i1];
267  hi[i1] = hi[i2];
268  hi[i2] = tmp;
269  tmpi = p[i1];
270  p[i1] = p[i2];
271  p[i2] = tmpi;
272  tmpi = state[i1];
273  state[i1] = state[i2];
274  state[i2] = tmpi;
275  if (findex) {
276    tmpi = findex[i1];
277    findex[i1] = findex[i2];
278    findex[i2] = tmpi;
279  }
280}
281
282
283// for debugging - check that L,d is the factorization of A[C,C].
284// A[C,C] has size nC*nC and leading dimension nskip.
285// L has size nC*nC and leading dimension nskip.
286// d has size nC.
287
288#ifdef DEBUG_LCP
289
290static void checkFactorization (ATYPE A, dReal *_L, dReal *_d,
291                                int nC, int *C, int nskip)
292{
293  int i,j;
294  if (nC==0) return;
295
296  // get A1=A, copy the lower triangle to the upper triangle, get A2=A[C,C]
297  dMatrix A1 (nC,nC);
298  for (i=0; i<nC; i++) {
299    for (j=0; j<=i; j++) A1(i,j) = A1(j,i) = AROW(i)[j];
300  }
301  dMatrix A2 = A1.select (nC,C,nC,C);
302
303  // printf ("A1=\n"); A1.print(); printf ("\n");
304  // printf ("A2=\n"); A2.print(); printf ("\n");
305
306  // compute A3 = L*D*L'
307  dMatrix L (nC,nC,_L,nskip,1);
308  dMatrix D (nC,nC);
309  for (i=0; i<nC; i++) D(i,i) = 1/_d[i];
310  L.clearUpperTriangle();
311  for (i=0; i<nC; i++) L(i,i) = 1;
312  dMatrix A3 = L * D * L.transpose();
313
314  // printf ("L=\n"); L.print(); printf ("\n");
315  // printf ("D=\n"); D.print(); printf ("\n");
316  // printf ("A3=\n"); A2.print(); printf ("\n");
317
318  // compare A2 and A3
319  dReal diff = A2.maxDifference (A3);
320  if (diff > 1e-8)
321    dDebug (0,"L*D*L' check, maximum difference = %.6e\n",diff);
322}
323
324#endif
325
326
327// for debugging
328
329#ifdef DEBUG_LCP
330
331static void checkPermutations (int i, int n, int nC, int nN, int *p, int *C)
332{
333  int j,k;
334  dIASSERT (nC>=0 && nN>=0 && (nC+nN)==i && i < n);
335  for (k=0; k<i; k++) dIASSERT (p[k] >= 0 && p[k] < i);
336  for (k=i; k<n; k++) dIASSERT (p[k] == k);
337  for (j=0; j<nC; j++) {
338    int C_is_bad = 1;
339    for (k=0; k<nC; k++) if (C[k]==j) C_is_bad = 0;
340    dIASSERT (C_is_bad==0);
341  }
342}
343
344#endif
345
346//***************************************************************************
347// dLCP manipulator object. this represents an n*n LCP problem.
348//
349// two index sets C and N are kept. each set holds a subset of
350// the variable indexes 0..n-1. an index can only be in one set.
351// initially both sets are empty.
352//
353// the index set C is special: solutions to A(C,C)\A(C,i) can be generated.
354
355#ifdef dLCP_SLOW
356
357// simple but slow implementation of dLCP, for testing the LCP drivers.
358
359#include "array.h"
360
361struct dLCP {
362  int n,nub,nskip;
363  dReal *Adata,*x,*b,*w,*lo,*hi;        // LCP problem data
364  ATYPE A;                              // A rows
365  dArray<int> C,N;                      // index sets
366  int last_i_for_solve1;                // last i value given to solve1
367
368  dLCP (int _n, int _nub, dReal *_Adata, dReal *_x, dReal *_b, dReal *_w,
369        dReal *_lo, dReal *_hi, dReal *_L, dReal *_d,
370        dReal *_Dell, dReal *_ell, dReal *_tmp,
371        int *_state, int *_findex, int *_p, int *_C, dReal **Arows);
372  // the constructor is given an initial problem description (A,x,b,w) and
373  // space for other working data (which the caller may allocate on the stack).
374  // some of this data is specific to the fast dLCP implementation.
375  // the matrices A and L have size n*n, vectors have size n*1.
376  // A represents a symmetric matrix but only the lower triangle is valid.
377  // `nub' is the number of unbounded indexes at the start. all the indexes
378  // 0..nub-1 will be put into C.
379
380  ~dLCP();
381
382  int getNub() { return nub; }
383  // return the value of `nub'. the constructor may want to change it,
384  // so the caller should find out its new value.
385
386  // transfer functions: transfer index i to the given set (C or N). indexes
387  // less than `nub' can never be given. A,x,b,w,etc may be permuted by these
388  // functions, the caller must be robust to this.
389
390  void transfer_i_to_C (int i);
391    // this assumes C and N span 1:i-1. this also assumes that solve1() has
392    // been recently called for the same i without any other transfer
393    // functions in between (thereby allowing some data reuse for the fast
394    // implementation).
395  void transfer_i_to_N (int i);
396    // this assumes C and N span 1:i-1.
397  void transfer_i_from_N_to_C (int i);
398  void transfer_i_from_C_to_N (int i);
399
400  int numC();
401  int numN();
402  // return the number of indexes in set C/N
403
404  int indexC (int i);
405  int indexN (int i);
406  // return index i in set C/N.
407
408  // accessor and arithmetic functions. Aij translates as A(i,j), etc.
409  // make sure that only the lower triangle of A is ever referenced.
410
411  dReal Aii (int i);
412  dReal AiC_times_qC (int i, dReal *q);
413  dReal AiN_times_qN (int i, dReal *q);                 // for all Nj
414  void pN_equals_ANC_times_qC (dReal *p, dReal *q);     // for all Nj
415  void pN_plusequals_ANi (dReal *p, int i, int sign=1);
416    // for all Nj. sign = +1,-1. assumes i > maximum index in N.
417  void pC_plusequals_s_times_qC (dReal *p, dReal s, dReal *q);
418  void pN_plusequals_s_times_qN (dReal *p, dReal s, dReal *q); // for all Nj
419  void solve1 (dReal *a, int i, int dir=1, int only_transfer=0);
420    // get a(C) = - dir * A(C,C) \ A(C,i). dir must be +/- 1.
421    // the fast version of this function computes some data that is needed by
422    // transfer_i_to_C(). if only_transfer is nonzero then this function
423    // *only* computes that data, it does not set a(C).
424
425  void unpermute();
426  // call this at the end of the LCP function. if the x/w values have been
427  // permuted then this will unscramble them.
428};
429
430
431dLCP::dLCP (int _n, int _nub, dReal *_Adata, dReal *_x, dReal *_b, dReal *_w,
432            dReal *_lo, dReal *_hi, dReal *_L, dReal *_d,
433            dReal *_Dell, dReal *_ell, dReal *_tmp,
434            int *_state, int *_findex, int *_p, int *_C, dReal **Arows)
435{
436  dUASSERT (_findex==0,"slow dLCP object does not support findex array");
437
438  n = _n;
439  nub = _nub;
440  Adata = _Adata;
441  A = 0;
442  x = _x;
443  b = _b;
444  w = _w;
445  lo = _lo;
446  hi = _hi;
447  nskip = dPAD(n);
448  dSetZero (x,n);
449  last_i_for_solve1 = -1;
450
451  int i,j;
452  C.setSize (n);
453  N.setSize (n);
454  for (i=0; i<n; i++) {
455    C[i] = 0;
456    N[i] = 0;
457  }
458
459# ifdef ROWPTRS
460  // make matrix row pointers
461  A = Arows;
462  for (i=0; i<n; i++) A[i] = Adata + i*nskip;
463# else
464  A = Adata;
465# endif
466
467  // lets make A symmetric
468  for (i=0; i<n; i++) {
469    for (j=i+1; j<n; j++) AROW(i)[j] = AROW(j)[i];
470  }
471
472  // if nub>0, put all indexes 0..nub-1 into C and solve for x
473  if (nub > 0) {
474    for (i=0; i<nub; i++) memcpy (_L+i*nskip,AROW(i),(i+1)*sizeof(dReal));
475    dFactorLDLT (_L,_d,nub,nskip);
476    memcpy (x,b,nub*sizeof(dReal));
477    dSolveLDLT (_L,_d,x,nub,nskip);
478    dSetZero (_w,nub);
479    for (i=0; i<nub; i++) C[i] = 1;
480  }
481}
482
483
484dLCP::~dLCP()
485{
486}
487
488
489void dLCP::transfer_i_to_C (int i)
490{
491  if (i < nub) dDebug (0,"bad i");
492  if (C[i]) dDebug (0,"i already in C");
493  if (N[i]) dDebug (0,"i already in N");
494  for (int k=0; k<i; k++) {
495    if (!(C[k] ^ N[k])) dDebug (0,"assumptions for C and N violated");
496  }
497  for (int k=i; k<n; k++)
498    if (C[k] || N[k]) dDebug (0,"assumptions for C and N violated");
499  if (i != last_i_for_solve1) dDebug (0,"assumptions for i violated");
500  last_i_for_solve1 = -1;
501  C[i] = 1;
502}
503
504
505void dLCP::transfer_i_to_N (int i)
506{
507  if (i < nub) dDebug (0,"bad i");
508  if (C[i]) dDebug (0,"i already in C");
509  if (N[i]) dDebug (0,"i already in N");
510  for (int k=0; k<i; k++)
511    if (!C[k] && !N[k]) dDebug (0,"assumptions for C and N violated");
512  for (int k=i; k<n; k++)
513    if (C[k] || N[k]) dDebug (0,"assumptions for C and N violated");
514  last_i_for_solve1 = -1;
515  N[i] = 1;
516}
517
518
519void dLCP::transfer_i_from_N_to_C (int i)
520{
521  if (i < nub) dDebug (0,"bad i");
522  if (C[i]) dDebug (0,"i already in C");
523  if (!N[i]) dDebug (0,"i not in N");
524  last_i_for_solve1 = -1;
525  N[i] = 0;
526  C[i] = 1;
527}
528
529
530void dLCP::transfer_i_from_C_to_N (int i)
531{
532  if (i < nub) dDebug (0,"bad i");
533  if (N[i]) dDebug (0,"i already in N");
534  if (!C[i]) dDebug (0,"i not in C");
535  last_i_for_solve1 = -1;
536  C[i] = 0;
537  N[i] = 1;
538}
539
540
541int dLCP::numC()
542{
543  int i,count=0;
544  for (i=0; i<n; i++) if (C[i]) count++;
545  return count;
546}
547
548
549int dLCP::numN()
550{
551  int i,count=0;
552  for (i=0; i<n; i++) if (N[i]) count++;
553  return count;
554}
555
556
557int dLCP::indexC (int i)
558{
559  int k,count=0;
560  for (k=0; k<n; k++) {
561    if (C[k]) {
562      if (count==i) return k;
563      count++;
564    }
565  }
566  dDebug (0,"bad index C (%d)",i);
567  return 0;
568}
569
570
571int dLCP::indexN (int i)
572{
573  int k,count=0;
574  for (k=0; k<n; k++) {
575    if (N[k]) {
576      if (count==i) return k;
577      count++;
578    }
579  }
580  dDebug (0,"bad index into N");
581  return 0;
582}
583
584
585dReal dLCP::Aii (int i)
586{
587  return AROW(i)[i];
588}
589
590
591dReal dLCP::AiC_times_qC (int i, dReal *q)
592{
593  dReal sum = 0;
594  for (int k=0; k<n; k++) if (C[k]) sum += AROW(i)[k] * q[k];
595  return sum;
596}
597
598
599dReal dLCP::AiN_times_qN (int i, dReal *q)
600{
601  dReal sum = 0;
602  for (int k=0; k<n; k++) if (N[k]) sum += AROW(i)[k] * q[k];
603  return sum;
604}
605
606
607void dLCP::pN_equals_ANC_times_qC (dReal *p, dReal *q)
608{
609  dReal sum;
610  for (int ii=0; ii<n; ii++) if (N[ii]) {
611    sum = 0;
612    for (int jj=0; jj<n; jj++) if (C[jj]) sum += AROW(ii)[jj] * q[jj];
613    p[ii] = sum;
614  }
615}
616
617
618void dLCP::pN_plusequals_ANi (dReal *p, int i, int sign)
619{
620  int k;
621  for (k=0; k<n; k++) if (N[k] && k >= i) dDebug (0,"N assumption violated");
622  if (sign > 0) {
623    for (k=0; k<n; k++) if (N[k]) p[k] += AROW(i)[k];
624  }
625  else {
626    for (k=0; k<n; k++) if (N[k]) p[k] -= AROW(i)[k];
627  }
628}
629
630
631void dLCP::pC_plusequals_s_times_qC (dReal *p, dReal s, dReal *q)
632{
633  for (int k=0; k<n; k++) if (C[k]) p[k] += s*q[k];
634}
635
636
637void dLCP::pN_plusequals_s_times_qN (dReal *p, dReal s, dReal *q)
638{
639  for (int k=0; k<n; k++) if (N[k]) p[k] += s*q[k];
640}
641
642
643void dLCP::solve1 (dReal *a, int i, int dir, int only_transfer)
644{
645
646  ALLOCA (dReal,AA,n*nskip*sizeof(dReal));
647#ifdef dUSE_MALLOC_FOR_ALLOCA
648    if (AA == NULL) {
649      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
650      return;
651    }
652#endif
653  ALLOCA (dReal,dd,n*sizeof(dReal));
654#ifdef dUSE_MALLOC_FOR_ALLOCA
655    if (dd == NULL) {
656      UNALLOCA(AA);
657      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
658      return;
659    }
660#endif
661  ALLOCA (dReal,bb,n*sizeof(dReal));
662#ifdef dUSE_MALLOC_FOR_ALLOCA
663    if (bb == NULL) {
664      UNALLOCA(AA);
665      UNALLOCA(dd);
666      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
667      return;
668    }
669#endif
670
671  int ii,jj,AAi,AAj;
672
673  last_i_for_solve1 = i;
674  AAi = 0;
675  for (ii=0; ii<n; ii++) if (C[ii]) {
676    AAj = 0;
677    for (jj=0; jj<n; jj++) if (C[jj]) {
678      AA[AAi*nskip+AAj] = AROW(ii)[jj];
679      AAj++;
680    }
681    bb[AAi] = AROW(i)[ii];
682    AAi++;
683  }
684  if (AAi==0) {
685      UNALLOCA (AA);
686      UNALLOCA (dd);
687      UNALLOCA (bb);
688      return;
689  }
690
691  dFactorLDLT (AA,dd,AAi,nskip);
692  dSolveLDLT (AA,dd,bb,AAi,nskip);
693
694  AAi=0;
695  if (dir > 0) {
696    for (ii=0; ii<n; ii++) if (C[ii]) a[ii] = -bb[AAi++];
697  }
698  else {
699    for (ii=0; ii<n; ii++) if (C[ii]) a[ii] = bb[AAi++];
700  }
701
702  UNALLOCA (AA);
703  UNALLOCA (dd);
704  UNALLOCA (bb);
705}
706
707
708void dLCP::unpermute()
709{
710}
711
712#endif // dLCP_SLOW
713
714//***************************************************************************
715// fast implementation of dLCP. see the above definition of dLCP for
716// interface comments.
717//
718// `p' records the permutation of A,x,b,w,etc. p is initially 1:n and is
719// permuted as the other vectors/matrices are permuted.
720//
721// A,x,b,w,lo,hi,state,findex,p,c are permuted such that sets C,N have
722// contiguous indexes. the don't-care indexes follow N.
723//
724// an L*D*L' factorization is maintained of A(C,C), and whenever indexes are
725// added or removed from the set C the factorization is updated.
726// thus L*D*L'=A[C,C], i.e. a permuted top left nC*nC submatrix of A.
727// the leading dimension of the matrix L is always `nskip'.
728//
729// at the start there may be other indexes that are unbounded but are not
730// included in `nub'. dLCP will permute the matrix so that absolutely all
731// unbounded vectors are at the start. thus there may be some initial
732// permutation.
733//
734// the algorithms here assume certain patterns, particularly with respect to
735// index transfer.
736
737#ifdef dLCP_FAST
738
739struct dLCP {
740  int n,nskip,nub;
741  ATYPE A;                              // A rows
742  dReal *Adata,*x,*b,*w,*lo,*hi;        // permuted LCP problem data
743  dReal *L,*d;                          // L*D*L' factorization of set C
744  dReal *Dell,*ell,*tmp;
745  int *state,*findex,*p,*C;
746  int nC,nN;                            // size of each index set
747
748  dLCP (int _n, int _nub, dReal *_Adata, dReal *_x, dReal *_b, dReal *_w,
749        dReal *_lo, dReal *_hi, dReal *_L, dReal *_d,
750        dReal *_Dell, dReal *_ell, dReal *_tmp,
751        int *_state, int *_findex, int *_p, int *_C, dReal **Arows);
752  int getNub() { return nub; }
753  void transfer_i_to_C (int i);
754  void transfer_i_to_N (int i)
755    { nN++; }                   // because we can assume C and N span 1:i-1
756  void transfer_i_from_N_to_C (int i);
757  void transfer_i_from_C_to_N (int i);
758  int numC() { return nC; }
759  int numN() { return nN; }
760  int indexC (int i) { return i; }
761  int indexN (int i) { return i+nC; }
762  dReal Aii (int i) { return AROW(i)[i]; }
763  dReal AiC_times_qC (int i, dReal *q) { return dDot (AROW(i),q,nC); }
764  dReal AiN_times_qN (int i, dReal *q) { return dDot (AROW(i)+nC,q+nC,nN); }
765  void pN_equals_ANC_times_qC (dReal *p, dReal *q);
766  void pN_plusequals_ANi (dReal *p, int i, int sign=1);
767  void pC_plusequals_s_times_qC (dReal *p, dReal s, dReal *q)
768    { for (int i=0; i<nC; i++) p[i] += s*q[i]; }
769  void pN_plusequals_s_times_qN (dReal *p, dReal s, dReal *q)
770    { for (int i=0; i<nN; i++) p[i+nC] += s*q[i+nC]; }
771  void solve1 (dReal *a, int i, int dir=1, int only_transfer=0);
772  void unpermute();
773};
774
775
776dLCP::dLCP (int _n, int _nub, dReal *_Adata, dReal *_x, dReal *_b, dReal *_w,
777            dReal *_lo, dReal *_hi, dReal *_L, dReal *_d,
778            dReal *_Dell, dReal *_ell, dReal *_tmp,
779            int *_state, int *_findex, int *_p, int *_C, dReal **Arows)
780{
781  n = _n;
782  nub = _nub;
783  Adata = _Adata;
784  A = 0;
785  x = _x;
786  b = _b;
787  w = _w;
788  lo = _lo;
789  hi = _hi;
790  L = _L;
791  d = _d;
792  Dell = _Dell;
793  ell = _ell;
794  tmp = _tmp;
795  state = _state;
796  findex = _findex;
797  p = _p;
798  C = _C;
799  nskip = dPAD(n);
800  dSetZero (x,n);
801
802  int k;
803
804# ifdef ROWPTRS
805  // make matrix row pointers
806  A = Arows;
807  for (k=0; k<n; k++) A[k] = Adata + k*nskip;
808# else
809  A = Adata;
810# endif
811
812  nC = 0;
813  nN = 0;
814  for (k=0; k<n; k++) p[k]=k;           // initially unpermuted
815
816  /*
817  // for testing, we can do some random swaps in the area i > nub
818  if (nub < n) {
819    for (k=0; k<100; k++) {
820      int i1,i2;
821      do {
822        i1 = dRandInt(n-nub)+nub;
823        i2 = dRandInt(n-nub)+nub;
824      }
825      while (i1 > i2);
826      //printf ("--> %d %d\n",i1,i2);
827      swapProblem (A,x,b,w,lo,hi,p,state,findex,n,i1,i2,nskip,0);
828    }
829  }
830  */
831
832  // permute the problem so that *all* the unbounded variables are at the
833  // start, i.e. look for unbounded variables not included in `nub'. we can
834  // potentially push up `nub' this way and get a bigger initial factorization.
835  // note that when we swap rows/cols here we must not just swap row pointers,
836  // as the initial factorization relies on the data being all in one chunk.
837  // variables that have findex >= 0 are *not* considered to be unbounded even
838  // if lo=-inf and hi=inf - this is because these limits may change during the
839  // solution process.
840
841  for (k=nub; k<n; k++) {
842    if (findex && findex[k] >= 0) continue;
843    if (lo[k]==-dInfinity && hi[k]==dInfinity) {
844      swapProblem (A,x,b,w,lo,hi,p,state,findex,n,nub,k,nskip,0);
845      nub++;
846    }
847  }
848
849  // if there are unbounded variables at the start, factorize A up to that
850  // point and solve for x. this puts all indexes 0..nub-1 into C.
851  if (nub > 0) {
852    for (k=0; k<nub; k++) memcpy (L+k*nskip,AROW(k),(k+1)*sizeof(dReal));
853    dFactorLDLT (L,d,nub,nskip);
854    memcpy (x,b,nub*sizeof(dReal));
855    dSolveLDLT (L,d,x,nub,nskip);
856    dSetZero (w,nub);
857    for (k=0; k<nub; k++) C[k] = k;
858    nC = nub;
859  }
860
861  // permute the indexes > nub such that all findex variables are at the end
862  if (findex) {
863    int num_at_end = 0;
864    for (k=n-1; k >= nub; k--) {
865      if (findex[k] >= 0) {
866        swapProblem (A,x,b,w,lo,hi,p,state,findex,n,k,n-1-num_at_end,nskip,1);
867        num_at_end++;
868      }
869    }
870  }
871
872  // print info about indexes
873  /*
874  for (k=0; k<n; k++) {
875    if (k<nub) printf ("C");
876    else if (lo[k]==-dInfinity && hi[k]==dInfinity) printf ("c");
877    else printf (".");
878  }
879  printf ("\n");
880  */
881}
882
883
884void dLCP::transfer_i_to_C (int i)
885{
886  int j;
887  if (nC > 0) {
888    // ell,Dell were computed by solve1(). note, ell = D \ L1solve (L,A(i,C))
889    for (j=0; j<nC; j++) L[nC*nskip+j] = ell[j];
890    d[nC] = dRecip (AROW(i)[i] - dDot(ell,Dell,nC));
891  }
892  else {
893    d[0] = dRecip (AROW(i)[i]);
894  }
895  swapProblem (A,x,b,w,lo,hi,p,state,findex,n,nC,i,nskip,1);
896  C[nC] = nC;
897  nC++;
898
899# ifdef DEBUG_LCP
900  checkFactorization (A,L,d,nC,C,nskip);
901  if (i < (n-1)) checkPermutations (i+1,n,nC,nN,p,C);
902# endif
903}
904
905
906void dLCP::transfer_i_from_N_to_C (int i)
907{
908  int j;
909  if (nC > 0) {
910    dReal *aptr = AROW(i);
911#   ifdef NUB_OPTIMIZATIONS
912    // if nub>0, initial part of aptr unpermuted
913    for (j=0; j<nub; j++) Dell[j] = aptr[j];
914    for (j=nub; j<nC; j++) Dell[j] = aptr[C[j]];
915#   else
916    for (j=0; j<nC; j++) Dell[j] = aptr[C[j]];
917#   endif
918    dSolveL1 (L,Dell,nC,nskip);
919    for (j=0; j<nC; j++) ell[j] = Dell[j] * d[j];
920    for (j=0; j<nC; j++) L[nC*nskip+j] = ell[j];
921    d[nC] = dRecip (AROW(i)[i] - dDot(ell,Dell,nC));
922  }
923  else {
924    d[0] = dRecip (AROW(i)[i]);
925  }
926  swapProblem (A,x,b,w,lo,hi,p,state,findex,n,nC,i,nskip,1);
927  C[nC] = nC;
928  nN--;
929  nC++;
930
931  // @@@ TO DO LATER
932  // if we just finish here then we'll go back and re-solve for
933  // delta_x. but actually we can be more efficient and incrementally
934  // update delta_x here. but if we do this, we wont have ell and Dell
935  // to use in updating the factorization later.
936
937# ifdef DEBUG_LCP
938  checkFactorization (A,L,d,nC,C,nskip);
939# endif
940}
941
942
943void dLCP::transfer_i_from_C_to_N (int i)
944{
945  // remove a row/column from the factorization, and adjust the
946  // indexes (black magic!)
947  int j,k;
948  for (j=0; j<nC; j++) if (C[j]==i) {
949    dLDLTRemove (A,C,L,d,n,nC,j,nskip);
950    for (k=0; k<nC; k++) if (C[k]==nC-1) {
951      C[k] = C[j];
952      if (j < (nC-1)) memmove (C+j,C+j+1,(nC-j-1)*sizeof(int));
953      break;
954    }
955    dIASSERT (k < nC);
956    break;
957  }
958  dIASSERT (j < nC);
959  swapProblem (A,x,b,w,lo,hi,p,state,findex,n,i,nC-1,nskip,1);
960  nC--;
961  nN++;
962
963# ifdef DEBUG_LCP
964  checkFactorization (A,L,d,nC,C,nskip);
965# endif
966}
967
968
969void dLCP::pN_equals_ANC_times_qC (dReal *p, dReal *q)
970{
971  // we could try to make this matrix-vector multiplication faster using
972  // outer product matrix tricks, e.g. with the dMultidotX() functions.
973  // but i tried it and it actually made things slower on random 100x100
974  // problems because of the overhead involved. so we'll stick with the
975  // simple method for now.
976  for (int i=0; i<nN; i++) p[i+nC] = dDot (AROW(i+nC),q,nC);
977}
978
979
980void dLCP::pN_plusequals_ANi (dReal *p, int i, int sign)
981{
982  dReal *aptr = AROW(i)+nC;
983  if (sign > 0) {
984    for (int i=0; i<nN; i++) p[i+nC] += aptr[i];
985  }
986  else {
987    for (int i=0; i<nN; i++) p[i+nC] -= aptr[i];
988  }
989}
990
991
992void dLCP::solve1 (dReal *a, int i, int dir, int only_transfer)
993{
994  // the `Dell' and `ell' that are computed here are saved. if index i is
995  // later added to the factorization then they can be reused.
996  //
997  // @@@ question: do we need to solve for entire delta_x??? yes, but
998  //     only if an x goes below 0 during the step.
999
1000  int j;
1001  if (nC > 0) {
1002    dReal *aptr = AROW(i);
1003#   ifdef NUB_OPTIMIZATIONS
1004    // if nub>0, initial part of aptr[] is guaranteed unpermuted
1005    for (j=0; j<nub; j++) Dell[j] = aptr[j];
1006    for (j=nub; j<nC; j++) Dell[j] = aptr[C[j]];
1007#   else
1008    for (j=0; j<nC; j++) Dell[j] = aptr[C[j]];
1009#   endif
1010    dSolveL1 (L,Dell,nC,nskip);
1011    for (j=0; j<nC; j++) ell[j] = Dell[j] * d[j];
1012
1013    if (!only_transfer) {
1014      for (j=0; j<nC; j++) tmp[j] = ell[j];
1015      dSolveL1T (L,tmp,nC,nskip);
1016      if (dir > 0) {
1017        for (j=0; j<nC; j++) a[C[j]] = -tmp[j];
1018      }
1019      else {
1020        for (j=0; j<nC; j++) a[C[j]] = tmp[j];
1021      }
1022    }
1023  }
1024}
1025
1026
1027void dLCP::unpermute()
1028{
1029  // now we have to un-permute x and w
1030  int j;
1031  ALLOCA (dReal,tmp,n*sizeof(dReal));
1032#ifdef dUSE_MALLOC_FOR_ALLOCA
1033    if (tmp == NULL) {
1034      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1035      return;
1036    }
1037#endif
1038  memcpy (tmp,x,n*sizeof(dReal));
1039  for (j=0; j<n; j++) x[p[j]] = tmp[j];
1040  memcpy (tmp,w,n*sizeof(dReal));
1041  for (j=0; j<n; j++) w[p[j]] = tmp[j];
1042
1043  UNALLOCA (tmp);
1044}
1045
1046#endif // dLCP_FAST
1047
1048//***************************************************************************
1049// an unoptimized Dantzig LCP driver routine for the basic LCP problem.
1050// must have lo=0, hi=dInfinity, and nub=0.
1051
1052void dSolveLCPBasic (int n, dReal *A, dReal *x, dReal *b,
1053                     dReal *w, int nub, dReal *lo, dReal *hi)
1054{
1055  dAASSERT (n>0 && A && x && b && w && nub == 0);
1056
1057  int i,k;
1058  int nskip = dPAD(n);
1059  ALLOCA (dReal,L,n*nskip*sizeof(dReal));
1060#ifdef dUSE_MALLOC_FOR_ALLOCA
1061    if (L == NULL) {
1062      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1063      return;
1064    }
1065#endif
1066  ALLOCA (dReal,d,n*sizeof(dReal));
1067#ifdef dUSE_MALLOC_FOR_ALLOCA
1068    if (d == NULL) {
1069      UNALLOCA(L);
1070      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1071      return;
1072    }
1073#endif
1074  ALLOCA (dReal,delta_x,n*sizeof(dReal));
1075#ifdef dUSE_MALLOC_FOR_ALLOCA
1076    if (delta_x == NULL) {
1077      UNALLOCA(d);
1078      UNALLOCA(L);
1079      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1080      return;
1081    }
1082#endif
1083  ALLOCA (dReal,delta_w,n*sizeof(dReal));
1084#ifdef dUSE_MALLOC_FOR_ALLOCA
1085    if (delta_w == NULL) {
1086      UNALLOCA(delta_x);
1087      UNALLOCA(d);
1088      UNALLOCA(L);
1089      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1090      return;
1091    }
1092#endif
1093  ALLOCA (dReal,Dell,n*sizeof(dReal));
1094#ifdef dUSE_MALLOC_FOR_ALLOCA
1095    if (Dell == NULL) {
1096      UNALLOCA(delta_w);
1097      UNALLOCA(delta_x);
1098      UNALLOCA(d);
1099      UNALLOCA(L);
1100      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1101      return;
1102    }
1103#endif
1104  ALLOCA (dReal,ell,n*sizeof(dReal));
1105#ifdef dUSE_MALLOC_FOR_ALLOCA
1106    if (ell == NULL) {
1107      UNALLOCA(Dell);
1108      UNALLOCA(delta_w);
1109      UNALLOCA(delta_x);
1110      UNALLOCA(d);
1111      UNALLOCA(L);
1112      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1113      return;
1114    }
1115#endif
1116  ALLOCA (dReal,tmp,n*sizeof(dReal));
1117#ifdef dUSE_MALLOC_FOR_ALLOCA
1118    if (tmp == NULL) {
1119      UNALLOCA(ell);
1120      UNALLOCA(Dell);
1121      UNALLOCA(delta_w);
1122      UNALLOCA(delta_x);
1123      UNALLOCA(d);
1124      UNALLOCA(L);
1125      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1126      return;
1127    }
1128#endif
1129  ALLOCA (dReal*,Arows,n*sizeof(dReal*));
1130#ifdef dUSE_MALLOC_FOR_ALLOCA
1131    if (Arows == NULL) {
1132      UNALLOCA(tmp);
1133      UNALLOCA(ell);
1134      UNALLOCA(Dell);
1135      UNALLOCA(delta_w);
1136      UNALLOCA(delta_x);
1137      UNALLOCA(d);
1138      UNALLOCA(L);
1139      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1140      return;
1141    }
1142#endif
1143  ALLOCA (int,p,n*sizeof(int));
1144#ifdef dUSE_MALLOC_FOR_ALLOCA
1145    if (p == NULL) {
1146      UNALLOCA(Arows);
1147      UNALLOCA(tmp);
1148      UNALLOCA(ell);
1149      UNALLOCA(Dell);
1150      UNALLOCA(delta_w);
1151      UNALLOCA(delta_x);
1152      UNALLOCA(d);
1153      UNALLOCA(L);
1154      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1155      return;
1156    }
1157#endif
1158  ALLOCA (int,C,n*sizeof(int));
1159#ifdef dUSE_MALLOC_FOR_ALLOCA
1160    if (C == NULL) {
1161      UNALLOCA(p);
1162      UNALLOCA(Arows);
1163      UNALLOCA(tmp);
1164      UNALLOCA(ell);
1165      UNALLOCA(Dell);
1166      UNALLOCA(delta_w);
1167      UNALLOCA(delta_x);
1168      UNALLOCA(d);
1169      UNALLOCA(L);
1170      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1171      return;
1172    }
1173#endif
1174  ALLOCA (int,dummy,n*sizeof(int));
1175#ifdef dUSE_MALLOC_FOR_ALLOCA
1176    if (dummy == NULL) {
1177      UNALLOCA(C);
1178      UNALLOCA(p);
1179      UNALLOCA(Arows);
1180      UNALLOCA(tmp);
1181      UNALLOCA(ell);
1182      UNALLOCA(Dell);
1183      UNALLOCA(delta_w);
1184      UNALLOCA(delta_x);
1185      UNALLOCA(d);
1186      UNALLOCA(L);
1187      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1188      return;
1189    }
1190#endif
1191
1192
1193  dLCP lcp (n,0,A,x,b,w,tmp,tmp,L,d,Dell,ell,tmp,dummy,dummy,p,C,Arows);
1194  nub = lcp.getNub();
1195
1196  for (i=0; i<n; i++) {
1197    w[i] = lcp.AiC_times_qC (i,x) - b[i];
1198    if (w[i] >= 0) {
1199      lcp.transfer_i_to_N (i);
1200    }
1201    else {
1202      for (;;) {
1203        // compute: delta_x(C) = -A(C,C)\A(C,i)
1204        dSetZero (delta_x,n);
1205        lcp.solve1 (delta_x,i);
1206#ifdef dUSE_MALLOC_FOR_ALLOCA
1207        if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) {
1208          UNALLOCA(dummy);
1209          UNALLOCA(C);
1210          UNALLOCA(p);
1211          UNALLOCA(Arows);
1212          UNALLOCA(tmp);
1213          UNALLOCA(ell);
1214          UNALLOCA(Dell);
1215          UNALLOCA(delta_w);
1216          UNALLOCA(delta_x);
1217          UNALLOCA(d);
1218          UNALLOCA(L);
1219          return;
1220        }
1221#endif
1222        delta_x[i] = 1;
1223
1224        // compute: delta_w = A*delta_x
1225        dSetZero (delta_w,n);
1226        lcp.pN_equals_ANC_times_qC (delta_w,delta_x);
1227        lcp.pN_plusequals_ANi (delta_w,i);
1228        delta_w[i] = lcp.AiC_times_qC (i,delta_x) + lcp.Aii(i);
1229
1230        // find index to switch
1231        int si = i;             // si = switch index
1232        int si_in_N = 0;        // set to 1 if si in N
1233        dReal s = -w[i]/delta_w[i];
1234
1235        if (s <= 0) {
1236          dMessage (d_ERR_LCP, "LCP internal error, s <= 0 (s=%.4e)",s);
1237          if (i < (n-1)) {
1238            dSetZero (x+i,n-i);
1239            dSetZero (w+i,n-i);
1240          }
1241          goto done;
1242        }
1243
1244        for (k=0; k < lcp.numN(); k++) {
1245          if (delta_w[lcp.indexN(k)] < 0) {
1246            dReal s2 = -w[lcp.indexN(k)] / delta_w[lcp.indexN(k)];
1247            if (s2 < s) {
1248              s = s2;
1249              si = lcp.indexN(k);
1250              si_in_N = 1;
1251            }
1252          }
1253        }
1254        for (k=0; k < lcp.numC(); k++) {
1255          if (delta_x[lcp.indexC(k)] < 0) {
1256            dReal s2 = -x[lcp.indexC(k)] / delta_x[lcp.indexC(k)];
1257            if (s2 < s) {
1258              s = s2;
1259              si = lcp.indexC(k);
1260              si_in_N = 0;
1261            }
1262          }
1263        }
1264
1265        // apply x = x + s * delta_x
1266        lcp.pC_plusequals_s_times_qC (x,s,delta_x);
1267        x[i] += s;
1268        lcp.pN_plusequals_s_times_qN (w,s,delta_w);
1269        w[i] += s * delta_w[i];
1270
1271        // switch indexes between sets if necessary
1272        if (si==i) {
1273          w[i] = 0;
1274          lcp.transfer_i_to_C (i);
1275          break;
1276        }
1277        if (si_in_N) {
1278          w[si] = 0;
1279          lcp.transfer_i_from_N_to_C (si);
1280        }
1281        else {
1282          x[si] = 0;
1283          lcp.transfer_i_from_C_to_N (si);
1284        }
1285      }
1286    }
1287  }
1288
1289 done:
1290  lcp.unpermute();
1291
1292  UNALLOCA (L);
1293  UNALLOCA (d);
1294  UNALLOCA (delta_x);
1295  UNALLOCA (delta_w);
1296  UNALLOCA (Dell);
1297  UNALLOCA (ell);
1298  UNALLOCA (tmp);
1299  UNALLOCA (Arows);
1300  UNALLOCA (p);
1301  UNALLOCA (C);
1302  UNALLOCA (dummy);
1303}
1304
1305//***************************************************************************
1306// an optimized Dantzig LCP driver routine for the lo-hi LCP problem.
1307
1308void dSolveLCP (int n, dReal *A, dReal *x, dReal *b,
1309                dReal *w, int nub, dReal *lo, dReal *hi, int *findex)
1310{
1311  dAASSERT (n>0 && A && x && b && w && lo && hi && nub >= 0 && nub <= n);
1312
1313  int i,k,hit_first_friction_index = 0;
1314  int nskip = dPAD(n);
1315
1316  // if all the variables are unbounded then we can just factor, solve,
1317  // and return
1318  if (nub >= n) {
1319    dFactorLDLT (A,w,n,nskip);          // use w for d
1320    dSolveLDLT (A,w,b,n,nskip);
1321    memcpy (x,b,n*sizeof(dReal));
1322    dSetZero (w,n);
1323
1324    return;
1325  }
1326# ifndef dNODEBUG
1327  // check restrictions on lo and hi
1328  for (k=0; k<n; k++) dIASSERT (lo[k] <= 0 && hi[k] >= 0);
1329# endif
1330  ALLOCA (dReal,L,n*nskip*sizeof(dReal));
1331#ifdef dUSE_MALLOC_FOR_ALLOCA
1332    if (L == NULL) {
1333      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1334      return;
1335    }
1336#endif
1337  ALLOCA (dReal,d,n*sizeof(dReal));
1338#ifdef dUSE_MALLOC_FOR_ALLOCA
1339    if (d == NULL) {
1340      UNALLOCA(L);
1341      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1342      return;
1343    }
1344#endif
1345  ALLOCA (dReal,delta_x,n*sizeof(dReal));
1346#ifdef dUSE_MALLOC_FOR_ALLOCA
1347    if (delta_x == NULL) {
1348      UNALLOCA(d);
1349      UNALLOCA(L);
1350      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1351      return;
1352    }
1353#endif
1354  ALLOCA (dReal,delta_w,n*sizeof(dReal));
1355#ifdef dUSE_MALLOC_FOR_ALLOCA
1356    if (delta_w == NULL) {
1357      UNALLOCA(delta_x);
1358      UNALLOCA(d);
1359      UNALLOCA(L);
1360      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1361      return;
1362    }
1363#endif
1364  ALLOCA (dReal,Dell,n*sizeof(dReal));
1365#ifdef dUSE_MALLOC_FOR_ALLOCA
1366    if (Dell == NULL) {
1367      UNALLOCA(delta_w);
1368      UNALLOCA(delta_x);
1369      UNALLOCA(d);
1370      UNALLOCA(L);
1371      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1372      return;
1373    }
1374#endif
1375  ALLOCA (dReal,ell,n*sizeof(dReal));
1376#ifdef dUSE_MALLOC_FOR_ALLOCA
1377    if (ell == NULL) {
1378      UNALLOCA(Dell);
1379      UNALLOCA(delta_w);
1380      UNALLOCA(delta_x);
1381      UNALLOCA(d);
1382      UNALLOCA(L);
1383      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1384      return;
1385    }
1386#endif
1387  ALLOCA (dReal*,Arows,n*sizeof(dReal*));
1388#ifdef dUSE_MALLOC_FOR_ALLOCA
1389    if (Arows == NULL) {
1390      UNALLOCA(ell);
1391      UNALLOCA(Dell);
1392      UNALLOCA(delta_w);
1393      UNALLOCA(delta_x);
1394      UNALLOCA(d);
1395      UNALLOCA(L);
1396      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1397      return;
1398    }
1399#endif
1400  ALLOCA (int,p,n*sizeof(int));
1401#ifdef dUSE_MALLOC_FOR_ALLOCA
1402    if (p == NULL) {
1403      UNALLOCA(Arows);
1404      UNALLOCA(ell);
1405      UNALLOCA(Dell);
1406      UNALLOCA(delta_w);
1407      UNALLOCA(delta_x);
1408      UNALLOCA(d);
1409      UNALLOCA(L);
1410      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1411      return;
1412    }
1413#endif
1414  ALLOCA (int,C,n*sizeof(int));
1415#ifdef dUSE_MALLOC_FOR_ALLOCA
1416    if (C == NULL) {
1417      UNALLOCA(p);
1418      UNALLOCA(Arows);
1419      UNALLOCA(ell);
1420      UNALLOCA(Dell);
1421      UNALLOCA(delta_w);
1422      UNALLOCA(delta_x);
1423      UNALLOCA(d);
1424      UNALLOCA(L);
1425      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1426      return;
1427    }
1428#endif
1429
1430  int dir;
1431  dReal dirf;
1432
1433  // for i in N, state[i] is 0 if x(i)==lo(i) or 1 if x(i)==hi(i)
1434  ALLOCA (int,state,n*sizeof(int));
1435#ifdef dUSE_MALLOC_FOR_ALLOCA
1436    if (state == NULL) {
1437      UNALLOCA(C);
1438      UNALLOCA(p);
1439      UNALLOCA(Arows);
1440      UNALLOCA(ell);
1441      UNALLOCA(Dell);
1442      UNALLOCA(delta_w);
1443      UNALLOCA(delta_x);
1444      UNALLOCA(d);
1445      UNALLOCA(L);
1446      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1447      return;
1448    }
1449#endif
1450
1451  // create LCP object. note that tmp is set to delta_w to save space, this
1452  // optimization relies on knowledge of how tmp is used, so be careful!
1453  dLCP *lcp=new dLCP(n,nub,A,x,b,w,lo,hi,L,d,Dell,ell,delta_w,state,findex,p,C,Arows);
1454  nub = lcp->getNub();
1455
1456  // loop over all indexes nub..n-1. for index i, if x(i),w(i) satisfy the
1457  // LCP conditions then i is added to the appropriate index set. otherwise
1458  // x(i),w(i) is driven either +ve or -ve to force it to the valid region.
1459  // as we drive x(i), x(C) is also adjusted to keep w(C) at zero.
1460  // while driving x(i) we maintain the LCP conditions on the other variables
1461  // 0..i-1. we do this by watching out for other x(i),w(i) values going
1462  // outside the valid region, and then switching them between index sets
1463  // when that happens.
1464
1465  for (i=nub; i<n; i++) {
1466    // the index i is the driving index and indexes i+1..n-1 are "dont care",
1467    // i.e. when we make changes to the system those x's will be zero and we
1468    // don't care what happens to those w's. in other words, we only consider
1469    // an (i+1)*(i+1) sub-problem of A*x=b+w.
1470
1471    // if we've hit the first friction index, we have to compute the lo and
1472    // hi values based on the values of x already computed. we have been
1473    // permuting the indexes, so the values stored in the findex vector are
1474    // no longer valid. thus we have to temporarily unpermute the x vector.
1475    // for the purposes of this computation, 0*infinity = 0 ... so if the
1476    // contact constraint's normal force is 0, there should be no tangential
1477    // force applied.
1478
1479    if (hit_first_friction_index == 0 && findex && findex[i] >= 0) {
1480      // un-permute x into delta_w, which is not being used at the moment
1481      for (k=0; k<n; k++) delta_w[p[k]] = x[k];
1482
1483      // set lo and hi values
1484      for (k=i; k<n; k++) {
1485        dReal wfk = delta_w[findex[k]];
1486        if (wfk == 0) {
1487          hi[k] = 0;
1488          lo[k] = 0;
1489        }
1490        else {
1491          hi[k] = dFabs (hi[k] * wfk);
1492          lo[k] = -hi[k];
1493        }
1494      }
1495      hit_first_friction_index = 1;
1496    }
1497
1498    // thus far we have not even been computing the w values for indexes
1499    // greater than i, so compute w[i] now.
1500    w[i] = lcp->AiC_times_qC (i,x) + lcp->AiN_times_qN (i,x) - b[i];
1501
1502    // if lo=hi=0 (which can happen for tangential friction when normals are
1503    // 0) then the index will be assigned to set N with some state. however,
1504    // set C's line has zero size, so the index will always remain in set N.
1505    // with the "normal" switching logic, if w changed sign then the index
1506    // would have to switch to set C and then back to set N with an inverted
1507    // state. this is pointless, and also computationally expensive. to
1508    // prevent this from happening, we use the rule that indexes with lo=hi=0
1509    // will never be checked for set changes. this means that the state for
1510    // these indexes may be incorrect, but that doesn't matter.
1511
1512    // see if x(i),w(i) is in a valid region
1513    if (lo[i]==0 && w[i] >= 0) {
1514      lcp->transfer_i_to_N (i);
1515      state[i] = 0;
1516    }
1517    else if (hi[i]==0 && w[i] <= 0) {
1518      lcp->transfer_i_to_N (i);
1519      state[i] = 1;
1520    }
1521    else if (w[i]==0) {
1522      // this is a degenerate case. by the time we get to this test we know
1523      // that lo != 0, which means that lo < 0 as lo is not allowed to be +ve,
1524      // and similarly that hi > 0. this means that the line segment
1525      // corresponding to set C is at least finite in extent, and we are on it.
1526      // NOTE: we must call lcp->solve1() before lcp->transfer_i_to_C()
1527      lcp->solve1 (delta_x,i,0,1);
1528
1529#ifdef dUSE_MALLOC_FOR_ALLOCA
1530      if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) {
1531        UNALLOCA(state);
1532        UNALLOCA(C);
1533        UNALLOCA(p);
1534        UNALLOCA(Arows);
1535        UNALLOCA(ell);
1536        UNALLOCA(Dell);
1537        UNALLOCA(delta_w);
1538        UNALLOCA(delta_x);
1539        UNALLOCA(d);
1540        UNALLOCA(L);
1541        return;
1542      }
1543#endif
1544
1545      lcp->transfer_i_to_C (i);
1546    }
1547    else {
1548      // we must push x(i) and w(i)
1549      for (;;) {
1550        // find direction to push on x(i)
1551        if (w[i] <= 0) {
1552          dir = 1;
1553          dirf = REAL(1.0);
1554        }
1555        else {
1556          dir = -1;
1557          dirf = REAL(-1.0);
1558        }
1559
1560        // compute: delta_x(C) = -dir*A(C,C)\A(C,i)
1561        lcp->solve1 (delta_x,i,dir);
1562
1563#ifdef dUSE_MALLOC_FOR_ALLOCA
1564        if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) {
1565          UNALLOCA(state);
1566          UNALLOCA(C);
1567          UNALLOCA(p);
1568          UNALLOCA(Arows);
1569          UNALLOCA(ell);
1570          UNALLOCA(Dell);
1571          UNALLOCA(delta_w);
1572          UNALLOCA(delta_x);
1573          UNALLOCA(d);
1574          UNALLOCA(L);
1575          return;
1576        }
1577#endif
1578
1579        // note that delta_x[i] = dirf, but we wont bother to set it
1580
1581        // compute: delta_w = A*delta_x ... note we only care about
1582        // delta_w(N) and delta_w(i), the rest is ignored
1583        lcp->pN_equals_ANC_times_qC (delta_w,delta_x);
1584        lcp->pN_plusequals_ANi (delta_w,i,dir);
1585        delta_w[i] = lcp->AiC_times_qC (i,delta_x) + lcp->Aii(i)*dirf;
1586
1587        // find largest step we can take (size=s), either to drive x(i),w(i)
1588        // to the valid LCP region or to drive an already-valid variable
1589        // outside the valid region.
1590
1591        int cmd = 1;            // index switching command
1592        int si = 0;             // si = index to switch if cmd>3
1593        dReal s = -w[i]/delta_w[i];
1594        if (dir > 0) {
1595          if (hi[i] < dInfinity) {
1596            dReal s2 = (hi[i]-x[i])/dirf;               // step to x(i)=hi(i)
1597            if (s2 < s) {
1598              s = s2;
1599              cmd = 3;
1600            }
1601          }
1602        }
1603        else {
1604          if (lo[i] > -dInfinity) {
1605            dReal s2 = (lo[i]-x[i])/dirf;               // step to x(i)=lo(i)
1606            if (s2 < s) {
1607              s = s2;
1608              cmd = 2;
1609            }
1610          }
1611        }
1612
1613        for (k=0; k < lcp->numN(); k++) {
1614          if ((state[lcp->indexN(k)]==0 && delta_w[lcp->indexN(k)] < 0) ||
1615              (state[lcp->indexN(k)]!=0 && delta_w[lcp->indexN(k)] > 0)) {
1616            // don't bother checking if lo=hi=0
1617            if (lo[lcp->indexN(k)] == 0 && hi[lcp->indexN(k)] == 0) continue;
1618            dReal s2 = -w[lcp->indexN(k)] / delta_w[lcp->indexN(k)];
1619            if (s2 < s) {
1620              s = s2;
1621              cmd = 4;
1622              si = lcp->indexN(k);
1623            }
1624          }
1625        }
1626
1627        for (k=nub; k < lcp->numC(); k++) {
1628          if (delta_x[lcp->indexC(k)] < 0 && lo[lcp->indexC(k)] > -dInfinity) {
1629            dReal s2 = (lo[lcp->indexC(k)]-x[lcp->indexC(k)]) /
1630              delta_x[lcp->indexC(k)];
1631            if (s2 < s) {
1632              s = s2;
1633              cmd = 5;
1634              si = lcp->indexC(k);
1635            }
1636          }
1637          if (delta_x[lcp->indexC(k)] > 0 && hi[lcp->indexC(k)] < dInfinity) {
1638            dReal s2 = (hi[lcp->indexC(k)]-x[lcp->indexC(k)]) /
1639              delta_x[lcp->indexC(k)];
1640            if (s2 < s) {
1641              s = s2;
1642              cmd = 6;
1643              si = lcp->indexC(k);
1644            }
1645          }
1646        }
1647
1648        //static char* cmdstring[8] = {0,"->C","->NL","->NH","N->C",
1649        //                           "C->NL","C->NH"};
1650        //printf ("cmd=%d (%s), si=%d\n",cmd,cmdstring[cmd],(cmd>3) ? si : i);
1651
1652        // if s <= 0 then we've got a problem. if we just keep going then
1653        // we're going to get stuck in an infinite loop. instead, just cross
1654        // our fingers and exit with the current solution.
1655        if (s <= 0) {
1656          dMessage (d_ERR_LCP, "LCP internal error, s <= 0 (s=%.4e)",s);
1657          if (i < (n-1)) {
1658            dSetZero (x+i,n-i);
1659            dSetZero (w+i,n-i);
1660          }
1661          goto done;
1662        }
1663
1664        // apply x = x + s * delta_x
1665        lcp->pC_plusequals_s_times_qC (x,s,delta_x);
1666        x[i] += s * dirf;
1667
1668        // apply w = w + s * delta_w
1669        lcp->pN_plusequals_s_times_qN (w,s,delta_w);
1670        w[i] += s * delta_w[i];
1671
1672        // switch indexes between sets if necessary
1673        switch (cmd) {
1674        case 1:         // done
1675          w[i] = 0;
1676          lcp->transfer_i_to_C (i);
1677          break;
1678        case 2:         // done
1679          x[i] = lo[i];
1680          state[i] = 0;
1681          lcp->transfer_i_to_N (i);
1682          break;
1683        case 3:         // done
1684          x[i] = hi[i];
1685          state[i] = 1;
1686          lcp->transfer_i_to_N (i);
1687          break;
1688        case 4:         // keep going
1689          w[si] = 0;
1690          lcp->transfer_i_from_N_to_C (si);
1691          break;
1692        case 5:         // keep going
1693          x[si] = lo[si];
1694          state[si] = 0;
1695          lcp->transfer_i_from_C_to_N (si);
1696          break;
1697        case 6:         // keep going
1698          x[si] = hi[si];
1699          state[si] = 1;
1700          lcp->transfer_i_from_C_to_N (si);
1701          break;
1702        }
1703
1704        if (cmd <= 3) break;
1705      }
1706    }
1707  }
1708
1709 done:
1710  lcp->unpermute();
1711  delete lcp;
1712
1713  UNALLOCA (L);
1714  UNALLOCA (d);
1715  UNALLOCA (delta_x);
1716  UNALLOCA (delta_w);
1717  UNALLOCA (Dell);
1718  UNALLOCA (ell);
1719  UNALLOCA (Arows);
1720  UNALLOCA (p);
1721  UNALLOCA (C);
1722  UNALLOCA (state);
1723}
1724
1725//***************************************************************************
1726// accuracy and timing test
1727
1728extern "C" ODE_API void dTestSolveLCP()
1729{
1730  int n = 100;
1731  int i,nskip = dPAD(n);
1732#ifdef dDOUBLE
1733  const dReal tol = REAL(1e-9);
1734#endif
1735#ifdef dSINGLE
1736  const dReal tol = REAL(1e-4);
1737#endif
1738  printf ("dTestSolveLCP()\n");
1739
1740  ALLOCA (dReal,A,n*nskip*sizeof(dReal));
1741#ifdef dUSE_MALLOC_FOR_ALLOCA
1742    if (A == NULL) {
1743      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1744      return;
1745    }
1746#endif
1747  ALLOCA (dReal,x,n*sizeof(dReal));
1748#ifdef dUSE_MALLOC_FOR_ALLOCA
1749    if (x == NULL) {
1750      UNALLOCA (A);
1751      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1752      return;
1753    }
1754#endif
1755  ALLOCA (dReal,b,n*sizeof(dReal));
1756#ifdef dUSE_MALLOC_FOR_ALLOCA
1757    if (b == NULL) {
1758      UNALLOCA (x);
1759      UNALLOCA (A);
1760      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1761      return;
1762    }
1763#endif
1764  ALLOCA (dReal,w,n*sizeof(dReal));
1765#ifdef dUSE_MALLOC_FOR_ALLOCA
1766    if (w == NULL) {
1767      UNALLOCA (b);
1768      UNALLOCA (x);
1769      UNALLOCA (A);
1770      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1771      return;
1772    }
1773#endif
1774  ALLOCA (dReal,lo,n*sizeof(dReal));
1775#ifdef dUSE_MALLOC_FOR_ALLOCA
1776    if (lo == NULL) {
1777      UNALLOCA (w);
1778      UNALLOCA (b);
1779      UNALLOCA (x);
1780      UNALLOCA (A);
1781      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1782      return;
1783    }
1784#endif
1785  ALLOCA (dReal,hi,n*sizeof(dReal));
1786#ifdef dUSE_MALLOC_FOR_ALLOCA
1787    if (hi == NULL) {
1788      UNALLOCA (lo);
1789      UNALLOCA (w);
1790      UNALLOCA (b);
1791      UNALLOCA (x);
1792      UNALLOCA (A);
1793      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1794      return;
1795    }
1796#endif
1797
1798  ALLOCA (dReal,A2,n*nskip*sizeof(dReal));
1799#ifdef dUSE_MALLOC_FOR_ALLOCA
1800    if (A2 == NULL) {
1801      UNALLOCA (hi);
1802      UNALLOCA (lo);
1803      UNALLOCA (w);
1804      UNALLOCA (b);
1805      UNALLOCA (x);
1806      UNALLOCA (A);
1807      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1808      return;
1809    }
1810#endif
1811  ALLOCA (dReal,b2,n*sizeof(dReal));
1812#ifdef dUSE_MALLOC_FOR_ALLOCA
1813    if (b2 == NULL) {
1814      UNALLOCA (A2);
1815      UNALLOCA (hi);
1816      UNALLOCA (lo);
1817      UNALLOCA (w);
1818      UNALLOCA (b);
1819      UNALLOCA (x);
1820      UNALLOCA (A);
1821      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1822      return;
1823    }
1824#endif
1825  ALLOCA (dReal,lo2,n*sizeof(dReal));
1826#ifdef dUSE_MALLOC_FOR_ALLOCA
1827    if (lo2 == NULL) {
1828      UNALLOCA (b2);
1829      UNALLOCA (A2);
1830      UNALLOCA (hi);
1831      UNALLOCA (lo);
1832      UNALLOCA (w);
1833      UNALLOCA (b);
1834      UNALLOCA (x);
1835      UNALLOCA (A);
1836      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1837      return;
1838    }
1839#endif
1840  ALLOCA (dReal,hi2,n*sizeof(dReal));
1841#ifdef dUSE_MALLOC_FOR_ALLOCA
1842    if (hi2 == NULL) {
1843      UNALLOCA (lo2);
1844      UNALLOCA (b2);
1845      UNALLOCA (A2);
1846      UNALLOCA (hi);
1847      UNALLOCA (lo);
1848      UNALLOCA (w);
1849      UNALLOCA (b);
1850      UNALLOCA (x);
1851      UNALLOCA (A);
1852      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1853      return;
1854    }
1855#endif
1856  ALLOCA (dReal,tmp1,n*sizeof(dReal));
1857#ifdef dUSE_MALLOC_FOR_ALLOCA
1858    if (tmp1 == NULL) {
1859      UNALLOCA (hi2);
1860      UNALLOCA (lo2);
1861      UNALLOCA (b2);
1862      UNALLOCA (A2);
1863      UNALLOCA (hi);
1864      UNALLOCA (lo);
1865      UNALLOCA (w);
1866      UNALLOCA (b);
1867      UNALLOCA (x);
1868      UNALLOCA (A);
1869      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1870      return;
1871    }
1872#endif
1873  ALLOCA (dReal,tmp2,n*sizeof(dReal));
1874#ifdef dUSE_MALLOC_FOR_ALLOCA
1875    if (tmp2 == NULL) {
1876      UNALLOCA (tmp1);
1877      UNALLOCA (hi2);
1878      UNALLOCA (lo2);
1879      UNALLOCA (b2);
1880      UNALLOCA (A2);
1881      UNALLOCA (hi);
1882      UNALLOCA (lo);
1883      UNALLOCA (w);
1884      UNALLOCA (b);
1885      UNALLOCA (x);
1886      UNALLOCA (A);
1887      dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1888      return;
1889    }
1890#endif
1891
1892  double total_time = 0;
1893  for (int count=0; count < 1000; count++) {
1894
1895    // form (A,b) = a random positive definite LCP problem
1896    dMakeRandomMatrix (A2,n,n,1.0);
1897    dMultiply2 (A,A2,A2,n,n,n);
1898    dMakeRandomMatrix (x,n,1,1.0);
1899    dMultiply0 (b,A,x,n,n,1);
1900    for (i=0; i<n; i++) b[i] += (dRandReal()*REAL(0.2))-REAL(0.1);
1901
1902    // choose `nub' in the range 0..n-1
1903    int nub = 50; //dRandInt (n);
1904
1905    // make limits
1906    for (i=0; i<nub; i++) lo[i] = -dInfinity;
1907    for (i=0; i<nub; i++) hi[i] = dInfinity;
1908    //for (i=nub; i<n; i++) lo[i] = 0;
1909    //for (i=nub; i<n; i++) hi[i] = dInfinity;
1910    //for (i=nub; i<n; i++) lo[i] = -dInfinity;
1911    //for (i=nub; i<n; i++) hi[i] = 0;
1912    for (i=nub; i<n; i++) lo[i] = -(dRandReal()*REAL(1.0))-REAL(0.01);
1913    for (i=nub; i<n; i++) hi[i] =  (dRandReal()*REAL(1.0))+REAL(0.01);
1914
1915    // set a few limits to lo=hi=0
1916    /*
1917    for (i=0; i<10; i++) {
1918      int j = dRandInt (n-nub) + nub;
1919      lo[j] = 0;
1920      hi[j] = 0;
1921    }
1922    */
1923
1924    // solve the LCP. we must make copy of A,b,lo,hi (A2,b2,lo2,hi2) for
1925    // SolveLCP() to permute. also, we'll clear the upper triangle of A2 to
1926    // ensure that it doesn't get referenced (if it does, the answer will be
1927    // wrong).
1928
1929    memcpy (A2,A,n*nskip*sizeof(dReal));
1930    dClearUpperTriangle (A2,n);
1931    memcpy (b2,b,n*sizeof(dReal));
1932    memcpy (lo2,lo,n*sizeof(dReal));
1933    memcpy (hi2,hi,n*sizeof(dReal));
1934    dSetZero (x,n);
1935    dSetZero (w,n);
1936
1937    dStopwatch sw;
1938    dStopwatchReset (&sw);
1939    dStopwatchStart (&sw);
1940
1941    dSolveLCP (n,A2,x,b2,w,nub,lo2,hi2,0);
1942#ifdef dUSE_MALLOC_FOR_ALLOCA
1943    if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) {
1944      UNALLOCA (tmp2);
1945      UNALLOCA (tmp1);
1946      UNALLOCA (hi2);
1947      UNALLOCA (lo2);
1948      UNALLOCA (b2);
1949      UNALLOCA (A2);
1950      UNALLOCA (hi);
1951      UNALLOCA (lo);
1952      UNALLOCA (w);
1953      UNALLOCA (b);
1954      UNALLOCA (x);
1955      UNALLOCA (A);
1956      return;
1957    }
1958#endif
1959
1960    dStopwatchStop (&sw);
1961    double time = dStopwatchTime(&sw);
1962    total_time += time;
1963    double average = total_time / double(count+1) * 1000.0;
1964
1965    // check the solution
1966
1967    dMultiply0 (tmp1,A,x,n,n,1);
1968    for (i=0; i<n; i++) tmp2[i] = b[i] + w[i];
1969    dReal diff = dMaxDifference (tmp1,tmp2,n,1);
1970    // printf ("\tA*x = b+w, maximum difference = %.6e - %s (1)\n",diff,
1971    //      diff > tol ? "FAILED" : "passed");
1972    if (diff > tol) dDebug (0,"A*x = b+w, maximum difference = %.6e",diff);
1973    int n1=0,n2=0,n3=0;
1974    for (i=0; i<n; i++) {
1975      if (x[i]==lo[i] && w[i] >= 0) {
1976        n1++;   // ok
1977      }
1978      else if (x[i]==hi[i] && w[i] <= 0) {
1979        n2++;   // ok
1980      }
1981      else if (x[i] >= lo[i] && x[i] <= hi[i] && w[i] == 0) {
1982        n3++;   // ok
1983      }
1984      else {
1985        dDebug (0,"FAILED: i=%d x=%.4e w=%.4e lo=%.4e hi=%.4e",i,
1986                x[i],w[i],lo[i],hi[i]);
1987      }
1988    }
1989
1990    // pacifier
1991    printf ("passed: NL=%3d NH=%3d C=%3d   ",n1,n2,n3);
1992    printf ("time=%10.3f ms  avg=%10.4f\n",time * 1000.0,average);
1993  }
1994
1995  UNALLOCA (A);
1996  UNALLOCA (x);
1997  UNALLOCA (b);
1998  UNALLOCA (w);
1999  UNALLOCA (lo);
2000  UNALLOCA (hi);
2001  UNALLOCA (A2);
2002  UNALLOCA (b2);
2003  UNALLOCA (lo2);
2004  UNALLOCA (hi2);
2005  UNALLOCA (tmp1);
2006  UNALLOCA (tmp2);
2007}
Note: See TracBrowser for help on using the repository browser.