Planet

navi

home

PPS

about

screenshots

download

development

forum

Context Navigation

source: code/branches/ode/ode-0.9/ode/src/lcp.cpp @ 216

Last change on this file since 216 was 216, checked in by mathiask, 16 years ago
[Physik] add ode-0.9
File size: 52.0 KB

Line
1	/*************************************************************************
2	* *
3	* Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
4	* All rights reserved. Email: russ@q12.org Web: www.q12.org *
5	* *
6	* This library is free software; you can redistribute it and/or *
7	* modify it under the terms of EITHER: *
8	* (1) The GNU Lesser General Public License as published by the Free *
9	* Software Foundation; either version 2.1 of the License, or (at *
10	* your option) any later version. The text of the GNU Lesser *
11	* General Public License is included with this library in the *
12	* file LICENSE.TXT. *
13	* (2) The BSD-style license that is included with this library in *
14	* the file LICENSE-BSD.TXT. *
15	* *
16	* This library is distributed in the hope that it will be useful, *
17	* but WITHOUT ANY WARRANTY; without even the implied warranty of *
18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19	* LICENSE.TXT and LICENSE-BSD.TXT for more details. *
20	* *
21	*************************************************************************/
22
23	/*
24
25
26	THE ALGORITHM
27	-------------
28
29	solve A*x = b+w, with x and w subject to certain LCP conditions.
30	each x(i),w(i) must lie on one of the three line segments in the following
31	diagram. each line segment corresponds to one index set :
32
33	w(i)
34	/\|\ \| :
35	\| \| :
36	\| \|i in N :
37	w>0 \| \|state[i]=0 :
38	\| \| :
39	\| \| : i in C
40	w=0 + +-----------------------+
41	\| : \|
42	\| : \|
43	w<0 \| : \|i in N
44	\| : \|state[i]=1
45	\| : \|
46	\| : \|
47	+-------\|-----------\|-----------\|----------> x(i)
48	lo 0 hi
49
50	the Dantzig algorithm proceeds as follows:
51	for i=1:n
52	* if (x(i),w(i)) is not on the line, push x(i) and w(i) positive or
53	negative towards the line. as this is done, the other (x(j),w(j))
54	for j<i are constrained to be on the line. if any (x,w) reaches the
55	end of a line segment then it is switched between index sets.
56	* i is added to the appropriate index set depending on what line segment
57	it hits.
58
59	we restrict lo(i) <= 0 and hi(i) >= 0. this makes the algorithm a bit
60	simpler, because the starting point for x(i),w(i) is always on the dotted
61	line x=0 and x will only ever increase in one direction, so it can only hit
62	two out of the three line segments.
63
64
65	NOTES
66	-----
67
68	this is an implementation of "lcp_dantzig2_ldlt.m" and "lcp_dantzig_lohi.m".
69	the implementation is split into an LCP problem object (dLCP) and an LCP
70	driver function. most optimization occurs in the dLCP object.
71
72	a naive implementation of the algorithm requires either a lot of data motion
73	or a lot of permutation-array lookup, because we are constantly re-ordering
74	rows and columns. to avoid this and make a more optimized algorithm, a
75	non-trivial data structure is used to represent the matrix A (this is
76	implemented in the fast version of the dLCP object).
77
78	during execution of this algorithm, some indexes in A are clamped (set C),
79	some are non-clamped (set N), and some are "don't care" (where x=0).
80	A,x,b,w (and other problem vectors) are permuted such that the clamped
81	indexes are first, the unclamped indexes are next, and the don't-care
82	indexes are last. this permutation is recorded in the array `p'.
83	initially p = 0..n-1, and as the rows and columns of A,x,b,w are swapped,
84	the corresponding elements of p are swapped.
85
86	because the C and N elements are grouped together in the rows of A, we can do
87	lots of work with a fast dot product function. if A,x,etc were not permuted
88	and we only had a permutation array, then those dot products would be much
89	slower as we would have a permutation array lookup in some inner loops.
90
91	A is accessed through an array of row pointers, so that element (i,j) of the
92	permuted matrix is A[i][j]. this makes row swapping fast. for column swapping
93	we still have to actually move the data.
94
95	during execution of this algorithm we maintain an LDL' factorization of
96	the clamped submatrix of A (call it `AC') which is the top left nC*nC
97	submatrix of A. there are two ways we could arrange the rows/columns in AC.
98
99	(1) AC is always permuted such that LDL' = AC. this causes a problem
100	when a row/column is removed from C, because then all the rows/columns of A
101	between the deleted index and the end of C need to be rotated downward.
102	this results in a lot of data motion and slows things down.
103	(2) LDL' is actually a factorization of a permutation of AC (which is
104	itself a permutation of the underlying A). this is what we do - the
105	permutation is recorded in the vector C. call this permutation A[C,C].
106	when a row/column is removed from C, all we have to do is swap two
107	rows/columns and manipulate C.
108
109	*/
110
111	#include <ode/common.h>
112	#include "lcp.h"
113	#include <ode/matrix.h>
114	#include <ode/misc.h>
115	#include "mat.h" // for testing
116	#include <ode/timer.h> // for testing
117
118	//***************************************************************************
119	// code generation parameters
120
121	// LCP debugging (mosty for fast dLCP) - this slows things down a lot
122	//#define DEBUG_LCP
123
124	//#define dLCP_SLOW // use slow dLCP object
125	#define dLCP_FAST // use fast dLCP object
126
127	// option 1 : matrix row pointers (less data copying)
128	#define ROWPTRS
129	#define ATYPE dReal **
130	#define AROW(i) (A[i])
131
132	// option 2 : no matrix row pointers (slightly faster inner loops)
133	//#define NOROWPTRS
134	//#define ATYPE dReal *
135	//#define AROW(i) (A+(i)*nskip)
136
137	// use protected, non-stack memory allocation system
138
139	#ifdef dUSE_MALLOC_FOR_ALLOCA
140	extern unsigned int dMemoryFlag;
141
142	#define ALLOCA(t,v,s) t* v = (t*) malloc(s)
143	#define UNALLOCA(t) free(t)
144
145	#else
146
147	#define ALLOCA(t,v,s) t* v =(t*)dALLOCA16(s)
148	#define UNALLOCA(t) /* nothing */
149
150	#endif
151
152	#define NUB_OPTIMIZATIONS
153
154	//***************************************************************************
155
156	// swap row/column i1 with i2 in the n*n matrix A. the leading dimension of
157	// A is nskip. this only references and swaps the lower triangle.
158	// if `do_fast_row_swaps' is nonzero and row pointers are being used, then
159	// rows will be swapped by exchanging row pointers. otherwise the data will
160	// be copied.
161
162	static void swapRowsAndCols (ATYPE A, int n, int i1, int i2, int nskip,
163	int do_fast_row_swaps)
164	{
165	int i;
166	dAASSERT (A && n > 0 && i1 >= 0 && i2 >= 0 && i1 < n && i2 < n &&
167	nskip >= n && i1 < i2);
168
169	# ifdef ROWPTRS
170	for (i=i1+1; i<i2; i++) A[i1][i] = A[i][i1];
171	for (i=i1+1; i<i2; i++) A[i][i1] = A[i2][i];
172	A[i1][i2] = A[i1][i1];
173	A[i1][i1] = A[i2][i1];
174	A[i2][i1] = A[i2][i2];
175	// swap rows, by swapping row pointers
176	if (do_fast_row_swaps) {
177	dReal *tmpp;
178	tmpp = A[i1];
179	A[i1] = A[i2];
180	A[i2] = tmpp;
181	}
182	else {
183	ALLOCA (dReal,tmprow,n * sizeof(dReal));
184
185	#ifdef dUSE_MALLOC_FOR_ALLOCA
186	if (tmprow == NULL) {
187	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
188	return;
189	}
190	#endif
191
192	memcpy (tmprow,A[i1],n * sizeof(dReal));
193	memcpy (A[i1],A[i2],n * sizeof(dReal));
194	memcpy (A[i2],tmprow,n * sizeof(dReal));
195	UNALLOCA(tmprow);
196	}
197	// swap columns the hard way
198	for (i=i2+1; i<n; i++) {
199	dReal tmp = A[i][i1];
200	A[i][i1] = A[i][i2];
201	A[i][i2] = tmp;
202	}
203	# else
204	dReal tmp;
205	ALLOCA (dReal,tmprow,n * sizeof(dReal));
206
207	#ifdef dUSE_MALLOC_FOR_ALLOCA
208	if (tmprow == NULL) {
209	return;
210	}
211	#endif
212
213	if (i1 > 0) {
214	memcpy (tmprow,A+i1nskip,i1sizeof(dReal));
215	memcpy (A+i1nskip,A+i2nskip,i1*sizeof(dReal));
216	memcpy (A+i2nskip,tmprow,i1sizeof(dReal));
217	}
218	for (i=i1+1; i<i2; i++) {
219	tmp = A[i2*nskip+i];
220	A[i2nskip+i] = A[inskip+i1];
221	A[i*nskip+i1] = tmp;
222	}
223	tmp = A[i1*nskip+i1];
224	A[i1nskip+i1] = A[i2nskip+i2];
225	A[i2*nskip+i2] = tmp;
226	for (i=i2+1; i<n; i++) {
227	tmp = A[i*nskip+i1];
228	A[inskip+i1] = A[inskip+i2];
229	A[i*nskip+i2] = tmp;
230	}
231	UNALLOCA(tmprow);
232	# endif
233
234	}
235
236
237	// swap two indexes in the n*n LCP problem. i1 must be <= i2.
238
239	static void swapProblem (ATYPE A, dReal x, dReal b, dReal w, dReal lo,
240	dReal hi, int p, int state, int findex,
241	int n, int i1, int i2, int nskip,
242	int do_fast_row_swaps)
243	{
244	dReal tmp;
245	int tmpi;
246	dIASSERT (n>0 && i1 >=0 && i2 >= 0 && i1 < n && i2 < n && nskip >= n &&
247	i1 <= i2);
248	if (i1==i2) return;
249	swapRowsAndCols (A,n,i1,i2,nskip,do_fast_row_swaps);
250	#ifdef dUSE_MALLOC_FOR_ALLOCA
251	if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY)
252	return;
253	#endif
254	tmp = x[i1];
255	x[i1] = x[i2];
256	x[i2] = tmp;
257	tmp = b[i1];
258	b[i1] = b[i2];
259	b[i2] = tmp;
260	tmp = w[i1];
261	w[i1] = w[i2];
262	w[i2] = tmp;
263	tmp = lo[i1];
264	lo[i1] = lo[i2];
265	lo[i2] = tmp;
266	tmp = hi[i1];
267	hi[i1] = hi[i2];
268	hi[i2] = tmp;
269	tmpi = p[i1];
270	p[i1] = p[i2];
271	p[i2] = tmpi;
272	tmpi = state[i1];
273	state[i1] = state[i2];
274	state[i2] = tmpi;
275	if (findex) {
276	tmpi = findex[i1];
277	findex[i1] = findex[i2];
278	findex[i2] = tmpi;
279	}
280	}
281
282
283	// for debugging - check that L,d is the factorization of A[C,C].
284	// A[C,C] has size nC*nC and leading dimension nskip.
285	// L has size nC*nC and leading dimension nskip.
286	// d has size nC.
287
288	#ifdef DEBUG_LCP
289
290	static void checkFactorization (ATYPE A, dReal _L, dReal _d,
291	int nC, int *C, int nskip)
292	{
293	int i,j;
294	if (nC==0) return;
295
296	// get A1=A, copy the lower triangle to the upper triangle, get A2=A[C,C]
297	dMatrix A1 (nC,nC);
298	for (i=0; i<nC; i++) {
299	for (j=0; j<=i; j++) A1(i,j) = A1(j,i) = AROW(i)[j];
300	}
301	dMatrix A2 = A1.select (nC,C,nC,C);
302
303	// printf ("A1=\n"); A1.print(); printf ("\n");
304	// printf ("A2=\n"); A2.print(); printf ("\n");
305
306	// compute A3 = LDL'
307	dMatrix L (nC,nC,_L,nskip,1);
308	dMatrix D (nC,nC);
309	for (i=0; i<nC; i++) D(i,i) = 1/_d[i];
310	L.clearUpperTriangle();
311	for (i=0; i<nC; i++) L(i,i) = 1;
312	dMatrix A3 = L * D * L.transpose();
313
314	// printf ("L=\n"); L.print(); printf ("\n");
315	// printf ("D=\n"); D.print(); printf ("\n");
316	// printf ("A3=\n"); A2.print(); printf ("\n");
317
318	// compare A2 and A3
319	dReal diff = A2.maxDifference (A3);
320	if (diff > 1e-8)
321	dDebug (0,"LDL' check, maximum difference = %.6e\n",diff);
322	}
323
324	#endif
325
326
327	// for debugging
328
329	#ifdef DEBUG_LCP
330
331	static void checkPermutations (int i, int n, int nC, int nN, int p, int C)
332	{
333	int j,k;
334	dIASSERT (nC>=0 && nN>=0 && (nC+nN)==i && i < n);
335	for (k=0; k<i; k++) dIASSERT (p[k] >= 0 && p[k] < i);
336	for (k=i; k<n; k++) dIASSERT (p[k] == k);
337	for (j=0; j<nC; j++) {
338	int C_is_bad = 1;
339	for (k=0; k<nC; k++) if (C[k]==j) C_is_bad = 0;
340	dIASSERT (C_is_bad==0);
341	}
342	}
343
344	#endif
345
346	//***************************************************************************
347	// dLCP manipulator object. this represents an n*n LCP problem.
348	//
349	// two index sets C and N are kept. each set holds a subset of
350	// the variable indexes 0..n-1. an index can only be in one set.
351	// initially both sets are empty.
352	//
353	// the index set C is special: solutions to A(C,C)\A(C,i) can be generated.
354
355	#ifdef dLCP_SLOW
356
357	// simple but slow implementation of dLCP, for testing the LCP drivers.
358
359	#include "array.h"
360
361	struct dLCP {
362	int n,nub,nskip;
363	dReal Adata,x,b,w,lo,hi; // LCP problem data
364	ATYPE A; // A rows
365	dArray<int> C,N; // index sets
366	int last_i_for_solve1; // last i value given to solve1
367
368	dLCP (int _n, int _nub, dReal _Adata, dReal _x, dReal _b, dReal _w,
369	dReal _lo, dReal _hi, dReal _L, dReal _d,
370	dReal _Dell, dReal _ell, dReal *_tmp,
371	int _state, int _findex, int _p, int _C, dReal **Arows);
372	// the constructor is given an initial problem description (A,x,b,w) and
373	// space for other working data (which the caller may allocate on the stack).
374	// some of this data is specific to the fast dLCP implementation.
375	// the matrices A and L have size nn, vectors have size n1.
376	// A represents a symmetric matrix but only the lower triangle is valid.
377	// `nub' is the number of unbounded indexes at the start. all the indexes
378	// 0..nub-1 will be put into C.
379
380	~dLCP();
381
382	int getNub() { return nub; }
383	// return the value of `nub'. the constructor may want to change it,
384	// so the caller should find out its new value.
385
386	// transfer functions: transfer index i to the given set (C or N). indexes
387	// less than `nub' can never be given. A,x,b,w,etc may be permuted by these
388	// functions, the caller must be robust to this.
389
390	void transfer_i_to_C (int i);
391	// this assumes C and N span 1:i-1. this also assumes that solve1() has
392	// been recently called for the same i without any other transfer
393	// functions in between (thereby allowing some data reuse for the fast
394	// implementation).
395	void transfer_i_to_N (int i);
396	// this assumes C and N span 1:i-1.
397	void transfer_i_from_N_to_C (int i);
398	void transfer_i_from_C_to_N (int i);
399
400	int numC();
401	int numN();
402	// return the number of indexes in set C/N
403
404	int indexC (int i);
405	int indexN (int i);
406	// return index i in set C/N.
407
408	// accessor and arithmetic functions. Aij translates as A(i,j), etc.
409	// make sure that only the lower triangle of A is ever referenced.
410
411	dReal Aii (int i);
412	dReal AiC_times_qC (int i, dReal *q);
413	dReal AiN_times_qN (int i, dReal *q); // for all Nj
414	void pN_equals_ANC_times_qC (dReal p, dReal q); // for all Nj
415	void pN_plusequals_ANi (dReal *p, int i, int sign=1);
416	// for all Nj. sign = +1,-1. assumes i > maximum index in N.
417	void pC_plusequals_s_times_qC (dReal p, dReal s, dReal q);
418	void pN_plusequals_s_times_qN (dReal p, dReal s, dReal q); // for all Nj
419	void solve1 (dReal *a, int i, int dir=1, int only_transfer=0);
420	// get a(C) = - dir * A(C,C) \ A(C,i). dir must be +/- 1.
421	// the fast version of this function computes some data that is needed by
422	// transfer_i_to_C(). if only_transfer is nonzero then this function
423	// only computes that data, it does not set a(C).
424
425	void unpermute();
426	// call this at the end of the LCP function. if the x/w values have been
427	// permuted then this will unscramble them.
428	};
429
430
431	dLCP::dLCP (int _n, int _nub, dReal _Adata, dReal _x, dReal _b, dReal _w,
432	dReal _lo, dReal _hi, dReal _L, dReal _d,
433	dReal _Dell, dReal _ell, dReal *_tmp,
434	int _state, int _findex, int _p, int _C, dReal **Arows)
435	{
436	dUASSERT (_findex==0,"slow dLCP object does not support findex array");
437
438	n = _n;
439	nub = _nub;
440	Adata = _Adata;
441	A = 0;
442	x = _x;
443	b = _b;
444	w = _w;
445	lo = _lo;
446	hi = _hi;
447	nskip = dPAD(n);
448	dSetZero (x,n);
449	last_i_for_solve1 = -1;
450
451	int i,j;
452	C.setSize (n);
453	N.setSize (n);
454	for (i=0; i<n; i++) {
455	C[i] = 0;
456	N[i] = 0;
457	}
458
459	# ifdef ROWPTRS
460	// make matrix row pointers
461	A = Arows;
462	for (i=0; i<n; i++) A[i] = Adata + i*nskip;
463	# else
464	A = Adata;
465	# endif
466
467	// lets make A symmetric
468	for (i=0; i<n; i++) {
469	for (j=i+1; j<n; j++) AROW(i)[j] = AROW(j)[i];
470	}
471
472	// if nub>0, put all indexes 0..nub-1 into C and solve for x
473	if (nub > 0) {
474	for (i=0; i<nub; i++) memcpy (_L+inskip,AROW(i),(i+1)sizeof(dReal));
475	dFactorLDLT (_L,_d,nub,nskip);
476	memcpy (x,b,nub*sizeof(dReal));
477	dSolveLDLT (_L,_d,x,nub,nskip);
478	dSetZero (_w,nub);
479	for (i=0; i<nub; i++) C[i] = 1;
480	}
481	}
482
483
484	dLCP::~dLCP()
485	{
486	}
487
488
489	void dLCP::transfer_i_to_C (int i)
490	{
491	if (i < nub) dDebug (0,"bad i");
492	if (C[i]) dDebug (0,"i already in C");
493	if (N[i]) dDebug (0,"i already in N");
494	for (int k=0; k<i; k++) {
495	if (!(C[k] ^ N[k])) dDebug (0,"assumptions for C and N violated");
496	}
497	for (int k=i; k<n; k++)
498	if (C[k] \|\| N[k]) dDebug (0,"assumptions for C and N violated");
499	if (i != last_i_for_solve1) dDebug (0,"assumptions for i violated");
500	last_i_for_solve1 = -1;
501	C[i] = 1;
502	}
503
504
505	void dLCP::transfer_i_to_N (int i)
506	{
507	if (i < nub) dDebug (0,"bad i");
508	if (C[i]) dDebug (0,"i already in C");
509	if (N[i]) dDebug (0,"i already in N");
510	for (int k=0; k<i; k++)
511	if (!C[k] && !N[k]) dDebug (0,"assumptions for C and N violated");
512	for (int k=i; k<n; k++)
513	if (C[k] \|\| N[k]) dDebug (0,"assumptions for C and N violated");
514	last_i_for_solve1 = -1;
515	N[i] = 1;
516	}
517
518
519	void dLCP::transfer_i_from_N_to_C (int i)
520	{
521	if (i < nub) dDebug (0,"bad i");
522	if (C[i]) dDebug (0,"i already in C");
523	if (!N[i]) dDebug (0,"i not in N");
524	last_i_for_solve1 = -1;
525	N[i] = 0;
526	C[i] = 1;
527	}
528
529
530	void dLCP::transfer_i_from_C_to_N (int i)
531	{
532	if (i < nub) dDebug (0,"bad i");
533	if (N[i]) dDebug (0,"i already in N");
534	if (!C[i]) dDebug (0,"i not in C");
535	last_i_for_solve1 = -1;
536	C[i] = 0;
537	N[i] = 1;
538	}
539
540
541	int dLCP::numC()
542	{
543	int i,count=0;
544	for (i=0; i<n; i++) if (C[i]) count++;
545	return count;
546	}
547
548
549	int dLCP::numN()
550	{
551	int i,count=0;
552	for (i=0; i<n; i++) if (N[i]) count++;
553	return count;
554	}
555
556
557	int dLCP::indexC (int i)
558	{
559	int k,count=0;
560	for (k=0; k<n; k++) {
561	if (C[k]) {
562	if (count==i) return k;
563	count++;
564	}
565	}
566	dDebug (0,"bad index C (%d)",i);
567	return 0;
568	}
569
570
571	int dLCP::indexN (int i)
572	{
573	int k,count=0;
574	for (k=0; k<n; k++) {
575	if (N[k]) {
576	if (count==i) return k;
577	count++;
578	}
579	}
580	dDebug (0,"bad index into N");
581	return 0;
582	}
583
584
585	dReal dLCP::Aii (int i)
586	{
587	return AROW(i)[i];
588	}
589
590
591	dReal dLCP::AiC_times_qC (int i, dReal *q)
592	{
593	dReal sum = 0;
594	for (int k=0; k<n; k++) if (C[k]) sum += AROW(i)[k] * q[k];
595	return sum;
596	}
597
598
599	dReal dLCP::AiN_times_qN (int i, dReal *q)
600	{
601	dReal sum = 0;
602	for (int k=0; k<n; k++) if (N[k]) sum += AROW(i)[k] * q[k];
603	return sum;
604	}
605
606
607	void dLCP::pN_equals_ANC_times_qC (dReal p, dReal q)
608	{
609	dReal sum;
610	for (int ii=0; ii<n; ii++) if (N[ii]) {
611	sum = 0;
612	for (int jj=0; jj<n; jj++) if (C[jj]) sum += AROW(ii)[jj] * q[jj];
613	p[ii] = sum;
614	}
615	}
616
617
618	void dLCP::pN_plusequals_ANi (dReal *p, int i, int sign)
619	{
620	int k;
621	for (k=0; k<n; k++) if (N[k] && k >= i) dDebug (0,"N assumption violated");
622	if (sign > 0) {
623	for (k=0; k<n; k++) if (N[k]) p[k] += AROW(i)[k];
624	}
625	else {
626	for (k=0; k<n; k++) if (N[k]) p[k] -= AROW(i)[k];
627	}
628	}
629
630
631	void dLCP::pC_plusequals_s_times_qC (dReal p, dReal s, dReal q)
632	{
633	for (int k=0; k<n; k++) if (C[k]) p[k] += s*q[k];
634	}
635
636
637	void dLCP::pN_plusequals_s_times_qN (dReal p, dReal s, dReal q)
638	{
639	for (int k=0; k<n; k++) if (N[k]) p[k] += s*q[k];
640	}
641
642
643	void dLCP::solve1 (dReal *a, int i, int dir, int only_transfer)
644	{
645
646	ALLOCA (dReal,AA,nnskipsizeof(dReal));
647	#ifdef dUSE_MALLOC_FOR_ALLOCA
648	if (AA == NULL) {
649	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
650	return;
651	}
652	#endif
653	ALLOCA (dReal,dd,n*sizeof(dReal));
654	#ifdef dUSE_MALLOC_FOR_ALLOCA
655	if (dd == NULL) {
656	UNALLOCA(AA);
657	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
658	return;
659	}
660	#endif
661	ALLOCA (dReal,bb,n*sizeof(dReal));
662	#ifdef dUSE_MALLOC_FOR_ALLOCA
663	if (bb == NULL) {
664	UNALLOCA(AA);
665	UNALLOCA(dd);
666	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
667	return;
668	}
669	#endif
670
671	int ii,jj,AAi,AAj;
672
673	last_i_for_solve1 = i;
674	AAi = 0;
675	for (ii=0; ii<n; ii++) if (C[ii]) {
676	AAj = 0;
677	for (jj=0; jj<n; jj++) if (C[jj]) {
678	AA[AAi*nskip+AAj] = AROW(ii)[jj];
679	AAj++;
680	}
681	bb[AAi] = AROW(i)[ii];
682	AAi++;
683	}
684	if (AAi==0) {
685	UNALLOCA (AA);
686	UNALLOCA (dd);
687	UNALLOCA (bb);
688	return;
689	}
690
691	dFactorLDLT (AA,dd,AAi,nskip);
692	dSolveLDLT (AA,dd,bb,AAi,nskip);
693
694	AAi=0;
695	if (dir > 0) {
696	for (ii=0; ii<n; ii++) if (C[ii]) a[ii] = -bb[AAi++];
697	}
698	else {
699	for (ii=0; ii<n; ii++) if (C[ii]) a[ii] = bb[AAi++];
700	}
701
702	UNALLOCA (AA);
703	UNALLOCA (dd);
704	UNALLOCA (bb);
705	}
706
707
708	void dLCP::unpermute()
709	{
710	}
711
712	#endif // dLCP_SLOW
713
714	//***************************************************************************
715	// fast implementation of dLCP. see the above definition of dLCP for
716	// interface comments.
717	//
718	// `p' records the permutation of A,x,b,w,etc. p is initially 1:n and is
719	// permuted as the other vectors/matrices are permuted.
720	//
721	// A,x,b,w,lo,hi,state,findex,p,c are permuted such that sets C,N have
722	// contiguous indexes. the don't-care indexes follow N.
723	//
724	// an LDL' factorization is maintained of A(C,C), and whenever indexes are
725	// added or removed from the set C the factorization is updated.
726	// thus LDL'=A[C,C], i.e. a permuted top left nC*nC submatrix of A.
727	// the leading dimension of the matrix L is always `nskip'.
728	//
729	// at the start there may be other indexes that are unbounded but are not
730	// included in `nub'. dLCP will permute the matrix so that absolutely all
731	// unbounded vectors are at the start. thus there may be some initial
732	// permutation.
733	//
734	// the algorithms here assume certain patterns, particularly with respect to
735	// index transfer.
736
737	#ifdef dLCP_FAST
738
739	struct dLCP {
740	int n,nskip,nub;
741	ATYPE A; // A rows
742	dReal Adata,x,b,w,lo,hi; // permuted LCP problem data
743	dReal L,d; // LDL' factorization of set C
744	dReal Dell,ell,*tmp;
745	int state,findex,p,C;
746	int nC,nN; // size of each index set
747
748	dLCP (int _n, int _nub, dReal _Adata, dReal _x, dReal _b, dReal _w,
749	dReal _lo, dReal _hi, dReal _L, dReal _d,
750	dReal _Dell, dReal _ell, dReal *_tmp,
751	int _state, int _findex, int _p, int _C, dReal **Arows);
752	int getNub() { return nub; }
753	void transfer_i_to_C (int i);
754	void transfer_i_to_N (int i)
755	{ nN++; } // because we can assume C and N span 1:i-1
756	void transfer_i_from_N_to_C (int i);
757	void transfer_i_from_C_to_N (int i);
758	int numC() { return nC; }
759	int numN() { return nN; }
760	int indexC (int i) { return i; }
761	int indexN (int i) { return i+nC; }
762	dReal Aii (int i) { return AROW(i)[i]; }
763	dReal AiC_times_qC (int i, dReal *q) { return dDot (AROW(i),q,nC); }
764	dReal AiN_times_qN (int i, dReal *q) { return dDot (AROW(i)+nC,q+nC,nN); }
765	void pN_equals_ANC_times_qC (dReal p, dReal q);
766	void pN_plusequals_ANi (dReal *p, int i, int sign=1);
767	void pC_plusequals_s_times_qC (dReal p, dReal s, dReal q)
768	{ for (int i=0; i<nC; i++) p[i] += s*q[i]; }
769	void pN_plusequals_s_times_qN (dReal p, dReal s, dReal q)
770	{ for (int i=0; i<nN; i++) p[i+nC] += s*q[i+nC]; }
771	void solve1 (dReal *a, int i, int dir=1, int only_transfer=0);
772	void unpermute();
773	};
774
775
776	dLCP::dLCP (int _n, int _nub, dReal _Adata, dReal _x, dReal _b, dReal _w,
777	dReal _lo, dReal _hi, dReal _L, dReal _d,
778	dReal _Dell, dReal _ell, dReal *_tmp,
779	int _state, int _findex, int _p, int _C, dReal **Arows)
780	{
781	n = _n;
782	nub = _nub;
783	Adata = _Adata;
784	A = 0;
785	x = _x;
786	b = _b;
787	w = _w;
788	lo = _lo;
789	hi = _hi;
790	L = _L;
791	d = _d;
792	Dell = _Dell;
793	ell = _ell;
794	tmp = _tmp;
795	state = _state;
796	findex = _findex;
797	p = _p;
798	C = _C;
799	nskip = dPAD(n);
800	dSetZero (x,n);
801
802	int k;
803
804	# ifdef ROWPTRS
805	// make matrix row pointers
806	A = Arows;
807	for (k=0; k<n; k++) A[k] = Adata + k*nskip;
808	# else
809	A = Adata;
810	# endif
811
812	nC = 0;
813	nN = 0;
814	for (k=0; k<n; k++) p[k]=k; // initially unpermuted
815
816	/*
817	// for testing, we can do some random swaps in the area i > nub
818	if (nub < n) {
819	for (k=0; k<100; k++) {
820	int i1,i2;
821	do {
822	i1 = dRandInt(n-nub)+nub;
823	i2 = dRandInt(n-nub)+nub;
824	}
825	while (i1 > i2);
826	//printf ("--> %d %d\n",i1,i2);
827	swapProblem (A,x,b,w,lo,hi,p,state,findex,n,i1,i2,nskip,0);
828	}
829	}
830	*/
831
832	// permute the problem so that all the unbounded variables are at the
833	// start, i.e. look for unbounded variables not included in `nub'. we can
834	// potentially push up `nub' this way and get a bigger initial factorization.
835	// note that when we swap rows/cols here we must not just swap row pointers,
836	// as the initial factorization relies on the data being all in one chunk.
837	// variables that have findex >= 0 are not considered to be unbounded even
838	// if lo=-inf and hi=inf - this is because these limits may change during the
839	// solution process.
840
841	for (k=nub; k<n; k++) {
842	if (findex && findex[k] >= 0) continue;
843	if (lo[k]==-dInfinity && hi[k]==dInfinity) {
844	swapProblem (A,x,b,w,lo,hi,p,state,findex,n,nub,k,nskip,0);
845	nub++;
846	}
847	}
848
849	// if there are unbounded variables at the start, factorize A up to that
850	// point and solve for x. this puts all indexes 0..nub-1 into C.
851	if (nub > 0) {
852	for (k=0; k<nub; k++) memcpy (L+knskip,AROW(k),(k+1)sizeof(dReal));
853	dFactorLDLT (L,d,nub,nskip);
854	memcpy (x,b,nub*sizeof(dReal));
855	dSolveLDLT (L,d,x,nub,nskip);
856	dSetZero (w,nub);
857	for (k=0; k<nub; k++) C[k] = k;
858	nC = nub;
859	}
860
861	// permute the indexes > nub such that all findex variables are at the end
862	if (findex) {
863	int num_at_end = 0;
864	for (k=n-1; k >= nub; k--) {
865	if (findex[k] >= 0) {
866	swapProblem (A,x,b,w,lo,hi,p,state,findex,n,k,n-1-num_at_end,nskip,1);
867	num_at_end++;
868	}
869	}
870	}
871
872	// print info about indexes
873	/*
874	for (k=0; k<n; k++) {
875	if (k<nub) printf ("C");
876	else if (lo[k]==-dInfinity && hi[k]==dInfinity) printf ("c");
877	else printf (".");
878	}
879	printf ("\n");
880	*/
881	}
882
883
884	void dLCP::transfer_i_to_C (int i)
885	{
886	int j;
887	if (nC > 0) {
888	// ell,Dell were computed by solve1(). note, ell = D \ L1solve (L,A(i,C))
889	for (j=0; j<nC; j++) L[nC*nskip+j] = ell[j];
890	d[nC] = dRecip (AROW(i)[i] - dDot(ell,Dell,nC));
891	}
892	else {
893	d[0] = dRecip (AROW(i)[i]);
894	}
895	swapProblem (A,x,b,w,lo,hi,p,state,findex,n,nC,i,nskip,1);
896	C[nC] = nC;
897	nC++;
898
899	# ifdef DEBUG_LCP
900	checkFactorization (A,L,d,nC,C,nskip);
901	if (i < (n-1)) checkPermutations (i+1,n,nC,nN,p,C);
902	# endif
903	}
904
905
906	void dLCP::transfer_i_from_N_to_C (int i)
907	{
908	int j;
909	if (nC > 0) {
910	dReal *aptr = AROW(i);
911	# ifdef NUB_OPTIMIZATIONS
912	// if nub>0, initial part of aptr unpermuted
913	for (j=0; j<nub; j++) Dell[j] = aptr[j];
914	for (j=nub; j<nC; j++) Dell[j] = aptr[C[j]];
915	# else
916	for (j=0; j<nC; j++) Dell[j] = aptr[C[j]];
917	# endif
918	dSolveL1 (L,Dell,nC,nskip);
919	for (j=0; j<nC; j++) ell[j] = Dell[j] * d[j];
920	for (j=0; j<nC; j++) L[nC*nskip+j] = ell[j];
921	d[nC] = dRecip (AROW(i)[i] - dDot(ell,Dell,nC));
922	}
923	else {
924	d[0] = dRecip (AROW(i)[i]);
925	}
926	swapProblem (A,x,b,w,lo,hi,p,state,findex,n,nC,i,nskip,1);
927	C[nC] = nC;
928	nN--;
929	nC++;
930
931	// @@@ TO DO LATER
932	// if we just finish here then we'll go back and re-solve for
933	// delta_x. but actually we can be more efficient and incrementally
934	// update delta_x here. but if we do this, we wont have ell and Dell
935	// to use in updating the factorization later.
936
937	# ifdef DEBUG_LCP
938	checkFactorization (A,L,d,nC,C,nskip);
939	# endif
940	}
941
942
943	void dLCP::transfer_i_from_C_to_N (int i)
944	{
945	// remove a row/column from the factorization, and adjust the
946	// indexes (black magic!)
947	int j,k;
948	for (j=0; j<nC; j++) if (C[j]==i) {
949	dLDLTRemove (A,C,L,d,n,nC,j,nskip);
950	for (k=0; k<nC; k++) if (C[k]==nC-1) {
951	C[k] = C[j];
952	if (j < (nC-1)) memmove (C+j,C+j+1,(nC-j-1)*sizeof(int));
953	break;
954	}
955	dIASSERT (k < nC);
956	break;
957	}
958	dIASSERT (j < nC);
959	swapProblem (A,x,b,w,lo,hi,p,state,findex,n,i,nC-1,nskip,1);
960	nC--;
961	nN++;
962
963	# ifdef DEBUG_LCP
964	checkFactorization (A,L,d,nC,C,nskip);
965	# endif
966	}
967
968
969	void dLCP::pN_equals_ANC_times_qC (dReal p, dReal q)
970	{
971	// we could try to make this matrix-vector multiplication faster using
972	// outer product matrix tricks, e.g. with the dMultidotX() functions.
973	// but i tried it and it actually made things slower on random 100x100
974	// problems because of the overhead involved. so we'll stick with the
975	// simple method for now.
976	for (int i=0; i<nN; i++) p[i+nC] = dDot (AROW(i+nC),q,nC);
977	}
978
979
980	void dLCP::pN_plusequals_ANi (dReal *p, int i, int sign)
981	{
982	dReal *aptr = AROW(i)+nC;
983	if (sign > 0) {
984	for (int i=0; i<nN; i++) p[i+nC] += aptr[i];
985	}
986	else {
987	for (int i=0; i<nN; i++) p[i+nC] -= aptr[i];
988	}
989	}
990
991
992	void dLCP::solve1 (dReal *a, int i, int dir, int only_transfer)
993	{
994	// the `Dell' and `ell' that are computed here are saved. if index i is
995	// later added to the factorization then they can be reused.
996	//
997	// @@@ question: do we need to solve for entire delta_x??? yes, but
998	// only if an x goes below 0 during the step.
999
1000	int j;
1001	if (nC > 0) {
1002	dReal *aptr = AROW(i);
1003	# ifdef NUB_OPTIMIZATIONS
1004	// if nub>0, initial part of aptr[] is guaranteed unpermuted
1005	for (j=0; j<nub; j++) Dell[j] = aptr[j];
1006	for (j=nub; j<nC; j++) Dell[j] = aptr[C[j]];
1007	# else
1008	for (j=0; j<nC; j++) Dell[j] = aptr[C[j]];
1009	# endif
1010	dSolveL1 (L,Dell,nC,nskip);
1011	for (j=0; j<nC; j++) ell[j] = Dell[j] * d[j];
1012
1013	if (!only_transfer) {
1014	for (j=0; j<nC; j++) tmp[j] = ell[j];
1015	dSolveL1T (L,tmp,nC,nskip);
1016	if (dir > 0) {
1017	for (j=0; j<nC; j++) a[C[j]] = -tmp[j];
1018	}
1019	else {
1020	for (j=0; j<nC; j++) a[C[j]] = tmp[j];
1021	}
1022	}
1023	}
1024	}
1025
1026
1027	void dLCP::unpermute()
1028	{
1029	// now we have to un-permute x and w
1030	int j;
1031	ALLOCA (dReal,tmp,n*sizeof(dReal));
1032	#ifdef dUSE_MALLOC_FOR_ALLOCA
1033	if (tmp == NULL) {
1034	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1035	return;
1036	}
1037	#endif
1038	memcpy (tmp,x,n*sizeof(dReal));
1039	for (j=0; j<n; j++) x[p[j]] = tmp[j];
1040	memcpy (tmp,w,n*sizeof(dReal));
1041	for (j=0; j<n; j++) w[p[j]] = tmp[j];
1042
1043	UNALLOCA (tmp);
1044	}
1045
1046	#endif // dLCP_FAST
1047
1048	//***************************************************************************
1049	// an unoptimized Dantzig LCP driver routine for the basic LCP problem.
1050	// must have lo=0, hi=dInfinity, and nub=0.
1051
1052	void dSolveLCPBasic (int n, dReal A, dReal x, dReal *b,
1053	dReal w, int nub, dReal lo, dReal *hi)
1054	{
1055	dAASSERT (n>0 && A && x && b && w && nub == 0);
1056
1057	int i,k;
1058	int nskip = dPAD(n);
1059	ALLOCA (dReal,L,nnskipsizeof(dReal));
1060	#ifdef dUSE_MALLOC_FOR_ALLOCA
1061	if (L == NULL) {
1062	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1063	return;
1064	}
1065	#endif
1066	ALLOCA (dReal,d,n*sizeof(dReal));
1067	#ifdef dUSE_MALLOC_FOR_ALLOCA
1068	if (d == NULL) {
1069	UNALLOCA(L);
1070	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1071	return;
1072	}
1073	#endif
1074	ALLOCA (dReal,delta_x,n*sizeof(dReal));
1075	#ifdef dUSE_MALLOC_FOR_ALLOCA
1076	if (delta_x == NULL) {
1077	UNALLOCA(d);
1078	UNALLOCA(L);
1079	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1080	return;
1081	}
1082	#endif
1083	ALLOCA (dReal,delta_w,n*sizeof(dReal));
1084	#ifdef dUSE_MALLOC_FOR_ALLOCA
1085	if (delta_w == NULL) {
1086	UNALLOCA(delta_x);
1087	UNALLOCA(d);
1088	UNALLOCA(L);
1089	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1090	return;
1091	}
1092	#endif
1093	ALLOCA (dReal,Dell,n*sizeof(dReal));
1094	#ifdef dUSE_MALLOC_FOR_ALLOCA
1095	if (Dell == NULL) {
1096	UNALLOCA(delta_w);
1097	UNALLOCA(delta_x);
1098	UNALLOCA(d);
1099	UNALLOCA(L);
1100	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1101	return;
1102	}
1103	#endif
1104	ALLOCA (dReal,ell,n*sizeof(dReal));
1105	#ifdef dUSE_MALLOC_FOR_ALLOCA
1106	if (ell == NULL) {
1107	UNALLOCA(Dell);
1108	UNALLOCA(delta_w);
1109	UNALLOCA(delta_x);
1110	UNALLOCA(d);
1111	UNALLOCA(L);
1112	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1113	return;
1114	}
1115	#endif
1116	ALLOCA (dReal,tmp,n*sizeof(dReal));
1117	#ifdef dUSE_MALLOC_FOR_ALLOCA
1118	if (tmp == NULL) {
1119	UNALLOCA(ell);
1120	UNALLOCA(Dell);
1121	UNALLOCA(delta_w);
1122	UNALLOCA(delta_x);
1123	UNALLOCA(d);
1124	UNALLOCA(L);
1125	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1126	return;
1127	}
1128	#endif
1129	ALLOCA (dReal,Arows,nsizeof(dReal*));
1130	#ifdef dUSE_MALLOC_FOR_ALLOCA
1131	if (Arows == NULL) {
1132	UNALLOCA(tmp);
1133	UNALLOCA(ell);
1134	UNALLOCA(Dell);
1135	UNALLOCA(delta_w);
1136	UNALLOCA(delta_x);
1137	UNALLOCA(d);
1138	UNALLOCA(L);
1139	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1140	return;
1141	}
1142	#endif
1143	ALLOCA (int,p,n*sizeof(int));
1144	#ifdef dUSE_MALLOC_FOR_ALLOCA
1145	if (p == NULL) {
1146	UNALLOCA(Arows);
1147	UNALLOCA(tmp);
1148	UNALLOCA(ell);
1149	UNALLOCA(Dell);
1150	UNALLOCA(delta_w);
1151	UNALLOCA(delta_x);
1152	UNALLOCA(d);
1153	UNALLOCA(L);
1154	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1155	return;
1156	}
1157	#endif
1158	ALLOCA (int,C,n*sizeof(int));
1159	#ifdef dUSE_MALLOC_FOR_ALLOCA
1160	if (C == NULL) {
1161	UNALLOCA(p);
1162	UNALLOCA(Arows);
1163	UNALLOCA(tmp);
1164	UNALLOCA(ell);
1165	UNALLOCA(Dell);
1166	UNALLOCA(delta_w);
1167	UNALLOCA(delta_x);
1168	UNALLOCA(d);
1169	UNALLOCA(L);
1170	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1171	return;
1172	}
1173	#endif
1174	ALLOCA (int,dummy,n*sizeof(int));
1175	#ifdef dUSE_MALLOC_FOR_ALLOCA
1176	if (dummy == NULL) {
1177	UNALLOCA(C);
1178	UNALLOCA(p);
1179	UNALLOCA(Arows);
1180	UNALLOCA(tmp);
1181	UNALLOCA(ell);
1182	UNALLOCA(Dell);
1183	UNALLOCA(delta_w);
1184	UNALLOCA(delta_x);
1185	UNALLOCA(d);
1186	UNALLOCA(L);
1187	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1188	return;
1189	}
1190	#endif
1191
1192
1193	dLCP lcp (n,0,A,x,b,w,tmp,tmp,L,d,Dell,ell,tmp,dummy,dummy,p,C,Arows);
1194	nub = lcp.getNub();
1195
1196	for (i=0; i<n; i++) {
1197	w[i] = lcp.AiC_times_qC (i,x) - b[i];
1198	if (w[i] >= 0) {
1199	lcp.transfer_i_to_N (i);
1200	}
1201	else {
1202	for (;;) {
1203	// compute: delta_x(C) = -A(C,C)\A(C,i)
1204	dSetZero (delta_x,n);
1205	lcp.solve1 (delta_x,i);
1206	#ifdef dUSE_MALLOC_FOR_ALLOCA
1207	if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) {
1208	UNALLOCA(dummy);
1209	UNALLOCA(C);
1210	UNALLOCA(p);
1211	UNALLOCA(Arows);
1212	UNALLOCA(tmp);
1213	UNALLOCA(ell);
1214	UNALLOCA(Dell);
1215	UNALLOCA(delta_w);
1216	UNALLOCA(delta_x);
1217	UNALLOCA(d);
1218	UNALLOCA(L);
1219	return;
1220	}
1221	#endif
1222	delta_x[i] = 1;
1223
1224	// compute: delta_w = A*delta_x
1225	dSetZero (delta_w,n);
1226	lcp.pN_equals_ANC_times_qC (delta_w,delta_x);
1227	lcp.pN_plusequals_ANi (delta_w,i);
1228	delta_w[i] = lcp.AiC_times_qC (i,delta_x) + lcp.Aii(i);
1229
1230	// find index to switch
1231	int si = i; // si = switch index
1232	int si_in_N = 0; // set to 1 if si in N
1233	dReal s = -w[i]/delta_w[i];
1234
1235	if (s <= 0) {
1236	dMessage (d_ERR_LCP, "LCP internal error, s <= 0 (s=%.4e)",s);
1237	if (i < (n-1)) {
1238	dSetZero (x+i,n-i);
1239	dSetZero (w+i,n-i);
1240	}
1241	goto done;
1242	}
1243
1244	for (k=0; k < lcp.numN(); k++) {
1245	if (delta_w[lcp.indexN(k)] < 0) {
1246	dReal s2 = -w[lcp.indexN(k)] / delta_w[lcp.indexN(k)];
1247	if (s2 < s) {
1248	s = s2;
1249	si = lcp.indexN(k);
1250	si_in_N = 1;
1251	}
1252	}
1253	}
1254	for (k=0; k < lcp.numC(); k++) {
1255	if (delta_x[lcp.indexC(k)] < 0) {
1256	dReal s2 = -x[lcp.indexC(k)] / delta_x[lcp.indexC(k)];
1257	if (s2 < s) {
1258	s = s2;
1259	si = lcp.indexC(k);
1260	si_in_N = 0;
1261	}
1262	}
1263	}
1264
1265	// apply x = x + s * delta_x
1266	lcp.pC_plusequals_s_times_qC (x,s,delta_x);
1267	x[i] += s;
1268	lcp.pN_plusequals_s_times_qN (w,s,delta_w);
1269	w[i] += s * delta_w[i];
1270
1271	// switch indexes between sets if necessary
1272	if (si==i) {
1273	w[i] = 0;
1274	lcp.transfer_i_to_C (i);
1275	break;
1276	}
1277	if (si_in_N) {
1278	w[si] = 0;
1279	lcp.transfer_i_from_N_to_C (si);
1280	}
1281	else {
1282	x[si] = 0;
1283	lcp.transfer_i_from_C_to_N (si);
1284	}
1285	}
1286	}
1287	}
1288
1289	done:
1290	lcp.unpermute();
1291
1292	UNALLOCA (L);
1293	UNALLOCA (d);
1294	UNALLOCA (delta_x);
1295	UNALLOCA (delta_w);
1296	UNALLOCA (Dell);
1297	UNALLOCA (ell);
1298	UNALLOCA (tmp);
1299	UNALLOCA (Arows);
1300	UNALLOCA (p);
1301	UNALLOCA (C);
1302	UNALLOCA (dummy);
1303	}
1304
1305	//***************************************************************************
1306	// an optimized Dantzig LCP driver routine for the lo-hi LCP problem.
1307
1308	void dSolveLCP (int n, dReal A, dReal x, dReal *b,
1309	dReal w, int nub, dReal lo, dReal hi, int findex)
1310	{
1311	dAASSERT (n>0 && A && x && b && w && lo && hi && nub >= 0 && nub <= n);
1312
1313	int i,k,hit_first_friction_index = 0;
1314	int nskip = dPAD(n);
1315
1316	// if all the variables are unbounded then we can just factor, solve,
1317	// and return
1318	if (nub >= n) {
1319	dFactorLDLT (A,w,n,nskip); // use w for d
1320	dSolveLDLT (A,w,b,n,nskip);
1321	memcpy (x,b,n*sizeof(dReal));
1322	dSetZero (w,n);
1323
1324	return;
1325	}
1326	# ifndef dNODEBUG
1327	// check restrictions on lo and hi
1328	for (k=0; k<n; k++) dIASSERT (lo[k] <= 0 && hi[k] >= 0);
1329	# endif
1330	ALLOCA (dReal,L,nnskipsizeof(dReal));
1331	#ifdef dUSE_MALLOC_FOR_ALLOCA
1332	if (L == NULL) {
1333	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1334	return;
1335	}
1336	#endif
1337	ALLOCA (dReal,d,n*sizeof(dReal));
1338	#ifdef dUSE_MALLOC_FOR_ALLOCA
1339	if (d == NULL) {
1340	UNALLOCA(L);
1341	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1342	return;
1343	}
1344	#endif
1345	ALLOCA (dReal,delta_x,n*sizeof(dReal));
1346	#ifdef dUSE_MALLOC_FOR_ALLOCA
1347	if (delta_x == NULL) {
1348	UNALLOCA(d);
1349	UNALLOCA(L);
1350	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1351	return;
1352	}
1353	#endif
1354	ALLOCA (dReal,delta_w,n*sizeof(dReal));
1355	#ifdef dUSE_MALLOC_FOR_ALLOCA
1356	if (delta_w == NULL) {
1357	UNALLOCA(delta_x);
1358	UNALLOCA(d);
1359	UNALLOCA(L);
1360	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1361	return;
1362	}
1363	#endif
1364	ALLOCA (dReal,Dell,n*sizeof(dReal));
1365	#ifdef dUSE_MALLOC_FOR_ALLOCA
1366	if (Dell == NULL) {
1367	UNALLOCA(delta_w);
1368	UNALLOCA(delta_x);
1369	UNALLOCA(d);
1370	UNALLOCA(L);
1371	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1372	return;
1373	}
1374	#endif
1375	ALLOCA (dReal,ell,n*sizeof(dReal));
1376	#ifdef dUSE_MALLOC_FOR_ALLOCA
1377	if (ell == NULL) {
1378	UNALLOCA(Dell);
1379	UNALLOCA(delta_w);
1380	UNALLOCA(delta_x);
1381	UNALLOCA(d);
1382	UNALLOCA(L);
1383	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1384	return;
1385	}
1386	#endif
1387	ALLOCA (dReal,Arows,nsizeof(dReal*));
1388	#ifdef dUSE_MALLOC_FOR_ALLOCA
1389	if (Arows == NULL) {
1390	UNALLOCA(ell);
1391	UNALLOCA(Dell);
1392	UNALLOCA(delta_w);
1393	UNALLOCA(delta_x);
1394	UNALLOCA(d);
1395	UNALLOCA(L);
1396	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1397	return;
1398	}
1399	#endif
1400	ALLOCA (int,p,n*sizeof(int));
1401	#ifdef dUSE_MALLOC_FOR_ALLOCA
1402	if (p == NULL) {
1403	UNALLOCA(Arows);
1404	UNALLOCA(ell);
1405	UNALLOCA(Dell);
1406	UNALLOCA(delta_w);
1407	UNALLOCA(delta_x);
1408	UNALLOCA(d);
1409	UNALLOCA(L);
1410	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1411	return;
1412	}
1413	#endif
1414	ALLOCA (int,C,n*sizeof(int));
1415	#ifdef dUSE_MALLOC_FOR_ALLOCA
1416	if (C == NULL) {
1417	UNALLOCA(p);
1418	UNALLOCA(Arows);
1419	UNALLOCA(ell);
1420	UNALLOCA(Dell);
1421	UNALLOCA(delta_w);
1422	UNALLOCA(delta_x);
1423	UNALLOCA(d);
1424	UNALLOCA(L);
1425	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1426	return;
1427	}
1428	#endif
1429
1430	int dir;
1431	dReal dirf;
1432
1433	// for i in N, state[i] is 0 if x(i)==lo(i) or 1 if x(i)==hi(i)
1434	ALLOCA (int,state,n*sizeof(int));
1435	#ifdef dUSE_MALLOC_FOR_ALLOCA
1436	if (state == NULL) {
1437	UNALLOCA(C);
1438	UNALLOCA(p);
1439	UNALLOCA(Arows);
1440	UNALLOCA(ell);
1441	UNALLOCA(Dell);
1442	UNALLOCA(delta_w);
1443	UNALLOCA(delta_x);
1444	UNALLOCA(d);
1445	UNALLOCA(L);
1446	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1447	return;
1448	}
1449	#endif
1450
1451	// create LCP object. note that tmp is set to delta_w to save space, this
1452	// optimization relies on knowledge of how tmp is used, so be careful!
1453	dLCP *lcp=new dLCP(n,nub,A,x,b,w,lo,hi,L,d,Dell,ell,delta_w,state,findex,p,C,Arows);
1454	nub = lcp->getNub();
1455
1456	// loop over all indexes nub..n-1. for index i, if x(i),w(i) satisfy the
1457	// LCP conditions then i is added to the appropriate index set. otherwise
1458	// x(i),w(i) is driven either +ve or -ve to force it to the valid region.
1459	// as we drive x(i), x(C) is also adjusted to keep w(C) at zero.
1460	// while driving x(i) we maintain the LCP conditions on the other variables
1461	// 0..i-1. we do this by watching out for other x(i),w(i) values going
1462	// outside the valid region, and then switching them between index sets
1463	// when that happens.
1464
1465	for (i=nub; i<n; i++) {
1466	// the index i is the driving index and indexes i+1..n-1 are "dont care",
1467	// i.e. when we make changes to the system those x's will be zero and we
1468	// don't care what happens to those w's. in other words, we only consider
1469	// an (i+1)(i+1) sub-problem of Ax=b+w.
1470
1471	// if we've hit the first friction index, we have to compute the lo and
1472	// hi values based on the values of x already computed. we have been
1473	// permuting the indexes, so the values stored in the findex vector are
1474	// no longer valid. thus we have to temporarily unpermute the x vector.
1475	// for the purposes of this computation, 0*infinity = 0 ... so if the
1476	// contact constraint's normal force is 0, there should be no tangential
1477	// force applied.
1478
1479	if (hit_first_friction_index == 0 && findex && findex[i] >= 0) {
1480	// un-permute x into delta_w, which is not being used at the moment
1481	for (k=0; k<n; k++) delta_w[p[k]] = x[k];
1482
1483	// set lo and hi values
1484	for (k=i; k<n; k++) {
1485	dReal wfk = delta_w[findex[k]];
1486	if (wfk == 0) {
1487	hi[k] = 0;
1488	lo[k] = 0;
1489	}
1490	else {
1491	hi[k] = dFabs (hi[k] * wfk);
1492	lo[k] = -hi[k];
1493	}
1494	}
1495	hit_first_friction_index = 1;
1496	}
1497
1498	// thus far we have not even been computing the w values for indexes
1499	// greater than i, so compute w[i] now.
1500	w[i] = lcp->AiC_times_qC (i,x) + lcp->AiN_times_qN (i,x) - b[i];
1501
1502	// if lo=hi=0 (which can happen for tangential friction when normals are
1503	// 0) then the index will be assigned to set N with some state. however,
1504	// set C's line has zero size, so the index will always remain in set N.
1505	// with the "normal" switching logic, if w changed sign then the index
1506	// would have to switch to set C and then back to set N with an inverted
1507	// state. this is pointless, and also computationally expensive. to
1508	// prevent this from happening, we use the rule that indexes with lo=hi=0
1509	// will never be checked for set changes. this means that the state for
1510	// these indexes may be incorrect, but that doesn't matter.
1511
1512	// see if x(i),w(i) is in a valid region
1513	if (lo[i]==0 && w[i] >= 0) {
1514	lcp->transfer_i_to_N (i);
1515	state[i] = 0;
1516	}
1517	else if (hi[i]==0 && w[i] <= 0) {
1518	lcp->transfer_i_to_N (i);
1519	state[i] = 1;
1520	}
1521	else if (w[i]==0) {
1522	// this is a degenerate case. by the time we get to this test we know
1523	// that lo != 0, which means that lo < 0 as lo is not allowed to be +ve,
1524	// and similarly that hi > 0. this means that the line segment
1525	// corresponding to set C is at least finite in extent, and we are on it.
1526	// NOTE: we must call lcp->solve1() before lcp->transfer_i_to_C()
1527	lcp->solve1 (delta_x,i,0,1);
1528
1529	#ifdef dUSE_MALLOC_FOR_ALLOCA
1530	if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) {
1531	UNALLOCA(state);
1532	UNALLOCA(C);
1533	UNALLOCA(p);
1534	UNALLOCA(Arows);
1535	UNALLOCA(ell);
1536	UNALLOCA(Dell);
1537	UNALLOCA(delta_w);
1538	UNALLOCA(delta_x);
1539	UNALLOCA(d);
1540	UNALLOCA(L);
1541	return;
1542	}
1543	#endif
1544
1545	lcp->transfer_i_to_C (i);
1546	}
1547	else {
1548	// we must push x(i) and w(i)
1549	for (;;) {
1550	// find direction to push on x(i)
1551	if (w[i] <= 0) {
1552	dir = 1;
1553	dirf = REAL(1.0);
1554	}
1555	else {
1556	dir = -1;
1557	dirf = REAL(-1.0);
1558	}
1559
1560	// compute: delta_x(C) = -dir*A(C,C)\A(C,i)
1561	lcp->solve1 (delta_x,i,dir);
1562
1563	#ifdef dUSE_MALLOC_FOR_ALLOCA
1564	if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) {
1565	UNALLOCA(state);
1566	UNALLOCA(C);
1567	UNALLOCA(p);
1568	UNALLOCA(Arows);
1569	UNALLOCA(ell);
1570	UNALLOCA(Dell);
1571	UNALLOCA(delta_w);
1572	UNALLOCA(delta_x);
1573	UNALLOCA(d);
1574	UNALLOCA(L);
1575	return;
1576	}
1577	#endif
1578
1579	// note that delta_x[i] = dirf, but we wont bother to set it
1580
1581	// compute: delta_w = A*delta_x ... note we only care about
1582	// delta_w(N) and delta_w(i), the rest is ignored
1583	lcp->pN_equals_ANC_times_qC (delta_w,delta_x);
1584	lcp->pN_plusequals_ANi (delta_w,i,dir);
1585	delta_w[i] = lcp->AiC_times_qC (i,delta_x) + lcp->Aii(i)*dirf;
1586
1587	// find largest step we can take (size=s), either to drive x(i),w(i)
1588	// to the valid LCP region or to drive an already-valid variable
1589	// outside the valid region.
1590
1591	int cmd = 1; // index switching command
1592	int si = 0; // si = index to switch if cmd>3
1593	dReal s = -w[i]/delta_w[i];
1594	if (dir > 0) {
1595	if (hi[i] < dInfinity) {
1596	dReal s2 = (hi[i]-x[i])/dirf; // step to x(i)=hi(i)
1597	if (s2 < s) {
1598	s = s2;
1599	cmd = 3;
1600	}
1601	}
1602	}
1603	else {
1604	if (lo[i] > -dInfinity) {
1605	dReal s2 = (lo[i]-x[i])/dirf; // step to x(i)=lo(i)
1606	if (s2 < s) {
1607	s = s2;
1608	cmd = 2;
1609	}
1610	}
1611	}
1612
1613	for (k=0; k < lcp->numN(); k++) {
1614	if ((state[lcp->indexN(k)]==0 && delta_w[lcp->indexN(k)] < 0) \|\|
1615	(state[lcp->indexN(k)]!=0 && delta_w[lcp->indexN(k)] > 0)) {
1616	// don't bother checking if lo=hi=0
1617	if (lo[lcp->indexN(k)] == 0 && hi[lcp->indexN(k)] == 0) continue;
1618	dReal s2 = -w[lcp->indexN(k)] / delta_w[lcp->indexN(k)];
1619	if (s2 < s) {
1620	s = s2;
1621	cmd = 4;
1622	si = lcp->indexN(k);
1623	}
1624	}
1625	}
1626
1627	for (k=nub; k < lcp->numC(); k++) {
1628	if (delta_x[lcp->indexC(k)] < 0 && lo[lcp->indexC(k)] > -dInfinity) {
1629	dReal s2 = (lo[lcp->indexC(k)]-x[lcp->indexC(k)]) /
1630	delta_x[lcp->indexC(k)];
1631	if (s2 < s) {
1632	s = s2;
1633	cmd = 5;
1634	si = lcp->indexC(k);
1635	}
1636	}
1637	if (delta_x[lcp->indexC(k)] > 0 && hi[lcp->indexC(k)] < dInfinity) {
1638	dReal s2 = (hi[lcp->indexC(k)]-x[lcp->indexC(k)]) /
1639	delta_x[lcp->indexC(k)];
1640	if (s2 < s) {
1641	s = s2;
1642	cmd = 6;
1643	si = lcp->indexC(k);
1644	}
1645	}
1646	}
1647
1648	//static char* cmdstring[8] = {0,"->C","->NL","->NH","N->C",
1649	// "C->NL","C->NH"};
1650	//printf ("cmd=%d (%s), si=%d\n",cmd,cmdstring[cmd],(cmd>3) ? si : i);
1651
1652	// if s <= 0 then we've got a problem. if we just keep going then
1653	// we're going to get stuck in an infinite loop. instead, just cross
1654	// our fingers and exit with the current solution.
1655	if (s <= 0) {
1656	dMessage (d_ERR_LCP, "LCP internal error, s <= 0 (s=%.4e)",s);
1657	if (i < (n-1)) {
1658	dSetZero (x+i,n-i);
1659	dSetZero (w+i,n-i);
1660	}
1661	goto done;
1662	}
1663
1664	// apply x = x + s * delta_x
1665	lcp->pC_plusequals_s_times_qC (x,s,delta_x);
1666	x[i] += s * dirf;
1667
1668	// apply w = w + s * delta_w
1669	lcp->pN_plusequals_s_times_qN (w,s,delta_w);
1670	w[i] += s * delta_w[i];
1671
1672	// switch indexes between sets if necessary
1673	switch (cmd) {
1674	case 1: // done
1675	w[i] = 0;
1676	lcp->transfer_i_to_C (i);
1677	break;
1678	case 2: // done
1679	x[i] = lo[i];
1680	state[i] = 0;
1681	lcp->transfer_i_to_N (i);
1682	break;
1683	case 3: // done
1684	x[i] = hi[i];
1685	state[i] = 1;
1686	lcp->transfer_i_to_N (i);
1687	break;
1688	case 4: // keep going
1689	w[si] = 0;
1690	lcp->transfer_i_from_N_to_C (si);
1691	break;
1692	case 5: // keep going
1693	x[si] = lo[si];
1694	state[si] = 0;
1695	lcp->transfer_i_from_C_to_N (si);
1696	break;
1697	case 6: // keep going
1698	x[si] = hi[si];
1699	state[si] = 1;
1700	lcp->transfer_i_from_C_to_N (si);
1701	break;
1702	}
1703
1704	if (cmd <= 3) break;
1705	}
1706	}
1707	}
1708
1709	done:
1710	lcp->unpermute();
1711	delete lcp;
1712
1713	UNALLOCA (L);
1714	UNALLOCA (d);
1715	UNALLOCA (delta_x);
1716	UNALLOCA (delta_w);
1717	UNALLOCA (Dell);
1718	UNALLOCA (ell);
1719	UNALLOCA (Arows);
1720	UNALLOCA (p);
1721	UNALLOCA (C);
1722	UNALLOCA (state);
1723	}
1724
1725	//***************************************************************************
1726	// accuracy and timing test
1727
1728	extern "C" ODE_API void dTestSolveLCP()
1729	{
1730	int n = 100;
1731	int i,nskip = dPAD(n);
1732	#ifdef dDOUBLE
1733	const dReal tol = REAL(1e-9);
1734	#endif
1735	#ifdef dSINGLE
1736	const dReal tol = REAL(1e-4);
1737	#endif
1738	printf ("dTestSolveLCP()\n");
1739
1740	ALLOCA (dReal,A,nnskipsizeof(dReal));
1741	#ifdef dUSE_MALLOC_FOR_ALLOCA
1742	if (A == NULL) {
1743	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1744	return;
1745	}
1746	#endif
1747	ALLOCA (dReal,x,n*sizeof(dReal));
1748	#ifdef dUSE_MALLOC_FOR_ALLOCA
1749	if (x == NULL) {
1750	UNALLOCA (A);
1751	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1752	return;
1753	}
1754	#endif
1755	ALLOCA (dReal,b,n*sizeof(dReal));
1756	#ifdef dUSE_MALLOC_FOR_ALLOCA
1757	if (b == NULL) {
1758	UNALLOCA (x);
1759	UNALLOCA (A);
1760	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1761	return;
1762	}
1763	#endif
1764	ALLOCA (dReal,w,n*sizeof(dReal));
1765	#ifdef dUSE_MALLOC_FOR_ALLOCA
1766	if (w == NULL) {
1767	UNALLOCA (b);
1768	UNALLOCA (x);
1769	UNALLOCA (A);
1770	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1771	return;
1772	}
1773	#endif
1774	ALLOCA (dReal,lo,n*sizeof(dReal));
1775	#ifdef dUSE_MALLOC_FOR_ALLOCA
1776	if (lo == NULL) {
1777	UNALLOCA (w);
1778	UNALLOCA (b);
1779	UNALLOCA (x);
1780	UNALLOCA (A);
1781	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1782	return;
1783	}
1784	#endif
1785	ALLOCA (dReal,hi,n*sizeof(dReal));
1786	#ifdef dUSE_MALLOC_FOR_ALLOCA
1787	if (hi == NULL) {
1788	UNALLOCA (lo);
1789	UNALLOCA (w);
1790	UNALLOCA (b);
1791	UNALLOCA (x);
1792	UNALLOCA (A);
1793	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1794	return;
1795	}
1796	#endif
1797
1798	ALLOCA (dReal,A2,nnskipsizeof(dReal));
1799	#ifdef dUSE_MALLOC_FOR_ALLOCA
1800	if (A2 == NULL) {
1801	UNALLOCA (hi);
1802	UNALLOCA (lo);
1803	UNALLOCA (w);
1804	UNALLOCA (b);
1805	UNALLOCA (x);
1806	UNALLOCA (A);
1807	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1808	return;
1809	}
1810	#endif
1811	ALLOCA (dReal,b2,n*sizeof(dReal));
1812	#ifdef dUSE_MALLOC_FOR_ALLOCA
1813	if (b2 == NULL) {
1814	UNALLOCA (A2);
1815	UNALLOCA (hi);
1816	UNALLOCA (lo);
1817	UNALLOCA (w);
1818	UNALLOCA (b);
1819	UNALLOCA (x);
1820	UNALLOCA (A);
1821	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1822	return;
1823	}
1824	#endif
1825	ALLOCA (dReal,lo2,n*sizeof(dReal));
1826	#ifdef dUSE_MALLOC_FOR_ALLOCA
1827	if (lo2 == NULL) {
1828	UNALLOCA (b2);
1829	UNALLOCA (A2);
1830	UNALLOCA (hi);
1831	UNALLOCA (lo);
1832	UNALLOCA (w);
1833	UNALLOCA (b);
1834	UNALLOCA (x);
1835	UNALLOCA (A);
1836	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1837	return;
1838	}
1839	#endif
1840	ALLOCA (dReal,hi2,n*sizeof(dReal));
1841	#ifdef dUSE_MALLOC_FOR_ALLOCA
1842	if (hi2 == NULL) {
1843	UNALLOCA (lo2);
1844	UNALLOCA (b2);
1845	UNALLOCA (A2);
1846	UNALLOCA (hi);
1847	UNALLOCA (lo);
1848	UNALLOCA (w);
1849	UNALLOCA (b);
1850	UNALLOCA (x);
1851	UNALLOCA (A);
1852	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1853	return;
1854	}
1855	#endif
1856	ALLOCA (dReal,tmp1,n*sizeof(dReal));
1857	#ifdef dUSE_MALLOC_FOR_ALLOCA
1858	if (tmp1 == NULL) {
1859	UNALLOCA (hi2);
1860	UNALLOCA (lo2);
1861	UNALLOCA (b2);
1862	UNALLOCA (A2);
1863	UNALLOCA (hi);
1864	UNALLOCA (lo);
1865	UNALLOCA (w);
1866	UNALLOCA (b);
1867	UNALLOCA (x);
1868	UNALLOCA (A);
1869	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1870	return;
1871	}
1872	#endif
1873	ALLOCA (dReal,tmp2,n*sizeof(dReal));
1874	#ifdef dUSE_MALLOC_FOR_ALLOCA
1875	if (tmp2 == NULL) {
1876	UNALLOCA (tmp1);
1877	UNALLOCA (hi2);
1878	UNALLOCA (lo2);
1879	UNALLOCA (b2);
1880	UNALLOCA (A2);
1881	UNALLOCA (hi);
1882	UNALLOCA (lo);
1883	UNALLOCA (w);
1884	UNALLOCA (b);
1885	UNALLOCA (x);
1886	UNALLOCA (A);
1887	dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1888	return;
1889	}
1890	#endif
1891
1892	double total_time = 0;
1893	for (int count=0; count < 1000; count++) {
1894
1895	// form (A,b) = a random positive definite LCP problem
1896	dMakeRandomMatrix (A2,n,n,1.0);
1897	dMultiply2 (A,A2,A2,n,n,n);
1898	dMakeRandomMatrix (x,n,1,1.0);
1899	dMultiply0 (b,A,x,n,n,1);
1900	for (i=0; i<n; i++) b[i] += (dRandReal()*REAL(0.2))-REAL(0.1);
1901
1902	// choose `nub' in the range 0..n-1
1903	int nub = 50; //dRandInt (n);
1904
1905	// make limits
1906	for (i=0; i<nub; i++) lo[i] = -dInfinity;
1907	for (i=0; i<nub; i++) hi[i] = dInfinity;
1908	//for (i=nub; i<n; i++) lo[i] = 0;
1909	//for (i=nub; i<n; i++) hi[i] = dInfinity;
1910	//for (i=nub; i<n; i++) lo[i] = -dInfinity;
1911	//for (i=nub; i<n; i++) hi[i] = 0;
1912	for (i=nub; i<n; i++) lo[i] = -(dRandReal()*REAL(1.0))-REAL(0.01);
1913	for (i=nub; i<n; i++) hi[i] = (dRandReal()*REAL(1.0))+REAL(0.01);
1914
1915	// set a few limits to lo=hi=0
1916	/*
1917	for (i=0; i<10; i++) {
1918	int j = dRandInt (n-nub) + nub;
1919	lo[j] = 0;
1920	hi[j] = 0;
1921	}
1922	*/
1923
1924	// solve the LCP. we must make copy of A,b,lo,hi (A2,b2,lo2,hi2) for
1925	// SolveLCP() to permute. also, we'll clear the upper triangle of A2 to
1926	// ensure that it doesn't get referenced (if it does, the answer will be
1927	// wrong).
1928
1929	memcpy (A2,A,nnskipsizeof(dReal));
1930	dClearUpperTriangle (A2,n);
1931	memcpy (b2,b,n*sizeof(dReal));
1932	memcpy (lo2,lo,n*sizeof(dReal));
1933	memcpy (hi2,hi,n*sizeof(dReal));
1934	dSetZero (x,n);
1935	dSetZero (w,n);
1936
1937	dStopwatch sw;
1938	dStopwatchReset (&sw);
1939	dStopwatchStart (&sw);
1940
1941	dSolveLCP (n,A2,x,b2,w,nub,lo2,hi2,0);
1942	#ifdef dUSE_MALLOC_FOR_ALLOCA
1943	if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) {
1944	UNALLOCA (tmp2);
1945	UNALLOCA (tmp1);
1946	UNALLOCA (hi2);
1947	UNALLOCA (lo2);
1948	UNALLOCA (b2);
1949	UNALLOCA (A2);
1950	UNALLOCA (hi);
1951	UNALLOCA (lo);
1952	UNALLOCA (w);
1953	UNALLOCA (b);
1954	UNALLOCA (x);
1955	UNALLOCA (A);
1956	return;
1957	}
1958	#endif
1959
1960	dStopwatchStop (&sw);
1961	double time = dStopwatchTime(&sw);
1962	total_time += time;
1963	double average = total_time / double(count+1) * 1000.0;
1964
1965	// check the solution
1966
1967	dMultiply0 (tmp1,A,x,n,n,1);
1968	for (i=0; i<n; i++) tmp2[i] = b[i] + w[i];
1969	dReal diff = dMaxDifference (tmp1,tmp2,n,1);
1970	// printf ("\tA*x = b+w, maximum difference = %.6e - %s (1)\n",diff,
1971	// diff > tol ? "FAILED" : "passed");
1972	if (diff > tol) dDebug (0,"A*x = b+w, maximum difference = %.6e",diff);
1973	int n1=0,n2=0,n3=0;
1974	for (i=0; i<n; i++) {
1975	if (x[i]==lo[i] && w[i] >= 0) {
1976	n1++; // ok
1977	}
1978	else if (x[i]==hi[i] && w[i] <= 0) {
1979	n2++; // ok
1980	}
1981	else if (x[i] >= lo[i] && x[i] <= hi[i] && w[i] == 0) {
1982	n3++; // ok
1983	}
1984	else {
1985	dDebug (0,"FAILED: i=%d x=%.4e w=%.4e lo=%.4e hi=%.4e",i,
1986	x[i],w[i],lo[i],hi[i]);
1987	}
1988	}
1989
1990	// pacifier
1991	printf ("passed: NL=%3d NH=%3d C=%3d ",n1,n2,n3);
1992	printf ("time=%10.3f ms avg=%10.4f\n",time * 1000.0,average);
1993	}
1994
1995	UNALLOCA (A);
1996	UNALLOCA (x);
1997	UNALLOCA (b);
1998	UNALLOCA (w);
1999	UNALLOCA (lo);
2000	UNALLOCA (hi);
2001	UNALLOCA (A2);
2002	UNALLOCA (b2);
2003	UNALLOCA (lo2);
2004	UNALLOCA (hi2);
2005	UNALLOCA (tmp1);
2006	UNALLOCA (tmp2);
2007	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: