1 | /************************************************************************* |
---|
2 | * * |
---|
3 | * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith. * |
---|
4 | * All rights reserved. Email: russ@q12.org Web: www.q12.org * |
---|
5 | * * |
---|
6 | * This library is free software; you can redistribute it and/or * |
---|
7 | * modify it under the terms of EITHER: * |
---|
8 | * (1) The GNU Lesser General Public License as published by the Free * |
---|
9 | * Software Foundation; either version 2.1 of the License, or (at * |
---|
10 | * your option) any later version. The text of the GNU Lesser * |
---|
11 | * General Public License is included with this library in the * |
---|
12 | * file LICENSE.TXT. * |
---|
13 | * (2) The BSD-style license that is included with this library in * |
---|
14 | * the file LICENSE-BSD.TXT. * |
---|
15 | * * |
---|
16 | * This library is distributed in the hope that it will be useful, * |
---|
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
---|
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files * |
---|
19 | * LICENSE.TXT and LICENSE-BSD.TXT for more details. * |
---|
20 | * * |
---|
21 | *************************************************************************/ |
---|
22 | |
---|
23 | #include "objects.h" |
---|
24 | #include "joint.h" |
---|
25 | #include <ode/config.h> |
---|
26 | #include <ode/odemath.h> |
---|
27 | #include <ode/rotation.h> |
---|
28 | #include <ode/timer.h> |
---|
29 | #include <ode/error.h> |
---|
30 | #include <ode/matrix.h> |
---|
31 | #include <ode/misc.h> |
---|
32 | #include "lcp.h" |
---|
33 | #include "util.h" |
---|
34 | |
---|
35 | #define ALLOCA dALLOCA16 |
---|
36 | |
---|
37 | typedef const dReal *dRealPtr; |
---|
38 | typedef dReal *dRealMutablePtr; |
---|
39 | #define dRealArray(name,n) dReal name[n]; |
---|
40 | #define dRealAllocaArray(name,n) dReal *name = (dReal*) ALLOCA ((n)*sizeof(dReal)); |
---|
41 | |
---|
42 | //*************************************************************************** |
---|
43 | // configuration |
---|
44 | |
---|
45 | // for the SOR and CG methods: |
---|
46 | // uncomment the following line to use warm starting. this definitely |
---|
47 | // help for motor-driven joints. unfortunately it appears to hurt |
---|
48 | // with high-friction contacts using the SOR method. use with care |
---|
49 | |
---|
50 | //#define WARM_STARTING 1 |
---|
51 | |
---|
52 | |
---|
53 | // for the SOR method: |
---|
54 | // uncomment the following line to determine a new constraint-solving |
---|
55 | // order for each iteration. however, the qsort per iteration is expensive, |
---|
56 | // and the optimal order is somewhat problem dependent. |
---|
57 | // @@@ try the leaf->root ordering. |
---|
58 | |
---|
59 | //#define REORDER_CONSTRAINTS 1 |
---|
60 | |
---|
61 | |
---|
62 | // for the SOR method: |
---|
63 | // uncomment the following line to randomly reorder constraint rows |
---|
64 | // during the solution. depending on the situation, this can help a lot |
---|
65 | // or hardly at all, but it doesn't seem to hurt. |
---|
66 | |
---|
67 | #define RANDOMLY_REORDER_CONSTRAINTS 1 |
---|
68 | |
---|
69 | //**************************************************************************** |
---|
70 | // special matrix multipliers |
---|
71 | |
---|
72 | // multiply block of B matrix (q x 6) with 12 dReal per row with C vektor (q) |
---|
73 | static void Multiply1_12q1 (dReal *A, dReal *B, dReal *C, int q) |
---|
74 | { |
---|
75 | int k; |
---|
76 | dReal sum; |
---|
77 | dIASSERT (q>0 && A && B && C); |
---|
78 | sum = 0; |
---|
79 | for (k=0; k<q; k++) sum += B[k*12] * C[k]; |
---|
80 | A[0] = sum; |
---|
81 | sum = 0; |
---|
82 | for (k=0; k<q; k++) sum += B[1+k*12] * C[k]; |
---|
83 | A[1] = sum; |
---|
84 | sum = 0; |
---|
85 | for (k=0; k<q; k++) sum += B[2+k*12] * C[k]; |
---|
86 | A[2] = sum; |
---|
87 | sum = 0; |
---|
88 | for (k=0; k<q; k++) sum += B[3+k*12] * C[k]; |
---|
89 | A[3] = sum; |
---|
90 | sum = 0; |
---|
91 | for (k=0; k<q; k++) sum += B[4+k*12] * C[k]; |
---|
92 | A[4] = sum; |
---|
93 | sum = 0; |
---|
94 | for (k=0; k<q; k++) sum += B[5+k*12] * C[k]; |
---|
95 | A[5] = sum; |
---|
96 | } |
---|
97 | |
---|
98 | //*************************************************************************** |
---|
99 | // testing stuff |
---|
100 | |
---|
101 | #ifdef TIMING |
---|
102 | #define IFTIMING(x) x |
---|
103 | #else |
---|
104 | #define IFTIMING(x) /* */ |
---|
105 | #endif |
---|
106 | |
---|
107 | //*************************************************************************** |
---|
108 | // various common computations involving the matrix J |
---|
109 | |
---|
110 | // compute iMJ = inv(M)*J' |
---|
111 | |
---|
112 | static void compute_invM_JT (int m, dRealMutablePtr J, dRealMutablePtr iMJ, int *jb, |
---|
113 | dxBody * const *body, dRealPtr invI) |
---|
114 | { |
---|
115 | int i,j; |
---|
116 | dRealMutablePtr iMJ_ptr = iMJ; |
---|
117 | dRealMutablePtr J_ptr = J; |
---|
118 | for (i=0; i<m; i++) { |
---|
119 | int b1 = jb[i*2]; |
---|
120 | int b2 = jb[i*2+1]; |
---|
121 | dReal k = body[b1]->invMass; |
---|
122 | for (j=0; j<3; j++) iMJ_ptr[j] = k*J_ptr[j]; |
---|
123 | dMULTIPLY0_331 (iMJ_ptr + 3, invI + 12*b1, J_ptr + 3); |
---|
124 | if (b2 >= 0) { |
---|
125 | k = body[b2]->invMass; |
---|
126 | for (j=0; j<3; j++) iMJ_ptr[j+6] = k*J_ptr[j+6]; |
---|
127 | dMULTIPLY0_331 (iMJ_ptr + 9, invI + 12*b2, J_ptr + 9); |
---|
128 | } |
---|
129 | J_ptr += 12; |
---|
130 | iMJ_ptr += 12; |
---|
131 | } |
---|
132 | } |
---|
133 | |
---|
134 | |
---|
135 | // compute out = inv(M)*J'*in. |
---|
136 | |
---|
137 | static void multiply_invM_JT (int m, int nb, dRealMutablePtr iMJ, int *jb, |
---|
138 | dRealMutablePtr in, dRealMutablePtr out) |
---|
139 | { |
---|
140 | int i,j; |
---|
141 | dSetZero (out,6*nb); |
---|
142 | dRealPtr iMJ_ptr = iMJ; |
---|
143 | for (i=0; i<m; i++) { |
---|
144 | int b1 = jb[i*2]; |
---|
145 | int b2 = jb[i*2+1]; |
---|
146 | dRealMutablePtr out_ptr = out + b1*6; |
---|
147 | for (j=0; j<6; j++) out_ptr[j] += iMJ_ptr[j] * in[i]; |
---|
148 | iMJ_ptr += 6; |
---|
149 | if (b2 >= 0) { |
---|
150 | out_ptr = out + b2*6; |
---|
151 | for (j=0; j<6; j++) out_ptr[j] += iMJ_ptr[j] * in[i]; |
---|
152 | } |
---|
153 | iMJ_ptr += 6; |
---|
154 | } |
---|
155 | } |
---|
156 | |
---|
157 | |
---|
158 | // compute out = J*in. |
---|
159 | |
---|
160 | static void multiply_J (int m, dRealMutablePtr J, int *jb, |
---|
161 | dRealMutablePtr in, dRealMutablePtr out) |
---|
162 | { |
---|
163 | int i,j; |
---|
164 | dRealPtr J_ptr = J; |
---|
165 | for (i=0; i<m; i++) { |
---|
166 | int b1 = jb[i*2]; |
---|
167 | int b2 = jb[i*2+1]; |
---|
168 | dReal sum = 0; |
---|
169 | dRealMutablePtr in_ptr = in + b1*6; |
---|
170 | for (j=0; j<6; j++) sum += J_ptr[j] * in_ptr[j]; |
---|
171 | J_ptr += 6; |
---|
172 | if (b2 >= 0) { |
---|
173 | in_ptr = in + b2*6; |
---|
174 | for (j=0; j<6; j++) sum += J_ptr[j] * in_ptr[j]; |
---|
175 | } |
---|
176 | J_ptr += 6; |
---|
177 | out[i] = sum; |
---|
178 | } |
---|
179 | } |
---|
180 | |
---|
181 | |
---|
182 | // compute out = (J*inv(M)*J' + cfm)*in. |
---|
183 | // use z as an nb*6 temporary. |
---|
184 | |
---|
185 | static void multiply_J_invM_JT (int m, int nb, dRealMutablePtr J, dRealMutablePtr iMJ, int *jb, |
---|
186 | dRealPtr cfm, dRealMutablePtr z, dRealMutablePtr in, dRealMutablePtr out) |
---|
187 | { |
---|
188 | multiply_invM_JT (m,nb,iMJ,jb,in,z); |
---|
189 | multiply_J (m,J,jb,z,out); |
---|
190 | |
---|
191 | // add cfm |
---|
192 | for (int i=0; i<m; i++) out[i] += cfm[i] * in[i]; |
---|
193 | } |
---|
194 | |
---|
195 | //*************************************************************************** |
---|
196 | // conjugate gradient method with jacobi preconditioner |
---|
197 | // THIS IS EXPERIMENTAL CODE that doesn't work too well, so it is ifdefed out. |
---|
198 | // |
---|
199 | // adding CFM seems to be critically important to this method. |
---|
200 | |
---|
201 | #if 0 |
---|
202 | |
---|
203 | static inline dReal dot (int n, dRealPtr x, dRealPtr y) |
---|
204 | { |
---|
205 | dReal sum=0; |
---|
206 | for (int i=0; i<n; i++) sum += x[i]*y[i]; |
---|
207 | return sum; |
---|
208 | } |
---|
209 | |
---|
210 | |
---|
211 | // x = y + z*alpha |
---|
212 | |
---|
213 | static inline void add (int n, dRealMutablePtr x, dRealPtr y, dRealPtr z, dReal alpha) |
---|
214 | { |
---|
215 | for (int i=0; i<n; i++) x[i] = y[i] + z[i]*alpha; |
---|
216 | } |
---|
217 | |
---|
218 | |
---|
219 | static void CG_LCP (int m, int nb, dRealMutablePtr J, int *jb, dxBody * const *body, |
---|
220 | dRealPtr invI, dRealMutablePtr lambda, dRealMutablePtr fc, dRealMutablePtr b, |
---|
221 | dRealMutablePtr lo, dRealMutablePtr hi, dRealPtr cfm, int *findex, |
---|
222 | dxQuickStepParameters *qs) |
---|
223 | { |
---|
224 | int i,j; |
---|
225 | const int num_iterations = qs->num_iterations; |
---|
226 | |
---|
227 | // precompute iMJ = inv(M)*J' |
---|
228 | dRealAllocaArray (iMJ,m*12); |
---|
229 | compute_invM_JT (m,J,iMJ,jb,body,invI); |
---|
230 | |
---|
231 | dReal last_rho = 0; |
---|
232 | dRealAllocaArray (r,m); |
---|
233 | dRealAllocaArray (z,m); |
---|
234 | dRealAllocaArray (p,m); |
---|
235 | dRealAllocaArray (q,m); |
---|
236 | |
---|
237 | // precompute 1 / diagonals of A |
---|
238 | dRealAllocaArray (Ad,m); |
---|
239 | dRealPtr iMJ_ptr = iMJ; |
---|
240 | dRealPtr J_ptr = J; |
---|
241 | for (i=0; i<m; i++) { |
---|
242 | dReal sum = 0; |
---|
243 | for (j=0; j<6; j++) sum += iMJ_ptr[j] * J_ptr[j]; |
---|
244 | if (jb[i*2+1] >= 0) { |
---|
245 | for (j=6; j<12; j++) sum += iMJ_ptr[j] * J_ptr[j]; |
---|
246 | } |
---|
247 | iMJ_ptr += 12; |
---|
248 | J_ptr += 12; |
---|
249 | Ad[i] = REAL(1.0) / (sum + cfm[i]); |
---|
250 | } |
---|
251 | |
---|
252 | #ifdef WARM_STARTING |
---|
253 | // compute residual r = b - A*lambda |
---|
254 | multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,lambda,r); |
---|
255 | for (i=0; i<m; i++) r[i] = b[i] - r[i]; |
---|
256 | #else |
---|
257 | dSetZero (lambda,m); |
---|
258 | memcpy (r,b,m*sizeof(dReal)); // residual r = b - A*lambda |
---|
259 | #endif |
---|
260 | |
---|
261 | for (int iteration=0; iteration < num_iterations; iteration++) { |
---|
262 | for (i=0; i<m; i++) z[i] = r[i]*Ad[i]; // z = inv(M)*r |
---|
263 | dReal rho = dot (m,r,z); // rho = r'*z |
---|
264 | |
---|
265 | // @@@ |
---|
266 | // we must check for convergence, otherwise rho will go to 0 if |
---|
267 | // we get an exact solution, which will introduce NaNs into the equations. |
---|
268 | if (rho < 1e-10) { |
---|
269 | printf ("CG returned at iteration %d\n",iteration); |
---|
270 | break; |
---|
271 | } |
---|
272 | |
---|
273 | if (iteration==0) { |
---|
274 | memcpy (p,z,m*sizeof(dReal)); // p = z |
---|
275 | } |
---|
276 | else { |
---|
277 | add (m,p,z,p,rho/last_rho); // p = z + (rho/last_rho)*p |
---|
278 | } |
---|
279 | |
---|
280 | // compute q = (J*inv(M)*J')*p |
---|
281 | multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,p,q); |
---|
282 | |
---|
283 | dReal alpha = rho/dot (m,p,q); // alpha = rho/(p'*q) |
---|
284 | add (m,lambda,lambda,p,alpha); // lambda = lambda + alpha*p |
---|
285 | add (m,r,r,q,-alpha); // r = r - alpha*q |
---|
286 | last_rho = rho; |
---|
287 | } |
---|
288 | |
---|
289 | // compute fc = inv(M)*J'*lambda |
---|
290 | multiply_invM_JT (m,nb,iMJ,jb,lambda,fc); |
---|
291 | |
---|
292 | #if 0 |
---|
293 | // measure solution error |
---|
294 | multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,lambda,r); |
---|
295 | dReal error = 0; |
---|
296 | for (i=0; i<m; i++) error += dFabs(r[i] - b[i]); |
---|
297 | printf ("lambda error = %10.6e\n",error); |
---|
298 | #endif |
---|
299 | } |
---|
300 | |
---|
301 | #endif |
---|
302 | |
---|
303 | //*************************************************************************** |
---|
304 | // SOR-LCP method |
---|
305 | |
---|
306 | // nb is the number of bodies in the body array. |
---|
307 | // J is an m*12 matrix of constraint rows |
---|
308 | // jb is an array of first and second body numbers for each constraint row |
---|
309 | // invI is the global frame inverse inertia for each body (stacked 3x3 matrices) |
---|
310 | // |
---|
311 | // this returns lambda and fc (the constraint force). |
---|
312 | // note: fc is returned as inv(M)*J'*lambda, the constraint force is actually J'*lambda |
---|
313 | // |
---|
314 | // b, lo and hi are modified on exit |
---|
315 | |
---|
316 | |
---|
317 | struct IndexError { |
---|
318 | dReal error; // error to sort on |
---|
319 | int findex; |
---|
320 | int index; // row index |
---|
321 | }; |
---|
322 | |
---|
323 | |
---|
324 | #ifdef REORDER_CONSTRAINTS |
---|
325 | |
---|
326 | static int compare_index_error (const void *a, const void *b) |
---|
327 | { |
---|
328 | const IndexError *i1 = (IndexError*) a; |
---|
329 | const IndexError *i2 = (IndexError*) b; |
---|
330 | if (i1->findex < 0 && i2->findex >= 0) return -1; |
---|
331 | if (i1->findex >= 0 && i2->findex < 0) return 1; |
---|
332 | if (i1->error < i2->error) return -1; |
---|
333 | if (i1->error > i2->error) return 1; |
---|
334 | return 0; |
---|
335 | } |
---|
336 | |
---|
337 | #endif |
---|
338 | |
---|
339 | |
---|
340 | static void SOR_LCP (int m, int nb, dRealMutablePtr J, int *jb, dxBody * const *body, |
---|
341 | dRealPtr invI, dRealMutablePtr lambda, dRealMutablePtr fc, dRealMutablePtr b, |
---|
342 | dRealMutablePtr lo, dRealMutablePtr hi, dRealPtr cfm, int *findex, |
---|
343 | dxQuickStepParameters *qs) |
---|
344 | { |
---|
345 | const int num_iterations = qs->num_iterations; |
---|
346 | const dReal sor_w = qs->w; // SOR over-relaxation parameter |
---|
347 | |
---|
348 | int i,j; |
---|
349 | |
---|
350 | #ifdef WARM_STARTING |
---|
351 | // for warm starting, this seems to be necessary to prevent |
---|
352 | // jerkiness in motor-driven joints. i have no idea why this works. |
---|
353 | for (i=0; i<m; i++) lambda[i] *= 0.9; |
---|
354 | #else |
---|
355 | dSetZero (lambda,m); |
---|
356 | #endif |
---|
357 | |
---|
358 | #ifdef REORDER_CONSTRAINTS |
---|
359 | // the lambda computed at the previous iteration. |
---|
360 | // this is used to measure error for when we are reordering the indexes. |
---|
361 | dRealAllocaArray (last_lambda,m); |
---|
362 | #endif |
---|
363 | |
---|
364 | // a copy of the 'hi' vector in case findex[] is being used |
---|
365 | dRealAllocaArray (hicopy,m); |
---|
366 | memcpy (hicopy,hi,m*sizeof(dReal)); |
---|
367 | |
---|
368 | // precompute iMJ = inv(M)*J' |
---|
369 | dRealAllocaArray (iMJ,m*12); |
---|
370 | compute_invM_JT (m,J,iMJ,jb,body,invI); |
---|
371 | |
---|
372 | // compute fc=(inv(M)*J')*lambda. we will incrementally maintain fc |
---|
373 | // as we change lambda. |
---|
374 | #ifdef WARM_STARTING |
---|
375 | multiply_invM_JT (m,nb,iMJ,jb,lambda,fc); |
---|
376 | #else |
---|
377 | dSetZero (fc,nb*6); |
---|
378 | #endif |
---|
379 | |
---|
380 | // precompute 1 / diagonals of A |
---|
381 | dRealAllocaArray (Ad,m); |
---|
382 | dRealPtr iMJ_ptr = iMJ; |
---|
383 | dRealMutablePtr J_ptr = J; |
---|
384 | for (i=0; i<m; i++) { |
---|
385 | dReal sum = 0; |
---|
386 | for (j=0; j<6; j++) sum += iMJ_ptr[j] * J_ptr[j]; |
---|
387 | if (jb[i*2+1] >= 0) { |
---|
388 | for (j=6; j<12; j++) sum += iMJ_ptr[j] * J_ptr[j]; |
---|
389 | } |
---|
390 | iMJ_ptr += 12; |
---|
391 | J_ptr += 12; |
---|
392 | Ad[i] = sor_w / (sum + cfm[i]); |
---|
393 | } |
---|
394 | |
---|
395 | // scale J and b by Ad |
---|
396 | J_ptr = J; |
---|
397 | for (i=0; i<m; i++) { |
---|
398 | for (j=0; j<12; j++) { |
---|
399 | J_ptr[0] *= Ad[i]; |
---|
400 | J_ptr++; |
---|
401 | } |
---|
402 | b[i] *= Ad[i]; |
---|
403 | |
---|
404 | // scale Ad by CFM. N.B. this should be done last since it is used above |
---|
405 | Ad[i] *= cfm[i]; |
---|
406 | } |
---|
407 | |
---|
408 | // order to solve constraint rows in |
---|
409 | IndexError *order = (IndexError*) ALLOCA (m*sizeof(IndexError)); |
---|
410 | |
---|
411 | #ifndef REORDER_CONSTRAINTS |
---|
412 | // make sure constraints with findex < 0 come first. |
---|
413 | j=0; |
---|
414 | int k=1; |
---|
415 | |
---|
416 | // Fill the array from both ends |
---|
417 | for (i=0; i<m; i++) |
---|
418 | if (findex[i] < 0) |
---|
419 | order[j++].index = i; // Place them at the front |
---|
420 | else |
---|
421 | order[m-k++].index = i; // Place them at the end |
---|
422 | |
---|
423 | dIASSERT ((j+k-1)==m); // -1 since k was started at 1 and not 0 |
---|
424 | #endif |
---|
425 | |
---|
426 | for (int iteration=0; iteration < num_iterations; iteration++) { |
---|
427 | |
---|
428 | #ifdef REORDER_CONSTRAINTS |
---|
429 | // constraints with findex < 0 always come first. |
---|
430 | if (iteration < 2) { |
---|
431 | // for the first two iterations, solve the constraints in |
---|
432 | // the given order |
---|
433 | for (i=0; i<m; i++) { |
---|
434 | order[i].error = i; |
---|
435 | order[i].findex = findex[i]; |
---|
436 | order[i].index = i; |
---|
437 | } |
---|
438 | } |
---|
439 | else { |
---|
440 | // sort the constraints so that the ones converging slowest |
---|
441 | // get solved last. use the absolute (not relative) error. |
---|
442 | for (i=0; i<m; i++) { |
---|
443 | dReal v1 = dFabs (lambda[i]); |
---|
444 | dReal v2 = dFabs (last_lambda[i]); |
---|
445 | dReal max = (v1 > v2) ? v1 : v2; |
---|
446 | if (max > 0) { |
---|
447 | //@@@ relative error: order[i].error = dFabs(lambda[i]-last_lambda[i])/max; |
---|
448 | order[i].error = dFabs(lambda[i]-last_lambda[i]); |
---|
449 | } |
---|
450 | else { |
---|
451 | order[i].error = dInfinity; |
---|
452 | } |
---|
453 | order[i].findex = findex[i]; |
---|
454 | order[i].index = i; |
---|
455 | } |
---|
456 | } |
---|
457 | qsort (order,m,sizeof(IndexError),&compare_index_error); |
---|
458 | |
---|
459 | //@@@ potential optimization: swap lambda and last_lambda pointers rather |
---|
460 | // than copying the data. we must make sure lambda is properly |
---|
461 | // returned to the caller |
---|
462 | memcpy (last_lambda,lambda,m*sizeof(dReal)); |
---|
463 | #endif |
---|
464 | #ifdef RANDOMLY_REORDER_CONSTRAINTS |
---|
465 | if ((iteration & 7) == 0) { |
---|
466 | for (i=1; i<m; ++i) { |
---|
467 | IndexError tmp = order[i]; |
---|
468 | int swapi = dRandInt(i+1); |
---|
469 | order[i] = order[swapi]; |
---|
470 | order[swapi] = tmp; |
---|
471 | } |
---|
472 | } |
---|
473 | #endif |
---|
474 | |
---|
475 | for (int i=0; i<m; i++) { |
---|
476 | // @@@ potential optimization: we could pre-sort J and iMJ, thereby |
---|
477 | // linearizing access to those arrays. hmmm, this does not seem |
---|
478 | // like a win, but we should think carefully about our memory |
---|
479 | // access pattern. |
---|
480 | |
---|
481 | int index = order[i].index; |
---|
482 | J_ptr = J + index*12; |
---|
483 | iMJ_ptr = iMJ + index*12; |
---|
484 | |
---|
485 | // set the limits for this constraint. note that 'hicopy' is used. |
---|
486 | // this is the place where the QuickStep method differs from the |
---|
487 | // direct LCP solving method, since that method only performs this |
---|
488 | // limit adjustment once per time step, whereas this method performs |
---|
489 | // once per iteration per constraint row. |
---|
490 | // the constraints are ordered so that all lambda[] values needed have |
---|
491 | // already been computed. |
---|
492 | if (findex[index] >= 0) { |
---|
493 | hi[index] = dFabs (hicopy[index] * lambda[findex[index]]); |
---|
494 | lo[index] = -hi[index]; |
---|
495 | } |
---|
496 | |
---|
497 | int b1 = jb[index*2]; |
---|
498 | int b2 = jb[index*2+1]; |
---|
499 | dReal delta = b[index] - lambda[index]*Ad[index]; |
---|
500 | dRealMutablePtr fc_ptr = fc + 6*b1; |
---|
501 | |
---|
502 | // @@@ potential optimization: SIMD-ize this and the b2 >= 0 case |
---|
503 | delta -=fc_ptr[0] * J_ptr[0] + fc_ptr[1] * J_ptr[1] + |
---|
504 | fc_ptr[2] * J_ptr[2] + fc_ptr[3] * J_ptr[3] + |
---|
505 | fc_ptr[4] * J_ptr[4] + fc_ptr[5] * J_ptr[5]; |
---|
506 | // @@@ potential optimization: handle 1-body constraints in a separate |
---|
507 | // loop to avoid the cost of test & jump? |
---|
508 | if (b2 >= 0) { |
---|
509 | fc_ptr = fc + 6*b2; |
---|
510 | delta -=fc_ptr[0] * J_ptr[6] + fc_ptr[1] * J_ptr[7] + |
---|
511 | fc_ptr[2] * J_ptr[8] + fc_ptr[3] * J_ptr[9] + |
---|
512 | fc_ptr[4] * J_ptr[10] + fc_ptr[5] * J_ptr[11]; |
---|
513 | } |
---|
514 | |
---|
515 | // compute lambda and clamp it to [lo,hi]. |
---|
516 | // @@@ potential optimization: does SSE have clamping instructions |
---|
517 | // to save test+jump penalties here? |
---|
518 | dReal new_lambda = lambda[index] + delta; |
---|
519 | if (new_lambda < lo[index]) { |
---|
520 | delta = lo[index]-lambda[index]; |
---|
521 | lambda[index] = lo[index]; |
---|
522 | } |
---|
523 | else if (new_lambda > hi[index]) { |
---|
524 | delta = hi[index]-lambda[index]; |
---|
525 | lambda[index] = hi[index]; |
---|
526 | } |
---|
527 | else { |
---|
528 | lambda[index] = new_lambda; |
---|
529 | } |
---|
530 | |
---|
531 | //@@@ a trick that may or may not help |
---|
532 | //dReal ramp = (1-((dReal)(iteration+1)/(dReal)num_iterations)); |
---|
533 | //delta *= ramp; |
---|
534 | |
---|
535 | // update fc. |
---|
536 | // @@@ potential optimization: SIMD for this and the b2 >= 0 case |
---|
537 | fc_ptr = fc + 6*b1; |
---|
538 | fc_ptr[0] += delta * iMJ_ptr[0]; |
---|
539 | fc_ptr[1] += delta * iMJ_ptr[1]; |
---|
540 | fc_ptr[2] += delta * iMJ_ptr[2]; |
---|
541 | fc_ptr[3] += delta * iMJ_ptr[3]; |
---|
542 | fc_ptr[4] += delta * iMJ_ptr[4]; |
---|
543 | fc_ptr[5] += delta * iMJ_ptr[5]; |
---|
544 | // @@@ potential optimization: handle 1-body constraints in a separate |
---|
545 | // loop to avoid the cost of test & jump? |
---|
546 | if (b2 >= 0) { |
---|
547 | fc_ptr = fc + 6*b2; |
---|
548 | fc_ptr[0] += delta * iMJ_ptr[6]; |
---|
549 | fc_ptr[1] += delta * iMJ_ptr[7]; |
---|
550 | fc_ptr[2] += delta * iMJ_ptr[8]; |
---|
551 | fc_ptr[3] += delta * iMJ_ptr[9]; |
---|
552 | fc_ptr[4] += delta * iMJ_ptr[10]; |
---|
553 | fc_ptr[5] += delta * iMJ_ptr[11]; |
---|
554 | } |
---|
555 | } |
---|
556 | } |
---|
557 | } |
---|
558 | |
---|
559 | |
---|
560 | void dxQuickStepper (dxWorld *world, dxBody * const *body, int nb, |
---|
561 | dxJoint * const *_joint, int nj, dReal stepsize) |
---|
562 | { |
---|
563 | int i,j; |
---|
564 | IFTIMING(dTimerStart("preprocessing");) |
---|
565 | |
---|
566 | dReal stepsize1 = dRecip(stepsize); |
---|
567 | |
---|
568 | // number all bodies in the body list - set their tag values |
---|
569 | for (i=0; i<nb; i++) body[i]->tag = i; |
---|
570 | |
---|
571 | // make a local copy of the joint array, because we might want to modify it. |
---|
572 | // (the "dxJoint *const*" declaration says we're allowed to modify the joints |
---|
573 | // but not the joint array, because the caller might need it unchanged). |
---|
574 | //@@@ do we really need to do this? we'll be sorting constraint rows individually, not joints |
---|
575 | dxJoint **joint = (dxJoint**) ALLOCA (nj * sizeof(dxJoint*)); |
---|
576 | memcpy (joint,_joint,nj * sizeof(dxJoint*)); |
---|
577 | |
---|
578 | // for all bodies, compute the inertia tensor and its inverse in the global |
---|
579 | // frame, and compute the rotational force and add it to the torque |
---|
580 | // accumulator. I and invI are a vertical stack of 3x4 matrices, one per body. |
---|
581 | //dRealAllocaArray (I,3*4*nb); // need to remember all I's for feedback purposes only |
---|
582 | dRealAllocaArray (invI,3*4*nb); |
---|
583 | for (i=0; i<nb; i++) { |
---|
584 | dMatrix3 tmp; |
---|
585 | |
---|
586 | // compute inverse inertia tensor in global frame |
---|
587 | dMULTIPLY2_333 (tmp,body[i]->invI,body[i]->posr.R); |
---|
588 | dMULTIPLY0_333 (invI+i*12,body[i]->posr.R,tmp); |
---|
589 | #ifdef dGYROSCOPIC |
---|
590 | dMatrix3 I; |
---|
591 | // compute inertia tensor in global frame |
---|
592 | dMULTIPLY2_333 (tmp,body[i]->mass.I,body[i]->posr.R); |
---|
593 | //dMULTIPLY0_333 (I+i*12,body[i]->posr.R,tmp); |
---|
594 | dMULTIPLY0_333 (I,body[i]->posr.R,tmp); |
---|
595 | // compute rotational force |
---|
596 | //dMULTIPLY0_331 (tmp,I+i*12,body[i]->avel); |
---|
597 | dMULTIPLY0_331 (tmp,I,body[i]->avel); |
---|
598 | dCROSS (body[i]->tacc,-=,body[i]->avel,tmp); |
---|
599 | #endif |
---|
600 | } |
---|
601 | |
---|
602 | // add the gravity force to all bodies |
---|
603 | for (i=0; i<nb; i++) { |
---|
604 | if ((body[i]->flags & dxBodyNoGravity)==0) { |
---|
605 | body[i]->facc[0] += body[i]->mass.mass * world->gravity[0]; |
---|
606 | body[i]->facc[1] += body[i]->mass.mass * world->gravity[1]; |
---|
607 | body[i]->facc[2] += body[i]->mass.mass * world->gravity[2]; |
---|
608 | } |
---|
609 | } |
---|
610 | |
---|
611 | // get joint information (m = total constraint dimension, nub = number of unbounded variables). |
---|
612 | // joints with m=0 are inactive and are removed from the joints array |
---|
613 | // entirely, so that the code that follows does not consider them. |
---|
614 | //@@@ do we really need to save all the info1's |
---|
615 | dxJoint::Info1 *info = (dxJoint::Info1*) ALLOCA (nj*sizeof(dxJoint::Info1)); |
---|
616 | for (i=0, j=0; j<nj; j++) { // i=dest, j=src |
---|
617 | joint[j]->vtable->getInfo1 (joint[j],info+i); |
---|
618 | dIASSERT (info[i].m >= 0 && info[i].m <= 6 && info[i].nub >= 0 && info[i].nub <= info[i].m); |
---|
619 | if (info[i].m > 0) { |
---|
620 | joint[i] = joint[j]; |
---|
621 | i++; |
---|
622 | } |
---|
623 | } |
---|
624 | nj = i; |
---|
625 | |
---|
626 | // create the row offset array |
---|
627 | int m = 0; |
---|
628 | int *ofs = (int*) ALLOCA (nj*sizeof(int)); |
---|
629 | for (i=0; i<nj; i++) { |
---|
630 | ofs[i] = m; |
---|
631 | m += info[i].m; |
---|
632 | } |
---|
633 | |
---|
634 | // if there are constraints, compute the constraint force |
---|
635 | dRealAllocaArray (J,m*12); |
---|
636 | int *jb = (int*) ALLOCA (m*2*sizeof(int)); |
---|
637 | if (m > 0) { |
---|
638 | // create a constraint equation right hand side vector `c', a constraint |
---|
639 | // force mixing vector `cfm', and LCP low and high bound vectors, and an |
---|
640 | // 'findex' vector. |
---|
641 | dRealAllocaArray (c,m); |
---|
642 | dRealAllocaArray (cfm,m); |
---|
643 | dRealAllocaArray (lo,m); |
---|
644 | dRealAllocaArray (hi,m); |
---|
645 | int *findex = (int*) ALLOCA (m*sizeof(int)); |
---|
646 | dSetZero (c,m); |
---|
647 | dSetValue (cfm,m,world->global_cfm); |
---|
648 | dSetValue (lo,m,-dInfinity); |
---|
649 | dSetValue (hi,m, dInfinity); |
---|
650 | for (i=0; i<m; i++) findex[i] = -1; |
---|
651 | |
---|
652 | // get jacobian data from constraints. an m*12 matrix will be created |
---|
653 | // to store the two jacobian blocks from each constraint. it has this |
---|
654 | // format: |
---|
655 | // |
---|
656 | // l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 \ . |
---|
657 | // l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 }-- jacobian for joint 0, body 1 and body 2 (3 rows) |
---|
658 | // l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 / |
---|
659 | // l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 }--- jacobian for joint 1, body 1 and body 2 (3 rows) |
---|
660 | // etc... |
---|
661 | // |
---|
662 | // (lll) = linear jacobian data |
---|
663 | // (aaa) = angular jacobian data |
---|
664 | // |
---|
665 | IFTIMING (dTimerNow ("create J");) |
---|
666 | dSetZero (J,m*12); |
---|
667 | dxJoint::Info2 Jinfo; |
---|
668 | Jinfo.rowskip = 12; |
---|
669 | Jinfo.fps = stepsize1; |
---|
670 | Jinfo.erp = world->global_erp; |
---|
671 | int mfb = 0; // number of rows of Jacobian we will have to save for joint feedback |
---|
672 | for (i=0; i<nj; i++) { |
---|
673 | Jinfo.J1l = J + ofs[i]*12; |
---|
674 | Jinfo.J1a = Jinfo.J1l + 3; |
---|
675 | Jinfo.J2l = Jinfo.J1l + 6; |
---|
676 | Jinfo.J2a = Jinfo.J1l + 9; |
---|
677 | Jinfo.c = c + ofs[i]; |
---|
678 | Jinfo.cfm = cfm + ofs[i]; |
---|
679 | Jinfo.lo = lo + ofs[i]; |
---|
680 | Jinfo.hi = hi + ofs[i]; |
---|
681 | Jinfo.findex = findex + ofs[i]; |
---|
682 | joint[i]->vtable->getInfo2 (joint[i],&Jinfo); |
---|
683 | // adjust returned findex values for global index numbering |
---|
684 | for (j=0; j<info[i].m; j++) { |
---|
685 | if (findex[ofs[i] + j] >= 0) findex[ofs[i] + j] += ofs[i]; |
---|
686 | } |
---|
687 | if (joint[i]->feedback) |
---|
688 | mfb += info[i].m; |
---|
689 | } |
---|
690 | |
---|
691 | // we need a copy of Jacobian for joint feedbacks |
---|
692 | // because it gets destroyed by SOR solver |
---|
693 | // instead of saving all Jacobian, we can save just rows |
---|
694 | // for joints, that requested feedback (which is normaly much less) |
---|
695 | dRealAllocaArray (Jcopy,mfb*12); |
---|
696 | if (mfb > 0) { |
---|
697 | mfb = 0; |
---|
698 | for (i=0; i<nj; i++) |
---|
699 | if (joint[i]->feedback) { |
---|
700 | memcpy(Jcopy+mfb*12, J+ofs[i]*12, info[i].m*12*sizeof(dReal)); |
---|
701 | mfb += info[i].m; |
---|
702 | } |
---|
703 | } |
---|
704 | |
---|
705 | |
---|
706 | // create an array of body numbers for each joint row |
---|
707 | int *jb_ptr = jb; |
---|
708 | for (i=0; i<nj; i++) { |
---|
709 | int b1 = (joint[i]->node[0].body) ? (joint[i]->node[0].body->tag) : -1; |
---|
710 | int b2 = (joint[i]->node[1].body) ? (joint[i]->node[1].body->tag) : -1; |
---|
711 | for (j=0; j<info[i].m; j++) { |
---|
712 | jb_ptr[0] = b1; |
---|
713 | jb_ptr[1] = b2; |
---|
714 | jb_ptr += 2; |
---|
715 | } |
---|
716 | } |
---|
717 | dIASSERT (jb_ptr == jb+2*m); |
---|
718 | |
---|
719 | // compute the right hand side `rhs' |
---|
720 | IFTIMING (dTimerNow ("compute rhs");) |
---|
721 | dRealAllocaArray (tmp1,nb*6); |
---|
722 | // put v/h + invM*fe into tmp1 |
---|
723 | for (i=0; i<nb; i++) { |
---|
724 | dReal body_invMass = body[i]->invMass; |
---|
725 | for (j=0; j<3; j++) tmp1[i*6+j] = body[i]->facc[j] * body_invMass + body[i]->lvel[j] * stepsize1; |
---|
726 | dMULTIPLY0_331 (tmp1 + i*6 + 3,invI + i*12,body[i]->tacc); |
---|
727 | for (j=0; j<3; j++) tmp1[i*6+3+j] += body[i]->avel[j] * stepsize1; |
---|
728 | } |
---|
729 | |
---|
730 | // put J*tmp1 into rhs |
---|
731 | dRealAllocaArray (rhs,m); |
---|
732 | multiply_J (m,J,jb,tmp1,rhs); |
---|
733 | |
---|
734 | // complete rhs |
---|
735 | for (i=0; i<m; i++) rhs[i] = c[i]*stepsize1 - rhs[i]; |
---|
736 | |
---|
737 | // scale CFM |
---|
738 | for (i=0; i<m; i++) cfm[i] *= stepsize1; |
---|
739 | |
---|
740 | // load lambda from the value saved on the previous iteration |
---|
741 | dRealAllocaArray (lambda,m); |
---|
742 | #ifdef WARM_STARTING |
---|
743 | dSetZero (lambda,m); //@@@ shouldn't be necessary |
---|
744 | for (i=0; i<nj; i++) { |
---|
745 | memcpy (lambda+ofs[i],joint[i]->lambda,info[i].m * sizeof(dReal)); |
---|
746 | } |
---|
747 | #endif |
---|
748 | |
---|
749 | // solve the LCP problem and get lambda and invM*constraint_force |
---|
750 | IFTIMING (dTimerNow ("solving LCP problem");) |
---|
751 | dRealAllocaArray (cforce,nb*6); |
---|
752 | SOR_LCP (m,nb,J,jb,body,invI,lambda,cforce,rhs,lo,hi,cfm,findex,&world->qs); |
---|
753 | |
---|
754 | #ifdef WARM_STARTING |
---|
755 | // save lambda for the next iteration |
---|
756 | //@@@ note that this doesn't work for contact joints yet, as they are |
---|
757 | // recreated every iteration |
---|
758 | for (i=0; i<nj; i++) { |
---|
759 | memcpy (joint[i]->lambda,lambda+ofs[i],info[i].m * sizeof(dReal)); |
---|
760 | } |
---|
761 | #endif |
---|
762 | |
---|
763 | // note that the SOR method overwrites rhs and J at this point, so |
---|
764 | // they should not be used again. |
---|
765 | |
---|
766 | // add stepsize * cforce to the body velocity |
---|
767 | for (i=0; i<nb; i++) { |
---|
768 | for (j=0; j<3; j++) body[i]->lvel[j] += stepsize * cforce[i*6+j]; |
---|
769 | for (j=0; j<3; j++) body[i]->avel[j] += stepsize * cforce[i*6+3+j]; |
---|
770 | } |
---|
771 | |
---|
772 | // if joint feedback is requested, compute the constraint force. |
---|
773 | // BUT: cforce is inv(M)*J'*lambda, whereas we want just J'*lambda, |
---|
774 | // so we must compute M*cforce. |
---|
775 | // @@@ if any joint has a feedback request we compute the entire |
---|
776 | // adjusted cforce, which is not the most efficient way to do it. |
---|
777 | /*for (j=0; j<nj; j++) { |
---|
778 | if (joint[j]->feedback) { |
---|
779 | // compute adjusted cforce |
---|
780 | for (i=0; i<nb; i++) { |
---|
781 | dReal k = body[i]->mass.mass; |
---|
782 | cforce [i*6+0] *= k; |
---|
783 | cforce [i*6+1] *= k; |
---|
784 | cforce [i*6+2] *= k; |
---|
785 | dVector3 tmp; |
---|
786 | dMULTIPLY0_331 (tmp, I + 12*i, cforce + i*6 + 3); |
---|
787 | cforce [i*6+3] = tmp[0]; |
---|
788 | cforce [i*6+4] = tmp[1]; |
---|
789 | cforce [i*6+5] = tmp[2]; |
---|
790 | } |
---|
791 | // compute feedback for this and all remaining joints |
---|
792 | for (; j<nj; j++) { |
---|
793 | dJointFeedback *fb = joint[j]->feedback; |
---|
794 | if (fb) { |
---|
795 | int b1 = joint[j]->node[0].body->tag; |
---|
796 | memcpy (fb->f1,cforce+b1*6,3*sizeof(dReal)); |
---|
797 | memcpy (fb->t1,cforce+b1*6+3,3*sizeof(dReal)); |
---|
798 | if (joint[j]->node[1].body) { |
---|
799 | int b2 = joint[j]->node[1].body->tag; |
---|
800 | memcpy (fb->f2,cforce+b2*6,3*sizeof(dReal)); |
---|
801 | memcpy (fb->t2,cforce+b2*6+3,3*sizeof(dReal)); |
---|
802 | } |
---|
803 | } |
---|
804 | } |
---|
805 | } |
---|
806 | }*/ |
---|
807 | |
---|
808 | if (mfb > 0) { |
---|
809 | // straightforward computation of joint constraint forces: |
---|
810 | // multiply related lambdas with respective J' block for joints |
---|
811 | // where feedback was requested |
---|
812 | mfb = 0; |
---|
813 | for (i=0; i<nj; i++) { |
---|
814 | if (joint[i]->feedback) { |
---|
815 | dJointFeedback *fb = joint[i]->feedback; |
---|
816 | dReal data[6]; |
---|
817 | Multiply1_12q1 (data, Jcopy+mfb*12, lambda+ofs[i], info[i].m); |
---|
818 | fb->f1[0] = data[0]; |
---|
819 | fb->f1[1] = data[1]; |
---|
820 | fb->f1[2] = data[2]; |
---|
821 | fb->t1[0] = data[3]; |
---|
822 | fb->t1[1] = data[4]; |
---|
823 | fb->t1[2] = data[5]; |
---|
824 | if (joint[i]->node[1].body) |
---|
825 | { |
---|
826 | Multiply1_12q1 (data, Jcopy+mfb*12+6, lambda+ofs[i], info[i].m); |
---|
827 | fb->f2[0] = data[0]; |
---|
828 | fb->f2[1] = data[1]; |
---|
829 | fb->f2[2] = data[2]; |
---|
830 | fb->t2[0] = data[3]; |
---|
831 | fb->t2[1] = data[4]; |
---|
832 | fb->t2[2] = data[5]; |
---|
833 | } |
---|
834 | mfb += info[i].m; |
---|
835 | } |
---|
836 | } |
---|
837 | } |
---|
838 | } |
---|
839 | |
---|
840 | // compute the velocity update: |
---|
841 | // add stepsize * invM * fe to the body velocity |
---|
842 | |
---|
843 | IFTIMING (dTimerNow ("compute velocity update");) |
---|
844 | for (i=0; i<nb; i++) { |
---|
845 | dReal body_invMass = body[i]->invMass; |
---|
846 | for (j=0; j<3; j++) body[i]->lvel[j] += stepsize * body_invMass * body[i]->facc[j]; |
---|
847 | for (j=0; j<3; j++) body[i]->tacc[j] *= stepsize; |
---|
848 | dMULTIPLYADD0_331 (body[i]->avel,invI + i*12,body[i]->tacc); |
---|
849 | } |
---|
850 | |
---|
851 | #if 0 |
---|
852 | // check that the updated velocity obeys the constraint (this check needs unmodified J) |
---|
853 | dRealAllocaArray (vel,nb*6); |
---|
854 | for (i=0; i<nb; i++) { |
---|
855 | for (j=0; j<3; j++) vel[i*6+j] = body[i]->lvel[j]; |
---|
856 | for (j=0; j<3; j++) vel[i*6+3+j] = body[i]->avel[j]; |
---|
857 | } |
---|
858 | dRealAllocaArray (tmp,m); |
---|
859 | multiply_J (m,J,jb,vel,tmp); |
---|
860 | dReal error = 0; |
---|
861 | for (i=0; i<m; i++) error += dFabs(tmp[i]); |
---|
862 | printf ("velocity error = %10.6e\n",error); |
---|
863 | #endif |
---|
864 | |
---|
865 | // update the position and orientation from the new linear/angular velocity |
---|
866 | // (over the given timestep) |
---|
867 | IFTIMING (dTimerNow ("update position");) |
---|
868 | for (i=0; i<nb; i++) dxStepBody (body[i],stepsize); |
---|
869 | |
---|
870 | IFTIMING (dTimerNow ("tidy up");) |
---|
871 | |
---|
872 | // zero all force accumulators |
---|
873 | for (i=0; i<nb; i++) { |
---|
874 | dSetZero (body[i]->facc,3); |
---|
875 | dSetZero (body[i]->tacc,3); |
---|
876 | } |
---|
877 | |
---|
878 | IFTIMING (dTimerEnd();) |
---|
879 | IFTIMING (if (m > 0) dTimerReport (stdout,1);) |
---|
880 | } |
---|