Planet
navi homePPSaboutscreenshotsdownloaddevelopmentforum

source: code/branches/ode/ode-0.9/ode/src/fastltsolve.c @ 216

Last change on this file since 216 was 216, checked in by mathiask, 16 years ago

[Physik] add ode-0.9

File size: 4.9 KB
Line 
1/* generated code, do not edit. */
2
3#include "ode/matrix.h"
4
5/* solve L^T * x=b, with b containing 1 right hand side.
6 * L is an n*n lower triangular matrix with ones on the diagonal.
7 * L is stored by rows and its leading dimension is lskip.
8 * b is an n*1 matrix that contains the right hand side.
9 * b is overwritten with x.
10 * this processes blocks of 4.
11 */
12
13void dSolveL1T (const dReal *L, dReal *B, int n, int lskip1)
14{ 
15  /* declare variables - Z matrix, p and q vectors, etc */
16  dReal Z11,m11,Z21,m21,Z31,m31,Z41,m41,p1,q1,p2,p3,p4,*ex;
17  const dReal *ell;
18  int lskip2,lskip3,i,j;
19  /* special handling for L and B because we're solving L1 *transpose* */
20  L = L + (n-1)*(lskip1+1);
21  B = B + n-1;
22  lskip1 = -lskip1;
23  /* compute lskip values */
24  lskip2 = 2*lskip1;
25  lskip3 = 3*lskip1;
26  /* compute all 4 x 1 blocks of X */
27  for (i=0; i <= n-4; i+=4) {
28    /* compute all 4 x 1 block of X, from rows i..i+4-1 */
29    /* set the Z matrix to 0 */
30    Z11=0;
31    Z21=0;
32    Z31=0;
33    Z41=0;
34    ell = L - i;
35    ex = B;
36    /* the inner loop that computes outer products and adds them to Z */
37    for (j=i-4; j >= 0; j -= 4) {
38      /* load p and q values */
39      p1=ell[0];
40      q1=ex[0];
41      p2=ell[-1];
42      p3=ell[-2];
43      p4=ell[-3];
44      /* compute outer product and add it to the Z matrix */
45      m11 = p1 * q1;
46      m21 = p2 * q1;
47      m31 = p3 * q1;
48      m41 = p4 * q1;
49      ell += lskip1;
50      Z11 += m11;
51      Z21 += m21;
52      Z31 += m31;
53      Z41 += m41;
54      /* load p and q values */
55      p1=ell[0];
56      q1=ex[-1];
57      p2=ell[-1];
58      p3=ell[-2];
59      p4=ell[-3];
60      /* compute outer product and add it to the Z matrix */
61      m11 = p1 * q1;
62      m21 = p2 * q1;
63      m31 = p3 * q1;
64      m41 = p4 * q1;
65      ell += lskip1;
66      Z11 += m11;
67      Z21 += m21;
68      Z31 += m31;
69      Z41 += m41;
70      /* load p and q values */
71      p1=ell[0];
72      q1=ex[-2];
73      p2=ell[-1];
74      p3=ell[-2];
75      p4=ell[-3];
76      /* compute outer product and add it to the Z matrix */
77      m11 = p1 * q1;
78      m21 = p2 * q1;
79      m31 = p3 * q1;
80      m41 = p4 * q1;
81      ell += lskip1;
82      Z11 += m11;
83      Z21 += m21;
84      Z31 += m31;
85      Z41 += m41;
86      /* load p and q values */
87      p1=ell[0];
88      q1=ex[-3];
89      p2=ell[-1];
90      p3=ell[-2];
91      p4=ell[-3];
92      /* compute outer product and add it to the Z matrix */
93      m11 = p1 * q1;
94      m21 = p2 * q1;
95      m31 = p3 * q1;
96      m41 = p4 * q1;
97      ell += lskip1;
98      ex -= 4;
99      Z11 += m11;
100      Z21 += m21;
101      Z31 += m31;
102      Z41 += m41;
103      /* end of inner loop */
104    }
105    /* compute left-over iterations */
106    j += 4;
107    for (; j > 0; j--) {
108      /* load p and q values */
109      p1=ell[0];
110      q1=ex[0];
111      p2=ell[-1];
112      p3=ell[-2];
113      p4=ell[-3];
114      /* compute outer product and add it to the Z matrix */
115      m11 = p1 * q1;
116      m21 = p2 * q1;
117      m31 = p3 * q1;
118      m41 = p4 * q1;
119      ell += lskip1;
120      ex -= 1;
121      Z11 += m11;
122      Z21 += m21;
123      Z31 += m31;
124      Z41 += m41;
125    }
126    /* finish computing the X(i) block */
127    Z11 = ex[0] - Z11;
128    ex[0] = Z11;
129    p1 = ell[-1];
130    Z21 = ex[-1] - Z21 - p1*Z11;
131    ex[-1] = Z21;
132    p1 = ell[-2];
133    p2 = ell[-2+lskip1];
134    Z31 = ex[-2] - Z31 - p1*Z11 - p2*Z21;
135    ex[-2] = Z31;
136    p1 = ell[-3];
137    p2 = ell[-3+lskip1];
138    p3 = ell[-3+lskip2];
139    Z41 = ex[-3] - Z41 - p1*Z11 - p2*Z21 - p3*Z31;
140    ex[-3] = Z41;
141    /* end of outer loop */
142  }
143  /* compute rows at end that are not a multiple of block size */
144  for (; i < n; i++) {
145    /* compute all 1 x 1 block of X, from rows i..i+1-1 */
146    /* set the Z matrix to 0 */
147    Z11=0;
148    ell = L - i;
149    ex = B;
150    /* the inner loop that computes outer products and adds them to Z */
151    for (j=i-4; j >= 0; j -= 4) {
152      /* load p and q values */
153      p1=ell[0];
154      q1=ex[0];
155      /* compute outer product and add it to the Z matrix */
156      m11 = p1 * q1;
157      ell += lskip1;
158      Z11 += m11;
159      /* load p and q values */
160      p1=ell[0];
161      q1=ex[-1];
162      /* compute outer product and add it to the Z matrix */
163      m11 = p1 * q1;
164      ell += lskip1;
165      Z11 += m11;
166      /* load p and q values */
167      p1=ell[0];
168      q1=ex[-2];
169      /* compute outer product and add it to the Z matrix */
170      m11 = p1 * q1;
171      ell += lskip1;
172      Z11 += m11;
173      /* load p and q values */
174      p1=ell[0];
175      q1=ex[-3];
176      /* compute outer product and add it to the Z matrix */
177      m11 = p1 * q1;
178      ell += lskip1;
179      ex -= 4;
180      Z11 += m11;
181      /* end of inner loop */
182    }
183    /* compute left-over iterations */
184    j += 4;
185    for (; j > 0; j--) {
186      /* load p and q values */
187      p1=ell[0];
188      q1=ex[0];
189      /* compute outer product and add it to the Z matrix */
190      m11 = p1 * q1;
191      ell += lskip1;
192      ex -= 1;
193      Z11 += m11;
194    }
195    /* finish computing the X(i) block */
196    Z11 = ex[0] - Z11;
197    ex[0] = Z11;
198  }
199}
Note: See TracBrowser for help on using the repository browser.