1 | /* generated code, do not edit. */ |
---|
2 | |
---|
3 | #include "ode/matrix.h" |
---|
4 | |
---|
5 | /* solve L^T * x=b, with b containing 1 right hand side. |
---|
6 | * L is an n*n lower triangular matrix with ones on the diagonal. |
---|
7 | * L is stored by rows and its leading dimension is lskip. |
---|
8 | * b is an n*1 matrix that contains the right hand side. |
---|
9 | * b is overwritten with x. |
---|
10 | * this processes blocks of 4. |
---|
11 | */ |
---|
12 | |
---|
13 | void dSolveL1T (const dReal *L, dReal *B, int n, int lskip1) |
---|
14 | { |
---|
15 | /* declare variables - Z matrix, p and q vectors, etc */ |
---|
16 | dReal Z11,m11,Z21,m21,Z31,m31,Z41,m41,p1,q1,p2,p3,p4,*ex; |
---|
17 | const dReal *ell; |
---|
18 | int lskip2,lskip3,i,j; |
---|
19 | /* special handling for L and B because we're solving L1 *transpose* */ |
---|
20 | L = L + (n-1)*(lskip1+1); |
---|
21 | B = B + n-1; |
---|
22 | lskip1 = -lskip1; |
---|
23 | /* compute lskip values */ |
---|
24 | lskip2 = 2*lskip1; |
---|
25 | lskip3 = 3*lskip1; |
---|
26 | /* compute all 4 x 1 blocks of X */ |
---|
27 | for (i=0; i <= n-4; i+=4) { |
---|
28 | /* compute all 4 x 1 block of X, from rows i..i+4-1 */ |
---|
29 | /* set the Z matrix to 0 */ |
---|
30 | Z11=0; |
---|
31 | Z21=0; |
---|
32 | Z31=0; |
---|
33 | Z41=0; |
---|
34 | ell = L - i; |
---|
35 | ex = B; |
---|
36 | /* the inner loop that computes outer products and adds them to Z */ |
---|
37 | for (j=i-4; j >= 0; j -= 4) { |
---|
38 | /* load p and q values */ |
---|
39 | p1=ell[0]; |
---|
40 | q1=ex[0]; |
---|
41 | p2=ell[-1]; |
---|
42 | p3=ell[-2]; |
---|
43 | p4=ell[-3]; |
---|
44 | /* compute outer product and add it to the Z matrix */ |
---|
45 | m11 = p1 * q1; |
---|
46 | m21 = p2 * q1; |
---|
47 | m31 = p3 * q1; |
---|
48 | m41 = p4 * q1; |
---|
49 | ell += lskip1; |
---|
50 | Z11 += m11; |
---|
51 | Z21 += m21; |
---|
52 | Z31 += m31; |
---|
53 | Z41 += m41; |
---|
54 | /* load p and q values */ |
---|
55 | p1=ell[0]; |
---|
56 | q1=ex[-1]; |
---|
57 | p2=ell[-1]; |
---|
58 | p3=ell[-2]; |
---|
59 | p4=ell[-3]; |
---|
60 | /* compute outer product and add it to the Z matrix */ |
---|
61 | m11 = p1 * q1; |
---|
62 | m21 = p2 * q1; |
---|
63 | m31 = p3 * q1; |
---|
64 | m41 = p4 * q1; |
---|
65 | ell += lskip1; |
---|
66 | Z11 += m11; |
---|
67 | Z21 += m21; |
---|
68 | Z31 += m31; |
---|
69 | Z41 += m41; |
---|
70 | /* load p and q values */ |
---|
71 | p1=ell[0]; |
---|
72 | q1=ex[-2]; |
---|
73 | p2=ell[-1]; |
---|
74 | p3=ell[-2]; |
---|
75 | p4=ell[-3]; |
---|
76 | /* compute outer product and add it to the Z matrix */ |
---|
77 | m11 = p1 * q1; |
---|
78 | m21 = p2 * q1; |
---|
79 | m31 = p3 * q1; |
---|
80 | m41 = p4 * q1; |
---|
81 | ell += lskip1; |
---|
82 | Z11 += m11; |
---|
83 | Z21 += m21; |
---|
84 | Z31 += m31; |
---|
85 | Z41 += m41; |
---|
86 | /* load p and q values */ |
---|
87 | p1=ell[0]; |
---|
88 | q1=ex[-3]; |
---|
89 | p2=ell[-1]; |
---|
90 | p3=ell[-2]; |
---|
91 | p4=ell[-3]; |
---|
92 | /* compute outer product and add it to the Z matrix */ |
---|
93 | m11 = p1 * q1; |
---|
94 | m21 = p2 * q1; |
---|
95 | m31 = p3 * q1; |
---|
96 | m41 = p4 * q1; |
---|
97 | ell += lskip1; |
---|
98 | ex -= 4; |
---|
99 | Z11 += m11; |
---|
100 | Z21 += m21; |
---|
101 | Z31 += m31; |
---|
102 | Z41 += m41; |
---|
103 | /* end of inner loop */ |
---|
104 | } |
---|
105 | /* compute left-over iterations */ |
---|
106 | j += 4; |
---|
107 | for (; j > 0; j--) { |
---|
108 | /* load p and q values */ |
---|
109 | p1=ell[0]; |
---|
110 | q1=ex[0]; |
---|
111 | p2=ell[-1]; |
---|
112 | p3=ell[-2]; |
---|
113 | p4=ell[-3]; |
---|
114 | /* compute outer product and add it to the Z matrix */ |
---|
115 | m11 = p1 * q1; |
---|
116 | m21 = p2 * q1; |
---|
117 | m31 = p3 * q1; |
---|
118 | m41 = p4 * q1; |
---|
119 | ell += lskip1; |
---|
120 | ex -= 1; |
---|
121 | Z11 += m11; |
---|
122 | Z21 += m21; |
---|
123 | Z31 += m31; |
---|
124 | Z41 += m41; |
---|
125 | } |
---|
126 | /* finish computing the X(i) block */ |
---|
127 | Z11 = ex[0] - Z11; |
---|
128 | ex[0] = Z11; |
---|
129 | p1 = ell[-1]; |
---|
130 | Z21 = ex[-1] - Z21 - p1*Z11; |
---|
131 | ex[-1] = Z21; |
---|
132 | p1 = ell[-2]; |
---|
133 | p2 = ell[-2+lskip1]; |
---|
134 | Z31 = ex[-2] - Z31 - p1*Z11 - p2*Z21; |
---|
135 | ex[-2] = Z31; |
---|
136 | p1 = ell[-3]; |
---|
137 | p2 = ell[-3+lskip1]; |
---|
138 | p3 = ell[-3+lskip2]; |
---|
139 | Z41 = ex[-3] - Z41 - p1*Z11 - p2*Z21 - p3*Z31; |
---|
140 | ex[-3] = Z41; |
---|
141 | /* end of outer loop */ |
---|
142 | } |
---|
143 | /* compute rows at end that are not a multiple of block size */ |
---|
144 | for (; i < n; i++) { |
---|
145 | /* compute all 1 x 1 block of X, from rows i..i+1-1 */ |
---|
146 | /* set the Z matrix to 0 */ |
---|
147 | Z11=0; |
---|
148 | ell = L - i; |
---|
149 | ex = B; |
---|
150 | /* the inner loop that computes outer products and adds them to Z */ |
---|
151 | for (j=i-4; j >= 0; j -= 4) { |
---|
152 | /* load p and q values */ |
---|
153 | p1=ell[0]; |
---|
154 | q1=ex[0]; |
---|
155 | /* compute outer product and add it to the Z matrix */ |
---|
156 | m11 = p1 * q1; |
---|
157 | ell += lskip1; |
---|
158 | Z11 += m11; |
---|
159 | /* load p and q values */ |
---|
160 | p1=ell[0]; |
---|
161 | q1=ex[-1]; |
---|
162 | /* compute outer product and add it to the Z matrix */ |
---|
163 | m11 = p1 * q1; |
---|
164 | ell += lskip1; |
---|
165 | Z11 += m11; |
---|
166 | /* load p and q values */ |
---|
167 | p1=ell[0]; |
---|
168 | q1=ex[-2]; |
---|
169 | /* compute outer product and add it to the Z matrix */ |
---|
170 | m11 = p1 * q1; |
---|
171 | ell += lskip1; |
---|
172 | Z11 += m11; |
---|
173 | /* load p and q values */ |
---|
174 | p1=ell[0]; |
---|
175 | q1=ex[-3]; |
---|
176 | /* compute outer product and add it to the Z matrix */ |
---|
177 | m11 = p1 * q1; |
---|
178 | ell += lskip1; |
---|
179 | ex -= 4; |
---|
180 | Z11 += m11; |
---|
181 | /* end of inner loop */ |
---|
182 | } |
---|
183 | /* compute left-over iterations */ |
---|
184 | j += 4; |
---|
185 | for (; j > 0; j--) { |
---|
186 | /* load p and q values */ |
---|
187 | p1=ell[0]; |
---|
188 | q1=ex[0]; |
---|
189 | /* compute outer product and add it to the Z matrix */ |
---|
190 | m11 = p1 * q1; |
---|
191 | ell += lskip1; |
---|
192 | ex -= 1; |
---|
193 | Z11 += m11; |
---|
194 | } |
---|
195 | /* finish computing the X(i) block */ |
---|
196 | Z11 = ex[0] - Z11; |
---|
197 | ex[0] = Z11; |
---|
198 | } |
---|
199 | } |
---|