[216] | 1 | /* generated code, do not edit. */ |
---|
| 2 | |
---|
| 3 | #include "ode/matrix.h" |
---|
| 4 | |
---|
| 5 | /* solve L^T * x=b, with b containing 1 right hand side. |
---|
| 6 | * L is an n*n lower triangular matrix with ones on the diagonal. |
---|
| 7 | * L is stored by rows and its leading dimension is lskip. |
---|
| 8 | * b is an n*1 matrix that contains the right hand side. |
---|
| 9 | * b is overwritten with x. |
---|
| 10 | * this processes blocks of 4. |
---|
| 11 | */ |
---|
| 12 | |
---|
| 13 | void dSolveL1T (const dReal *L, dReal *B, int n, int lskip1) |
---|
| 14 | { |
---|
| 15 | /* declare variables - Z matrix, p and q vectors, etc */ |
---|
| 16 | dReal Z11,m11,Z21,m21,Z31,m31,Z41,m41,p1,q1,p2,p3,p4,*ex; |
---|
| 17 | const dReal *ell; |
---|
| 18 | int lskip2,lskip3,i,j; |
---|
| 19 | /* special handling for L and B because we're solving L1 *transpose* */ |
---|
| 20 | L = L + (n-1)*(lskip1+1); |
---|
| 21 | B = B + n-1; |
---|
| 22 | lskip1 = -lskip1; |
---|
| 23 | /* compute lskip values */ |
---|
| 24 | lskip2 = 2*lskip1; |
---|
| 25 | lskip3 = 3*lskip1; |
---|
| 26 | /* compute all 4 x 1 blocks of X */ |
---|
| 27 | for (i=0; i <= n-4; i+=4) { |
---|
| 28 | /* compute all 4 x 1 block of X, from rows i..i+4-1 */ |
---|
| 29 | /* set the Z matrix to 0 */ |
---|
| 30 | Z11=0; |
---|
| 31 | Z21=0; |
---|
| 32 | Z31=0; |
---|
| 33 | Z41=0; |
---|
| 34 | ell = L - i; |
---|
| 35 | ex = B; |
---|
| 36 | /* the inner loop that computes outer products and adds them to Z */ |
---|
| 37 | for (j=i-4; j >= 0; j -= 4) { |
---|
| 38 | /* load p and q values */ |
---|
| 39 | p1=ell[0]; |
---|
| 40 | q1=ex[0]; |
---|
| 41 | p2=ell[-1]; |
---|
| 42 | p3=ell[-2]; |
---|
| 43 | p4=ell[-3]; |
---|
| 44 | /* compute outer product and add it to the Z matrix */ |
---|
| 45 | m11 = p1 * q1; |
---|
| 46 | m21 = p2 * q1; |
---|
| 47 | m31 = p3 * q1; |
---|
| 48 | m41 = p4 * q1; |
---|
| 49 | ell += lskip1; |
---|
| 50 | Z11 += m11; |
---|
| 51 | Z21 += m21; |
---|
| 52 | Z31 += m31; |
---|
| 53 | Z41 += m41; |
---|
| 54 | /* load p and q values */ |
---|
| 55 | p1=ell[0]; |
---|
| 56 | q1=ex[-1]; |
---|
| 57 | p2=ell[-1]; |
---|
| 58 | p3=ell[-2]; |
---|
| 59 | p4=ell[-3]; |
---|
| 60 | /* compute outer product and add it to the Z matrix */ |
---|
| 61 | m11 = p1 * q1; |
---|
| 62 | m21 = p2 * q1; |
---|
| 63 | m31 = p3 * q1; |
---|
| 64 | m41 = p4 * q1; |
---|
| 65 | ell += lskip1; |
---|
| 66 | Z11 += m11; |
---|
| 67 | Z21 += m21; |
---|
| 68 | Z31 += m31; |
---|
| 69 | Z41 += m41; |
---|
| 70 | /* load p and q values */ |
---|
| 71 | p1=ell[0]; |
---|
| 72 | q1=ex[-2]; |
---|
| 73 | p2=ell[-1]; |
---|
| 74 | p3=ell[-2]; |
---|
| 75 | p4=ell[-3]; |
---|
| 76 | /* compute outer product and add it to the Z matrix */ |
---|
| 77 | m11 = p1 * q1; |
---|
| 78 | m21 = p2 * q1; |
---|
| 79 | m31 = p3 * q1; |
---|
| 80 | m41 = p4 * q1; |
---|
| 81 | ell += lskip1; |
---|
| 82 | Z11 += m11; |
---|
| 83 | Z21 += m21; |
---|
| 84 | Z31 += m31; |
---|
| 85 | Z41 += m41; |
---|
| 86 | /* load p and q values */ |
---|
| 87 | p1=ell[0]; |
---|
| 88 | q1=ex[-3]; |
---|
| 89 | p2=ell[-1]; |
---|
| 90 | p3=ell[-2]; |
---|
| 91 | p4=ell[-3]; |
---|
| 92 | /* compute outer product and add it to the Z matrix */ |
---|
| 93 | m11 = p1 * q1; |
---|
| 94 | m21 = p2 * q1; |
---|
| 95 | m31 = p3 * q1; |
---|
| 96 | m41 = p4 * q1; |
---|
| 97 | ell += lskip1; |
---|
| 98 | ex -= 4; |
---|
| 99 | Z11 += m11; |
---|
| 100 | Z21 += m21; |
---|
| 101 | Z31 += m31; |
---|
| 102 | Z41 += m41; |
---|
| 103 | /* end of inner loop */ |
---|
| 104 | } |
---|
| 105 | /* compute left-over iterations */ |
---|
| 106 | j += 4; |
---|
| 107 | for (; j > 0; j--) { |
---|
| 108 | /* load p and q values */ |
---|
| 109 | p1=ell[0]; |
---|
| 110 | q1=ex[0]; |
---|
| 111 | p2=ell[-1]; |
---|
| 112 | p3=ell[-2]; |
---|
| 113 | p4=ell[-3]; |
---|
| 114 | /* compute outer product and add it to the Z matrix */ |
---|
| 115 | m11 = p1 * q1; |
---|
| 116 | m21 = p2 * q1; |
---|
| 117 | m31 = p3 * q1; |
---|
| 118 | m41 = p4 * q1; |
---|
| 119 | ell += lskip1; |
---|
| 120 | ex -= 1; |
---|
| 121 | Z11 += m11; |
---|
| 122 | Z21 += m21; |
---|
| 123 | Z31 += m31; |
---|
| 124 | Z41 += m41; |
---|
| 125 | } |
---|
| 126 | /* finish computing the X(i) block */ |
---|
| 127 | Z11 = ex[0] - Z11; |
---|
| 128 | ex[0] = Z11; |
---|
| 129 | p1 = ell[-1]; |
---|
| 130 | Z21 = ex[-1] - Z21 - p1*Z11; |
---|
| 131 | ex[-1] = Z21; |
---|
| 132 | p1 = ell[-2]; |
---|
| 133 | p2 = ell[-2+lskip1]; |
---|
| 134 | Z31 = ex[-2] - Z31 - p1*Z11 - p2*Z21; |
---|
| 135 | ex[-2] = Z31; |
---|
| 136 | p1 = ell[-3]; |
---|
| 137 | p2 = ell[-3+lskip1]; |
---|
| 138 | p3 = ell[-3+lskip2]; |
---|
| 139 | Z41 = ex[-3] - Z41 - p1*Z11 - p2*Z21 - p3*Z31; |
---|
| 140 | ex[-3] = Z41; |
---|
| 141 | /* end of outer loop */ |
---|
| 142 | } |
---|
| 143 | /* compute rows at end that are not a multiple of block size */ |
---|
| 144 | for (; i < n; i++) { |
---|
| 145 | /* compute all 1 x 1 block of X, from rows i..i+1-1 */ |
---|
| 146 | /* set the Z matrix to 0 */ |
---|
| 147 | Z11=0; |
---|
| 148 | ell = L - i; |
---|
| 149 | ex = B; |
---|
| 150 | /* the inner loop that computes outer products and adds them to Z */ |
---|
| 151 | for (j=i-4; j >= 0; j -= 4) { |
---|
| 152 | /* load p and q values */ |
---|
| 153 | p1=ell[0]; |
---|
| 154 | q1=ex[0]; |
---|
| 155 | /* compute outer product and add it to the Z matrix */ |
---|
| 156 | m11 = p1 * q1; |
---|
| 157 | ell += lskip1; |
---|
| 158 | Z11 += m11; |
---|
| 159 | /* load p and q values */ |
---|
| 160 | p1=ell[0]; |
---|
| 161 | q1=ex[-1]; |
---|
| 162 | /* compute outer product and add it to the Z matrix */ |
---|
| 163 | m11 = p1 * q1; |
---|
| 164 | ell += lskip1; |
---|
| 165 | Z11 += m11; |
---|
| 166 | /* load p and q values */ |
---|
| 167 | p1=ell[0]; |
---|
| 168 | q1=ex[-2]; |
---|
| 169 | /* compute outer product and add it to the Z matrix */ |
---|
| 170 | m11 = p1 * q1; |
---|
| 171 | ell += lskip1; |
---|
| 172 | Z11 += m11; |
---|
| 173 | /* load p and q values */ |
---|
| 174 | p1=ell[0]; |
---|
| 175 | q1=ex[-3]; |
---|
| 176 | /* compute outer product and add it to the Z matrix */ |
---|
| 177 | m11 = p1 * q1; |
---|
| 178 | ell += lskip1; |
---|
| 179 | ex -= 4; |
---|
| 180 | Z11 += m11; |
---|
| 181 | /* end of inner loop */ |
---|
| 182 | } |
---|
| 183 | /* compute left-over iterations */ |
---|
| 184 | j += 4; |
---|
| 185 | for (; j > 0; j--) { |
---|
| 186 | /* load p and q values */ |
---|
| 187 | p1=ell[0]; |
---|
| 188 | q1=ex[0]; |
---|
| 189 | /* compute outer product and add it to the Z matrix */ |
---|
| 190 | m11 = p1 * q1; |
---|
| 191 | ell += lskip1; |
---|
| 192 | ex -= 1; |
---|
| 193 | Z11 += m11; |
---|
| 194 | } |
---|
| 195 | /* finish computing the X(i) block */ |
---|
| 196 | Z11 = ex[0] - Z11; |
---|
| 197 | ex[0] = Z11; |
---|
| 198 | } |
---|
| 199 | } |
---|