[1] | 1 | /* |
---|
| 2 | ----------------------------------------------------------------------------- |
---|
| 3 | This source file is part of OGRE |
---|
| 4 | (Object-oriented Graphics Rendering Engine) |
---|
| 5 | For the latest info, see http://www.ogre3d.org/ |
---|
| 6 | |
---|
| 7 | Copyright (c) 2000-2006 Torus Knot Software Ltd |
---|
| 8 | Also see acknowledgements in Readme.html |
---|
| 9 | |
---|
| 10 | This program is free software; you can redistribute it and/or modify it under |
---|
| 11 | the terms of the GNU Lesser General Public License as published by the Free Software |
---|
| 12 | Foundation; either version 2 of the License, or (at your option) any later |
---|
| 13 | version. |
---|
| 14 | |
---|
| 15 | This program is distributed in the hope that it will be useful, but WITHOUT |
---|
| 16 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
---|
| 17 | FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. |
---|
| 18 | |
---|
| 19 | You should have received a copy of the GNU Lesser General Public License along with |
---|
| 20 | this program; if not, write to the Free Software Foundation, Inc., 59 Temple |
---|
| 21 | Place - Suite 330, Boston, MA 02111-1307, USA, or go to |
---|
| 22 | http://www.gnu.org/copyleft/lesser.txt. |
---|
| 23 | |
---|
| 24 | You may alternatively use this source under the terms of a specific version of |
---|
| 25 | the OGRE Unrestricted License provided you have obtained such a license from |
---|
| 26 | Torus Knot Software Ltd. |
---|
| 27 | ----------------------------------------------------------------------------- |
---|
| 28 | */ |
---|
| 29 | #include "OgreStableHeaders.h" |
---|
| 30 | |
---|
| 31 | #include "OgreOptimisedUtil.h" |
---|
| 32 | |
---|
| 33 | #include "OgreVector3.h" |
---|
| 34 | #include "OgreMatrix4.h" |
---|
| 35 | |
---|
| 36 | namespace Ogre { |
---|
| 37 | |
---|
| 38 | //------------------------------------------------------------------------- |
---|
| 39 | // Local classes |
---|
| 40 | //------------------------------------------------------------------------- |
---|
| 41 | |
---|
| 42 | /** General implementation of OptimisedUtil. |
---|
| 43 | @note |
---|
| 44 | Don't use this class directly, use OptimisedUtil instead. |
---|
| 45 | */ |
---|
| 46 | class _OgrePrivate OptimisedUtilGeneral : public OptimisedUtil |
---|
| 47 | { |
---|
| 48 | public: |
---|
| 49 | /// @copydoc OptimisedUtil::softwareVertexSkinning |
---|
| 50 | virtual void softwareVertexSkinning( |
---|
| 51 | const float *srcPosPtr, float *destPosPtr, |
---|
| 52 | const float *srcNormPtr, float *destNormPtr, |
---|
| 53 | const float *blendWeightPtr, const unsigned char* blendIndexPtr, |
---|
| 54 | const Matrix4* const* blendMatrices, |
---|
| 55 | size_t srcPosStride, size_t destPosStride, |
---|
| 56 | size_t srcNormStride, size_t destNormStride, |
---|
| 57 | size_t blendWeightStride, size_t blendIndexStride, |
---|
| 58 | size_t numWeightsPerVertex, |
---|
| 59 | size_t numVertices); |
---|
| 60 | |
---|
| 61 | /// @copydoc OptimisedUtil::softwareVertexMorph |
---|
| 62 | virtual void softwareVertexMorph( |
---|
| 63 | Real t, |
---|
| 64 | const float *srcPos1, const float *srcPos2, |
---|
| 65 | float *dstPos, |
---|
| 66 | size_t numVertices); |
---|
| 67 | |
---|
| 68 | /// @copydoc OptimisedUtil::concatenateAffineMatrices |
---|
| 69 | virtual void concatenateAffineMatrices( |
---|
| 70 | const Matrix4& baseMatrix, |
---|
| 71 | const Matrix4* srcMatrices, |
---|
| 72 | Matrix4* dstMatrices, |
---|
| 73 | size_t numMatrices); |
---|
| 74 | |
---|
| 75 | /// @copydoc OptimisedUtil::calculateFaceNormals |
---|
| 76 | virtual void calculateFaceNormals( |
---|
| 77 | const float *positions, |
---|
| 78 | const EdgeData::Triangle *triangles, |
---|
| 79 | Vector4 *faceNormals, |
---|
| 80 | size_t numTriangles); |
---|
| 81 | |
---|
| 82 | /// @copydoc OptimisedUtil::calculateLightFacing |
---|
| 83 | virtual void calculateLightFacing( |
---|
| 84 | const Vector4& lightPos, |
---|
| 85 | const Vector4* faceNormals, |
---|
| 86 | char* lightFacings, |
---|
| 87 | size_t numFaces); |
---|
| 88 | |
---|
| 89 | /// @copydoc OptimisedUtil::extrudeVertices |
---|
| 90 | virtual void extrudeVertices( |
---|
| 91 | const Vector4& lightPos, |
---|
| 92 | Real extrudeDist, |
---|
| 93 | const float* srcPositions, |
---|
| 94 | float* destPositions, |
---|
| 95 | size_t numVertices); |
---|
| 96 | }; |
---|
| 97 | //--------------------------------------------------------------------- |
---|
| 98 | //--------------------------------------------------------------------- |
---|
| 99 | //--------------------------------------------------------------------- |
---|
| 100 | void OptimisedUtilGeneral::softwareVertexSkinning( |
---|
| 101 | const float *pSrcPos, float *pDestPos, |
---|
| 102 | const float *pSrcNorm, float *pDestNorm, |
---|
| 103 | const float *pBlendWeight, const unsigned char* pBlendIndex, |
---|
| 104 | const Matrix4* const* blendMatrices, |
---|
| 105 | size_t srcPosStride, size_t destPosStride, |
---|
| 106 | size_t srcNormStride, size_t destNormStride, |
---|
| 107 | size_t blendWeightStride, size_t blendIndexStride, |
---|
| 108 | size_t numWeightsPerVertex, |
---|
| 109 | size_t numVertices) |
---|
| 110 | { |
---|
| 111 | // Source vectors |
---|
| 112 | Vector3 sourceVec, sourceNorm; |
---|
| 113 | // Accumulation vectors |
---|
| 114 | Vector3 accumVecPos, accumVecNorm; |
---|
| 115 | |
---|
| 116 | // Loop per vertex |
---|
| 117 | for (size_t vertIdx = 0; vertIdx < numVertices; ++vertIdx) |
---|
| 118 | { |
---|
| 119 | // Load source vertex elements |
---|
| 120 | sourceVec.x = pSrcPos[0]; |
---|
| 121 | sourceVec.y = pSrcPos[1]; |
---|
| 122 | sourceVec.z = pSrcPos[2]; |
---|
| 123 | |
---|
| 124 | if (pSrcNorm) |
---|
| 125 | { |
---|
| 126 | sourceNorm.x = pSrcNorm[0]; |
---|
| 127 | sourceNorm.y = pSrcNorm[1]; |
---|
| 128 | sourceNorm.z = pSrcNorm[2]; |
---|
| 129 | } |
---|
| 130 | |
---|
| 131 | // Load accumulators |
---|
| 132 | accumVecPos = Vector3::ZERO; |
---|
| 133 | accumVecNorm = Vector3::ZERO; |
---|
| 134 | |
---|
| 135 | // Loop per blend weight |
---|
| 136 | // |
---|
| 137 | // Note: Don't change "unsigned short" here!!! If use "size_t" instead, |
---|
| 138 | // VC7.1 unroll this loop to four blend weights pre-iteration, and then |
---|
| 139 | // loss performance 10% in this function. Ok, this give a hint that we |
---|
| 140 | // should unroll this loop manually for better performance, will do that |
---|
| 141 | // later. |
---|
| 142 | // |
---|
| 143 | for (unsigned short blendIdx = 0; blendIdx < numWeightsPerVertex; ++blendIdx) |
---|
| 144 | { |
---|
| 145 | // Blend by multiplying source by blend matrix and scaling by weight |
---|
| 146 | // Add to accumulator |
---|
| 147 | // NB weights must be normalised!! |
---|
| 148 | Real weight = pBlendWeight[blendIdx]; |
---|
| 149 | if (weight) |
---|
| 150 | { |
---|
| 151 | // Blend position, use 3x4 matrix |
---|
| 152 | const Matrix4& mat = *blendMatrices[pBlendIndex[blendIdx]]; |
---|
| 153 | accumVecPos.x += |
---|
| 154 | (mat[0][0] * sourceVec.x + |
---|
| 155 | mat[0][1] * sourceVec.y + |
---|
| 156 | mat[0][2] * sourceVec.z + |
---|
| 157 | mat[0][3]) |
---|
| 158 | * weight; |
---|
| 159 | accumVecPos.y += |
---|
| 160 | (mat[1][0] * sourceVec.x + |
---|
| 161 | mat[1][1] * sourceVec.y + |
---|
| 162 | mat[1][2] * sourceVec.z + |
---|
| 163 | mat[1][3]) |
---|
| 164 | * weight; |
---|
| 165 | accumVecPos.z += |
---|
| 166 | (mat[2][0] * sourceVec.x + |
---|
| 167 | mat[2][1] * sourceVec.y + |
---|
| 168 | mat[2][2] * sourceVec.z + |
---|
| 169 | mat[2][3]) |
---|
| 170 | * weight; |
---|
| 171 | if (pSrcNorm) |
---|
| 172 | { |
---|
| 173 | // Blend normal |
---|
| 174 | // We should blend by inverse transpose here, but because we're assuming the 3x3 |
---|
| 175 | // aspect of the matrix is orthogonal (no non-uniform scaling), the inverse transpose |
---|
| 176 | // is equal to the main 3x3 matrix |
---|
| 177 | // Note because it's a normal we just extract the rotational part, saves us renormalising here |
---|
| 178 | accumVecNorm.x += |
---|
| 179 | (mat[0][0] * sourceNorm.x + |
---|
| 180 | mat[0][1] * sourceNorm.y + |
---|
| 181 | mat[0][2] * sourceNorm.z) |
---|
| 182 | * weight; |
---|
| 183 | accumVecNorm.y += |
---|
| 184 | (mat[1][0] * sourceNorm.x + |
---|
| 185 | mat[1][1] * sourceNorm.y + |
---|
| 186 | mat[1][2] * sourceNorm.z) |
---|
| 187 | * weight; |
---|
| 188 | accumVecNorm.z += |
---|
| 189 | (mat[2][0] * sourceNorm.x + |
---|
| 190 | mat[2][1] * sourceNorm.y + |
---|
| 191 | mat[2][2] * sourceNorm.z) |
---|
| 192 | * weight; |
---|
| 193 | } |
---|
| 194 | } |
---|
| 195 | } |
---|
| 196 | |
---|
| 197 | // Stored blended vertex in hardware buffer |
---|
| 198 | pDestPos[0] = accumVecPos.x; |
---|
| 199 | pDestPos[1] = accumVecPos.y; |
---|
| 200 | pDestPos[2] = accumVecPos.z; |
---|
| 201 | |
---|
| 202 | // Stored blended vertex in temp buffer |
---|
| 203 | if (pSrcNorm) |
---|
| 204 | { |
---|
| 205 | // Normalise |
---|
| 206 | accumVecNorm.normalise(); |
---|
| 207 | pDestNorm[0] = accumVecNorm.x; |
---|
| 208 | pDestNorm[1] = accumVecNorm.y; |
---|
| 209 | pDestNorm[2] = accumVecNorm.z; |
---|
| 210 | // Advance pointers |
---|
| 211 | advanceRawPointer(pSrcNorm, srcNormStride); |
---|
| 212 | advanceRawPointer(pDestNorm, destNormStride); |
---|
| 213 | } |
---|
| 214 | |
---|
| 215 | // Advance pointers |
---|
| 216 | advanceRawPointer(pSrcPos, srcPosStride); |
---|
| 217 | advanceRawPointer(pDestPos, destPosStride); |
---|
| 218 | advanceRawPointer(pBlendWeight, blendWeightStride); |
---|
| 219 | advanceRawPointer(pBlendIndex, blendIndexStride); |
---|
| 220 | } |
---|
| 221 | } |
---|
| 222 | //--------------------------------------------------------------------- |
---|
| 223 | void OptimisedUtilGeneral::concatenateAffineMatrices( |
---|
| 224 | const Matrix4& baseMatrix, |
---|
| 225 | const Matrix4* pSrcMat, |
---|
| 226 | Matrix4* pDstMat, |
---|
| 227 | size_t numMatrices) |
---|
| 228 | { |
---|
| 229 | const Matrix4& m = baseMatrix; |
---|
| 230 | |
---|
| 231 | for (size_t i = 0; i < numMatrices; ++i) |
---|
| 232 | { |
---|
| 233 | const Matrix4& s = *pSrcMat; |
---|
| 234 | Matrix4& d = *pDstMat; |
---|
| 235 | |
---|
| 236 | // TODO: Promote following code to Matrix4 class. |
---|
| 237 | |
---|
| 238 | d[0][0] = m[0][0] * s[0][0] + m[0][1] * s[1][0] + m[0][2] * s[2][0]; |
---|
| 239 | d[0][1] = m[0][0] * s[0][1] + m[0][1] * s[1][1] + m[0][2] * s[2][1]; |
---|
| 240 | d[0][2] = m[0][0] * s[0][2] + m[0][1] * s[1][2] + m[0][2] * s[2][2]; |
---|
| 241 | d[0][3] = m[0][0] * s[0][3] + m[0][1] * s[1][3] + m[0][2] * s[2][3] + m[0][3]; |
---|
| 242 | |
---|
| 243 | d[1][0] = m[1][0] * s[0][0] + m[1][1] * s[1][0] + m[1][2] * s[2][0]; |
---|
| 244 | d[1][1] = m[1][0] * s[0][1] + m[1][1] * s[1][1] + m[1][2] * s[2][1]; |
---|
| 245 | d[1][2] = m[1][0] * s[0][2] + m[1][1] * s[1][2] + m[1][2] * s[2][2]; |
---|
| 246 | d[1][3] = m[1][0] * s[0][3] + m[1][1] * s[1][3] + m[1][2] * s[2][3] + m[1][3]; |
---|
| 247 | |
---|
| 248 | d[2][0] = m[2][0] * s[0][0] + m[2][1] * s[1][0] + m[2][2] * s[2][0]; |
---|
| 249 | d[2][1] = m[2][0] * s[0][1] + m[2][1] * s[1][1] + m[2][2] * s[2][1]; |
---|
| 250 | d[2][2] = m[2][0] * s[0][2] + m[2][1] * s[1][2] + m[2][2] * s[2][2]; |
---|
| 251 | d[2][3] = m[2][0] * s[0][3] + m[2][1] * s[1][3] + m[2][2] * s[2][3] + m[2][3]; |
---|
| 252 | |
---|
| 253 | d[3][0] = 0; |
---|
| 254 | d[3][1] = 0; |
---|
| 255 | d[3][2] = 0; |
---|
| 256 | d[3][3] = 1; |
---|
| 257 | |
---|
| 258 | ++pSrcMat; |
---|
| 259 | ++pDstMat; |
---|
| 260 | } |
---|
| 261 | } |
---|
| 262 | //--------------------------------------------------------------------- |
---|
| 263 | void OptimisedUtilGeneral::softwareVertexMorph( |
---|
| 264 | Real t, |
---|
| 265 | const float *pSrc1, const float *pSrc2, |
---|
| 266 | float *pDst, |
---|
| 267 | size_t numVertices) |
---|
| 268 | { |
---|
| 269 | for (size_t i = 0; i < numVertices; ++i) |
---|
| 270 | { |
---|
| 271 | // x |
---|
| 272 | *pDst++ = *pSrc1 + t * (*pSrc2 - *pSrc1) ; |
---|
| 273 | ++pSrc1; ++pSrc2; |
---|
| 274 | // y |
---|
| 275 | *pDst++ = *pSrc1 + t * (*pSrc2 - *pSrc1) ; |
---|
| 276 | ++pSrc1; ++pSrc2; |
---|
| 277 | // z |
---|
| 278 | *pDst++ = *pSrc1 + t * (*pSrc2 - *pSrc1) ; |
---|
| 279 | ++pSrc1; ++pSrc2; |
---|
| 280 | } |
---|
| 281 | } |
---|
| 282 | //--------------------------------------------------------------------- |
---|
| 283 | void OptimisedUtilGeneral::calculateFaceNormals( |
---|
| 284 | const float *positions, |
---|
| 285 | const EdgeData::Triangle *triangles, |
---|
| 286 | Vector4 *faceNormals, |
---|
| 287 | size_t numTriangles) |
---|
| 288 | { |
---|
| 289 | for ( ; numTriangles; --numTriangles) |
---|
| 290 | { |
---|
| 291 | const EdgeData::Triangle& t = *triangles++; |
---|
| 292 | size_t offset; |
---|
| 293 | |
---|
| 294 | offset = t.vertIndex[0] * 3; |
---|
| 295 | Vector3 v1(positions[offset+0], positions[offset+1], positions[offset+2]); |
---|
| 296 | |
---|
| 297 | offset = t.vertIndex[1] * 3; |
---|
| 298 | Vector3 v2(positions[offset+0], positions[offset+1], positions[offset+2]); |
---|
| 299 | |
---|
| 300 | offset = t.vertIndex[2] * 3; |
---|
| 301 | Vector3 v3(positions[offset+0], positions[offset+1], positions[offset+2]); |
---|
| 302 | |
---|
| 303 | *faceNormals++ = Math::calculateFaceNormalWithoutNormalize(v1, v2, v3); |
---|
| 304 | } |
---|
| 305 | } |
---|
| 306 | //--------------------------------------------------------------------- |
---|
| 307 | void OptimisedUtilGeneral::calculateLightFacing( |
---|
| 308 | const Vector4& lightPos, |
---|
| 309 | const Vector4* faceNormals, |
---|
| 310 | char* lightFacings, |
---|
| 311 | size_t numFaces) |
---|
| 312 | { |
---|
| 313 | for (size_t i = 0; i < numFaces; ++i) |
---|
| 314 | { |
---|
| 315 | *lightFacings++ = (lightPos.dotProduct(*faceNormals++) > 0); |
---|
| 316 | } |
---|
| 317 | } |
---|
| 318 | //--------------------------------------------------------------------- |
---|
| 319 | void OptimisedUtilGeneral::extrudeVertices( |
---|
| 320 | const Vector4& lightPos, |
---|
| 321 | Real extrudeDist, |
---|
| 322 | const float* pSrcPos, |
---|
| 323 | float* pDestPos, |
---|
| 324 | size_t numVertices) |
---|
| 325 | { |
---|
| 326 | if (lightPos.w == 0.0f) |
---|
| 327 | { |
---|
| 328 | // Directional light, extrusion is along light direction |
---|
| 329 | |
---|
| 330 | Vector3 extrusionDir( |
---|
| 331 | -lightPos.x, |
---|
| 332 | -lightPos.y, |
---|
| 333 | -lightPos.z); |
---|
| 334 | extrusionDir.normalise(); |
---|
| 335 | extrusionDir *= extrudeDist; |
---|
| 336 | |
---|
| 337 | for (size_t vert = 0; vert < numVertices; ++vert) |
---|
| 338 | { |
---|
| 339 | *pDestPos++ = *pSrcPos++ + extrusionDir.x; |
---|
| 340 | *pDestPos++ = *pSrcPos++ + extrusionDir.y; |
---|
| 341 | *pDestPos++ = *pSrcPos++ + extrusionDir.z; |
---|
| 342 | } |
---|
| 343 | } |
---|
| 344 | else |
---|
| 345 | { |
---|
| 346 | // Point light, calculate extrusionDir for every vertex |
---|
| 347 | assert(lightPos.w == 1.0f); |
---|
| 348 | |
---|
| 349 | for (size_t vert = 0; vert < numVertices; ++vert) |
---|
| 350 | { |
---|
| 351 | Vector3 extrusionDir( |
---|
| 352 | pSrcPos[0] - lightPos.x, |
---|
| 353 | pSrcPos[1] - lightPos.y, |
---|
| 354 | pSrcPos[2] - lightPos.z); |
---|
| 355 | extrusionDir.normalise(); |
---|
| 356 | extrusionDir *= extrudeDist; |
---|
| 357 | |
---|
| 358 | *pDestPos++ = *pSrcPos++ + extrusionDir.x; |
---|
| 359 | *pDestPos++ = *pSrcPos++ + extrusionDir.y; |
---|
| 360 | *pDestPos++ = *pSrcPos++ + extrusionDir.z; |
---|
| 361 | } |
---|
| 362 | } |
---|
| 363 | } |
---|
| 364 | //--------------------------------------------------------------------- |
---|
| 365 | //--------------------------------------------------------------------- |
---|
| 366 | //--------------------------------------------------------------------- |
---|
| 367 | extern OptimisedUtil* _getOptimisedUtilGeneral(void) |
---|
| 368 | { |
---|
| 369 | static OptimisedUtilGeneral msOptimisedUtilGeneral; |
---|
| 370 | return &msOptimisedUtilGeneral; |
---|
| 371 | } |
---|
| 372 | |
---|
| 373 | } |
---|