[1966] | 1 | |
---|
| 2 | |
---|
| 3 | #include "../PlatformDefinitions.h" |
---|
| 4 | #include "SpuRaycastTask.h" |
---|
| 5 | #include "../SpuCollisionObjectWrapper.h" |
---|
| 6 | #include "../SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h" |
---|
| 7 | #include "SpuSubSimplexConvexCast.h" |
---|
| 8 | #include "LinearMath/btAabbUtil2.h" |
---|
| 9 | |
---|
| 10 | |
---|
| 11 | /* Future optimization strategies: |
---|
| 12 | 1. BBOX prune before loading shape data |
---|
| 13 | 2. Could reduce number of dmas for ray output data to a single read and write. |
---|
| 14 | By sharing the temporary work unit output structures across objects. |
---|
| 15 | 3. The reason SpuRaycastNodeCallback1 is slower is because the triangle data isn't |
---|
| 16 | being cached across calls. Fix that by doing the final ray pruning inside the callback. |
---|
| 17 | */ |
---|
| 18 | |
---|
| 19 | /* Future work: |
---|
| 20 | 1. support first hit, closest hit, etc rather than just closest hit. |
---|
| 21 | 2. support compound objects |
---|
| 22 | */ |
---|
| 23 | |
---|
| 24 | #define CALLBACK_ALL |
---|
| 25 | |
---|
/// Scratch area in SPU local store that the raycast task DMAs PPU-side
/// objects into before using them. All DMA targets are 16-byte aligned.
struct RaycastTask_LocalStoreMemory
{
	// Raw backing storage for one btCollisionObject copied from main memory;
	// the +16 bytes of slack absorb DMA alignment requirements.
	ATTRIBUTE_ALIGNED16(char gColObj [sizeof(btCollisionObject)+16]);
	// Typed view of the DMA'd collision object.
	btCollisionObject* getColObj()
	{
		return (btCollisionObject*) gColObj;
	}

	// Local copy of the per-object wrapper (collision object ptr, shape type, margin).
	ATTRIBUTE_ALIGNED16(SpuCollisionObjectWrapper gCollisionObjectWrapper);
	SpuCollisionObjectWrapper* getCollisionObjectWrapper ()
	{
		return &gCollisionObjectWrapper;
	}

	// Local copy of the object's collision shape data.
	CollisionShape_LocalStoreMemory gCollisionShape;
	// Triangle indices for the BVH node currently being processed; only the
	// first 3 slots are read, the array is oversized/aligned for DMA.
	ATTRIBUTE_ALIGNED16(int spuIndices[16]);

	// Per-shape-kind staging buffers: BVH trimesh data, convex hull vertex
	// data, and compound shape children.
	bvhMeshShape_LocalStoreMemory bvhShapeData;
	SpuConvexPolyhedronVertexData convexVertexData;
	CompoundShape_LocalStoreMemory compoundShapeData;
};
---|
| 47 | |
---|
#ifdef WIN32
/// Win32 (simulation) build: allocate the task-local scratch block on the
/// heap. The allocation lives for the duration of the task and is never
/// explicitly freed (task-lifetime singleton).
void* createRaycastLocalStoreMemory()
{
	return new RaycastTask_LocalStoreMemory;
}
#elif defined(__CELLOS_LV2__)
/// PS3/SPU build: local store memory is a fixed static block, 16-byte
/// aligned so it is a valid DMA target.
ATTRIBUTE_ALIGNED16(RaycastTask_LocalStoreMemory gLocalStoreMemory);
void* createRaycastLocalStoreMemory()
{
	return &gLocalStoreMemory;
}
#endif
---|
| 60 | |
---|
/// Copies one collision object (wrapper, object, then shape) from main memory
/// into SPU local store and fills @a gatheredObjectData with the fields the
/// raycast code needs: world transform, collision margin, shape type and the
/// PPU/SPU shape pointers. Blocks until every DMA has completed, so the data
/// is ready on return. The three transfers are sequential because each one
/// supplies the address needed by the next.
void GatherCollisionObjectAndShapeData (RaycastGatheredObjectData* gatheredObjectData, RaycastTask_LocalStoreMemory* lsMemPtr, ppu_address_t objectWrapper)
{
	register int dmaSize;
	register ppu_address_t dmaPpuAddress2;
	/* DMA Collision object wrapper into local store */
	dmaSize = sizeof(SpuCollisionObjectWrapper);
	dmaPpuAddress2 = objectWrapper;
	cellDmaGet(&lsMemPtr->gCollisionObjectWrapper, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	/* DMA Collision object into local store (its address comes from the wrapper fetched above) */
	dmaSize = sizeof(btCollisionObject);
	dmaPpuAddress2 = lsMemPtr->getCollisionObjectWrapper()->getCollisionObjectPtr();
	cellDmaGet(&lsMemPtr->gColObj, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(2));

	/* Gather information about collision object and shape */
	gatheredObjectData->m_worldTransform = lsMemPtr->getColObj()->getWorldTransform();
	gatheredObjectData->m_collisionMargin = lsMemPtr->getCollisionObjectWrapper()->getCollisionMargin ();
	gatheredObjectData->m_shapeType = lsMemPtr->getCollisionObjectWrapper()->getShapeType ();
	gatheredObjectData->m_collisionShape = (ppu_address_t)lsMemPtr->getColObj()->getCollisionShape();
	gatheredObjectData->m_spuCollisionShape = (void*)&lsMemPtr->gCollisionShape.collisionShape;

	/* DMA shape data */
	dmaCollisionShape (gatheredObjectData->m_spuCollisionShape, gatheredObjectData->m_collisionShape, 1, gatheredObjectData->m_shapeType);
	cellDmaWaitTagStatusAll(DMA_MASK(1));
	if (btBroadphaseProxy::isConvex (gatheredObjectData->m_shapeType))
	{
		btConvexInternalShape* spuConvexShape = (btConvexInternalShape*)gatheredObjectData->m_spuCollisionShape;
		gatheredObjectData->m_primitiveDimensions = spuConvexShape->getImplicitShapeDimensions ();
	} else {
		// Non-convex shapes have no implicit dimensions; use a unit placeholder.
		gatheredObjectData->m_primitiveDimensions = btVector3(1.0, 1.0, 1.0);
	}

}
---|
| 96 | |
---|
| 97 | void dmaLoadRayOutput (ppu_address_t rayOutputAddr, SpuRaycastTaskWorkUnitOut* rayOutput, uint32_t dmaTag) |
---|
| 98 | { |
---|
| 99 | cellDmaGet(rayOutput, rayOutputAddr, sizeof(*rayOutput), DMA_TAG(dmaTag), 0, 0); |
---|
| 100 | } |
---|
| 101 | |
---|
| 102 | void dmaStoreRayOutput (ppu_address_t rayOutputAddr, const SpuRaycastTaskWorkUnitOut* rayOutput, uint32_t dmaTag) |
---|
| 103 | { |
---|
| 104 | cellDmaLargePut (rayOutput, rayOutputAddr, sizeof(*rayOutput), DMA_TAG(dmaTag), 0, 0); |
---|
| 105 | } |
---|
| 106 | |
---|
#if 0
// Disabled alternative to small_cache_read_triple: a single small unaligned
// read, optionally served from the SPE software cache. Kept for reference.
SIMD_FORCE_INLINE void small_cache_read(void* buffer, ppu_address_t ea, size_t size)
{
#if USE_SOFTWARE_CACHE
	// Check for alignment requirements. We need to make sure the entire request fits within one cache line,
	// so the first and last bytes should fall on the same cache line
	btAssert((ea & ~SPE_CACHELINE_MASK) == ((ea + size - 1) & ~SPE_CACHELINE_MASK));

	void* ls = spe_cache_read(ea);
	memcpy(buffer, ls, size);
#else
	stallingUnalignedDmaSmallGet(buffer,ea,size);
#endif
}
#endif
---|
| 122 | |
---|
/// Fetches three small (&lt; 16 byte) values from three arbitrary, possibly
/// unaligned main-memory addresses into local store. Each value is DMA'd into
/// a 16-byte-aligned bounce buffer at the same sub-quadword offset as its
/// effective address (a cellDmaSmallGet requirement), all on one DMA tag, and
/// then byte-copied to the caller's destinations after a single wait.
void small_cache_read_triple( void* ls0, ppu_address_t ea0,
			void* ls1, ppu_address_t ea1,
			void* ls2, ppu_address_t ea2,
			size_t size)
{
	btAssert(size<16);
	ATTRIBUTE_ALIGNED16(char tmpBuffer0[32]);
	ATTRIBUTE_ALIGNED16(char tmpBuffer1[32]);
	ATTRIBUTE_ALIGNED16(char tmpBuffer2[32]);

	uint32_t i;


	///make sure last 4 bits are the same, for cellDmaSmallGet
	char* localStore0 = (char*)ls0;
	uint32_t last4BitsOffset = ea0 & 0x0f;
	char* tmpTarget0 = tmpBuffer0 + last4BitsOffset;
	// NOTE: the pointer returned by cellDmaSmallGetReadOnly is the one that
	// must be read after the wait (it may differ from the buffer passed in).
	tmpTarget0 = (char*)cellDmaSmallGetReadOnly(tmpTarget0,ea0,size,DMA_TAG(1),0,0);


	char* localStore1 = (char*)ls1;
	last4BitsOffset = ea1 & 0x0f;
	char* tmpTarget1 = tmpBuffer1 + last4BitsOffset;
	tmpTarget1 = (char*)cellDmaSmallGetReadOnly(tmpTarget1,ea1,size,DMA_TAG(1),0,0);

	char* localStore2 = (char*)ls2;
	last4BitsOffset = ea2 & 0x0f;
	char* tmpTarget2 = tmpBuffer2 + last4BitsOffset;
	tmpTarget2 = (char*)cellDmaSmallGetReadOnly(tmpTarget2,ea2,size,DMA_TAG(1),0,0);


	// One wait covers all three transfers (same tag).
	cellDmaWaitTagStatusAll( DMA_MASK(1) );

	//this is slowish, perhaps memcpy on SPU is smarter?
	for (i=0; btLikely( i<size );i++)
	{
		localStore0[i] = tmpTarget0[i];
		localStore1[i] = tmpTarget1[i];
		localStore2[i] = tmpTarget2[i];
	}
}
---|
| 164 | |
---|
| 165 | void performRaycastAgainstConvex (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr); |
---|
| 166 | |
---|
/// BVH leaf callback used when CALLBACK_ALL is NOT defined: for each reported
/// triangle it casts only the single ray selected via setWorkUnit(), keeping
/// the closest hit per work unit. Triangle vertex/index data is DMA'd per
/// call (see the "Future optimization" note at the top of this file about
/// caching it across calls).
class spuRaycastNodeCallback1 : public btNodeOverlapCallback
{
	RaycastGatheredObjectData* m_gatheredObjectData;	// concave object being raycast
	const SpuRaycastTaskWorkUnit* m_workUnits;		// all rays of this task
	SpuRaycastTaskWorkUnitOut* m_workUnitsOut;		// per-ray closest-hit results
	int m_workUnit;						// index of the ray to test next
	RaycastTask_LocalStoreMemory* m_lsMemPtr;

	// Scaled triangle handed to the convex caster, and the raw vertex
	// currently being DMA'd in; both aligned for DMA.
	ATTRIBUTE_ALIGNED16(btVector3 spuTriangleVertices[3]);
	ATTRIBUTE_ALIGNED16(btScalar spuUnscaledVertex[4]);
	//ATTRIBUTE_ALIGNED16(int spuIndices[16]);
public:
	spuRaycastNodeCallback1(RaycastGatheredObjectData* gatheredObjectData,const SpuRaycastTaskWorkUnit* workUnits, SpuRaycastTaskWorkUnitOut* workUnitsOut, RaycastTask_LocalStoreMemory* lsMemPtr)
		: m_gatheredObjectData(gatheredObjectData),
		m_workUnits(workUnits),
		m_workUnitsOut(workUnitsOut),
		m_workUnit(0),
		m_lsMemPtr (lsMemPtr)
	{
	}

	// Selects which ray the next processNode() call tests.
	void setWorkUnit (int workUnit) { m_workUnit = workUnit; }
	virtual void processNode(int subPart, int triangleIndex)
	{
		///Create a triangle on the stack, call process collision, with GJK
		///DMA the vertices, can benefit from software caching

		//	spu_printf("processNode with triangleIndex %d\n",triangleIndex);

		// ugly solution to support both 16bit and 32bit indices
		if (m_lsMemPtr->bvhShapeData.gIndexMesh.m_indexType == PHY_SHORT)
		{
			short int* indexBasePtr = (short int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
			ATTRIBUTE_ALIGNED16(short int tmpIndices[3]);

			small_cache_read_triple(&tmpIndices[0],(ppu_address_t)&indexBasePtr[0],
							&tmpIndices[1],(ppu_address_t)&indexBasePtr[1],
							&tmpIndices[2],(ppu_address_t)&indexBasePtr[2],
							sizeof(short int));

			// Widen the 16-bit indices into the shared int buffer.
			m_lsMemPtr->spuIndices[0] = int(tmpIndices[0]);
			m_lsMemPtr->spuIndices[1] = int(tmpIndices[1]);
			m_lsMemPtr->spuIndices[2] = int(tmpIndices[2]);
		} else
		{
			int* indexBasePtr = (int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);

			small_cache_read_triple(&m_lsMemPtr->spuIndices[0],(ppu_address_t)&indexBasePtr[0],
						&m_lsMemPtr->spuIndices[1],(ppu_address_t)&indexBasePtr[1],
						&m_lsMemPtr->spuIndices[2],(ppu_address_t)&indexBasePtr[2],
						sizeof(int));
		}

		//printf("%d %d %d\n", m_lsMemPtr->spuIndices[0], m_lsMemPtr->spuIndices[1], m_lsMemPtr->spuIndices[2]);
		//		spu_printf("SPU index0=%d ,",spuIndices[0]);
		//		spu_printf("SPU index1=%d ,",spuIndices[1]);
		//		spu_printf("SPU index2=%d ,",spuIndices[2]);
		//		spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr);

		const btVector3& meshScaling = m_lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getScaling();

		// DMA each of the three vertices and apply the mesh scaling.
		for (int j=2;btLikely( j>=0 );j--)
		{
			int graphicsindex = m_lsMemPtr->spuIndices[j];

			//spu_printf("SPU index=%d ,",graphicsindex);
			btScalar* graphicsbasePtr = (btScalar*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexBase+graphicsindex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexStride);

			//			spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr);


			///handle un-aligned vertices...

			//another DMA for each vertex
			small_cache_read_triple(&spuUnscaledVertex[0],(ppu_address_t)&graphicsbasePtr[0],
						&spuUnscaledVertex[1],(ppu_address_t)&graphicsbasePtr[1],
						&spuUnscaledVertex[2],(ppu_address_t)&graphicsbasePtr[2],
						sizeof(btScalar));

			//printf("%f %f %f\n", spuUnscaledVertex[0],spuUnscaledVertex[1],spuUnscaledVertex[2]);
			spuTriangleVertices[j] = btVector3(
				spuUnscaledVertex[0]*meshScaling.getX(),
				spuUnscaledVertex[1]*meshScaling.getY(),
				spuUnscaledVertex[2]*meshScaling.getZ());

			//spu_printf("SPU:triangle vertices:%f,%f,%f\n",spuTriangleVertices[j].x(),spuTriangleVertices[j].y(),spuTriangleVertices[j].z());
		}

		// Treat the triangle as a convex shape and reuse the convex ray caster.
		RaycastGatheredObjectData triangleGatheredObjectData (*m_gatheredObjectData);
		triangleGatheredObjectData.m_shapeType = TRIANGLE_SHAPE_PROXYTYPE;
		triangleGatheredObjectData.m_spuCollisionShape = &spuTriangleVertices[0];

		//printf("%f %f %f\n", spuTriangleVertices[0][0],spuTriangleVertices[0][1],spuTriangleVertices[0][2]);
		//printf("%f %f %f\n", spuTriangleVertices[1][0],spuTriangleVertices[1][1],spuTriangleVertices[1][2]);
		//printf("%f %f %f\n", spuTriangleVertices[2][0],spuTriangleVertices[2][1],spuTriangleVertices[2][2]);
		SpuRaycastTaskWorkUnitOut out;
		out.hitFraction = 1.0;
		performRaycastAgainstConvex (&triangleGatheredObjectData, m_workUnits[m_workUnit], &out, m_lsMemPtr);
		/* XXX: For now only take the closest hit */
		if (out.hitFraction < m_workUnitsOut[m_workUnit].hitFraction)
		{
			m_workUnitsOut[m_workUnit].hitFraction = out.hitFraction;
			m_workUnitsOut[m_workUnit].hitNormal = out.hitNormal;
		}
	}

};
---|
| 274 | |
---|
/// BVH leaf callback used when CALLBACK_ALL is defined: for each reported
/// triangle it casts ALL rays in the batch against the triangle, keeping the
/// closest hit per work unit. Shares (duplicates) the triangle-fetch logic of
/// spuRaycastNodeCallback1; the two differ only in which rays are tested.
class spuRaycastNodeCallback : public btNodeOverlapCallback
{
	RaycastGatheredObjectData* m_gatheredObjectData;	// concave object being raycast
	const SpuRaycastTaskWorkUnit* m_workUnits;		// all rays of this task
	SpuRaycastTaskWorkUnitOut* m_workUnitsOut;		// per-ray closest-hit results
	int m_numWorkUnits;					// number of rays in the batch
	RaycastTask_LocalStoreMemory* m_lsMemPtr;

	// Scaled triangle handed to the convex caster, and the raw vertex
	// currently being DMA'd in; both aligned for DMA.
	ATTRIBUTE_ALIGNED16(btVector3 spuTriangleVertices[3]);
	ATTRIBUTE_ALIGNED16(btScalar spuUnscaledVertex[4]);
	//ATTRIBUTE_ALIGNED16(int spuIndices[16]);
public:
	spuRaycastNodeCallback(RaycastGatheredObjectData* gatheredObjectData,const SpuRaycastTaskWorkUnit* workUnits, SpuRaycastTaskWorkUnitOut* workUnitsOut, int numWorkUnits, RaycastTask_LocalStoreMemory* lsMemPtr)
		: m_gatheredObjectData(gatheredObjectData),
		m_workUnits(workUnits),
		m_workUnitsOut(workUnitsOut),
		m_numWorkUnits(numWorkUnits),
		m_lsMemPtr (lsMemPtr)
	{
	}

	virtual void processNode(int subPart, int triangleIndex)
	{
		///Create a triangle on the stack, call process collision, with GJK
		///DMA the vertices, can benefit from software caching

		//	spu_printf("processNode with triangleIndex %d\n",triangleIndex);

		// ugly solution to support both 16bit and 32bit indices
		if (m_lsMemPtr->bvhShapeData.gIndexMesh.m_indexType == PHY_SHORT)
		{
			short int* indexBasePtr = (short int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
			ATTRIBUTE_ALIGNED16(short int tmpIndices[3]);

			small_cache_read_triple(&tmpIndices[0],(ppu_address_t)&indexBasePtr[0],
							&tmpIndices[1],(ppu_address_t)&indexBasePtr[1],
							&tmpIndices[2],(ppu_address_t)&indexBasePtr[2],
							sizeof(short int));

			// Widen the 16-bit indices into the shared int buffer.
			m_lsMemPtr->spuIndices[0] = int(tmpIndices[0]);
			m_lsMemPtr->spuIndices[1] = int(tmpIndices[1]);
			m_lsMemPtr->spuIndices[2] = int(tmpIndices[2]);
		} else
		{
			int* indexBasePtr = (int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);

			small_cache_read_triple(&m_lsMemPtr->spuIndices[0],(ppu_address_t)&indexBasePtr[0],
						&m_lsMemPtr->spuIndices[1],(ppu_address_t)&indexBasePtr[1],
						&m_lsMemPtr->spuIndices[2],(ppu_address_t)&indexBasePtr[2],
						sizeof(int));
		}

		//printf("%d %d %d\n", m_lsMemPtr->spuIndices[0], m_lsMemPtr->spuIndices[1], m_lsMemPtr->spuIndices[2]);
		//		spu_printf("SPU index0=%d ,",spuIndices[0]);
		//		spu_printf("SPU index1=%d ,",spuIndices[1]);
		//		spu_printf("SPU index2=%d ,",spuIndices[2]);
		//		spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr);

		const btVector3& meshScaling = m_lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getScaling();

		// DMA each of the three vertices and apply the mesh scaling.
		for (int j=2;btLikely( j>=0 );j--)
		{
			int graphicsindex = m_lsMemPtr->spuIndices[j];

			//spu_printf("SPU index=%d ,",graphicsindex);
			btScalar* graphicsbasePtr = (btScalar*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexBase+graphicsindex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexStride);

			//			spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr);


			///handle un-aligned vertices...

			//another DMA for each vertex
			small_cache_read_triple(&spuUnscaledVertex[0],(ppu_address_t)&graphicsbasePtr[0],
						&spuUnscaledVertex[1],(ppu_address_t)&graphicsbasePtr[1],
						&spuUnscaledVertex[2],(ppu_address_t)&graphicsbasePtr[2],
						sizeof(btScalar));

			//printf("%f %f %f\n", spuUnscaledVertex[0],spuUnscaledVertex[1],spuUnscaledVertex[2]);
			spuTriangleVertices[j] = btVector3(
				spuUnscaledVertex[0]*meshScaling.getX(),
				spuUnscaledVertex[1]*meshScaling.getY(),
				spuUnscaledVertex[2]*meshScaling.getZ());

			//spu_printf("SPU:triangle vertices:%f,%f,%f\n",spuTriangleVertices[j].x(),spuTriangleVertices[j].y(),spuTriangleVertices[j].z());
		}

		// Treat the triangle as a convex shape and reuse the convex ray caster.
		RaycastGatheredObjectData triangleGatheredObjectData (*m_gatheredObjectData);
		triangleGatheredObjectData.m_shapeType = TRIANGLE_SHAPE_PROXYTYPE;
		triangleGatheredObjectData.m_spuCollisionShape = &spuTriangleVertices[0];

		//printf("%f %f %f\n", spuTriangleVertices[0][0],spuTriangleVertices[0][1],spuTriangleVertices[0][2]);
		//printf("%f %f %f\n", spuTriangleVertices[1][0],spuTriangleVertices[1][1],spuTriangleVertices[1][2]);
		//printf("%f %f %f\n", spuTriangleVertices[2][0],spuTriangleVertices[2][1],spuTriangleVertices[2][2]);
		// Cast every ray in the batch against this one triangle.
		for (int i = 0; i < m_numWorkUnits; i++)
		{
			SpuRaycastTaskWorkUnitOut out;
			out.hitFraction = 1.0;
			performRaycastAgainstConvex (&triangleGatheredObjectData, m_workUnits[i], &out, m_lsMemPtr);
			/* XXX: For now only take the closest hit */
			if (out.hitFraction < m_workUnitsOut[i].hitFraction)
			{
				m_workUnitsOut[i].hitFraction = out.hitFraction;
				m_workUnitsOut[i].hitNormal = out.hitNormal;
			}
		}
	}

};
---|
| 384 | |
---|
| 385 | |
---|
| 386 | void spuWalkStacklessQuantizedTreeAgainstRays(RaycastTask_LocalStoreMemory* lsMemPtr, |
---|
| 387 | btNodeOverlapCallback* nodeCallback, |
---|
| 388 | const btVector3* rayFrom, |
---|
| 389 | const btVector3* rayTo, |
---|
| 390 | int numWorkUnits, |
---|
| 391 | unsigned short int* quantizedQueryAabbMin, |
---|
| 392 | unsigned short int* quantizedQueryAabbMax, |
---|
| 393 | const btQuantizedBvhNode* rootNode, |
---|
| 394 | int startNodeIndex,int endNodeIndex) |
---|
| 395 | { |
---|
| 396 | int curIndex = startNodeIndex; |
---|
| 397 | int walkIterations = 0; |
---|
| 398 | int subTreeSize = endNodeIndex - startNodeIndex; |
---|
| 399 | |
---|
| 400 | int escapeIndex; |
---|
| 401 | |
---|
| 402 | unsigned int boxBoxOverlap, rayBoxOverlap, anyRayBoxOverlap; |
---|
| 403 | unsigned int isLeafNode; |
---|
| 404 | |
---|
| 405 | #define RAYAABB2 |
---|
| 406 | #ifdef RAYAABB2 |
---|
| 407 | unsigned int sign[SPU_RAYCAST_WORK_UNITS_PER_TASK][3]; |
---|
| 408 | btVector3 rayInvDirection[SPU_RAYCAST_WORK_UNITS_PER_TASK]; |
---|
| 409 | btScalar lambda_max[SPU_RAYCAST_WORK_UNITS_PER_TASK]; |
---|
| 410 | for (int i = 0; i < numWorkUnits; i++) |
---|
| 411 | { |
---|
| 412 | btVector3 rayDirection = (rayTo[i]-rayFrom[i]); |
---|
| 413 | rayDirection.normalize (); |
---|
| 414 | lambda_max[i] = rayDirection.dot(rayTo[i]-rayFrom[i]); |
---|
| 415 | rayInvDirection[i][0] = btScalar(1.0) / rayDirection[0]; |
---|
| 416 | rayInvDirection[i][1] = btScalar(1.0) / rayDirection[1]; |
---|
| 417 | rayInvDirection[i][2] = btScalar(1.0) / rayDirection[2]; |
---|
| 418 | sign[i][0] = rayDirection[0] < 0.0; |
---|
| 419 | sign[i][1] = rayDirection[1] < 0.0; |
---|
| 420 | sign[i][2] = rayDirection[2] < 0.0; |
---|
| 421 | } |
---|
| 422 | #endif |
---|
| 423 | |
---|
| 424 | while (curIndex < endNodeIndex) |
---|
| 425 | { |
---|
| 426 | //catch bugs in tree data |
---|
| 427 | assert (walkIterations < subTreeSize); |
---|
| 428 | |
---|
| 429 | walkIterations++; |
---|
| 430 | |
---|
| 431 | isLeafNode = rootNode->isLeafNode(); |
---|
| 432 | |
---|
| 433 | anyRayBoxOverlap = 0; |
---|
| 434 | |
---|
| 435 | for (int i = 0; i < numWorkUnits; i++) |
---|
| 436 | { |
---|
| 437 | unsigned short int* quamin = (quantizedQueryAabbMin + 3 * i); |
---|
| 438 | unsigned short int* quamax = (quantizedQueryAabbMax + 3 * i); |
---|
| 439 | boxBoxOverlap = spuTestQuantizedAabbAgainstQuantizedAabb(quamin,quamax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax); |
---|
| 440 | if (!boxBoxOverlap) |
---|
| 441 | continue; |
---|
| 442 | |
---|
| 443 | rayBoxOverlap = 0; |
---|
| 444 | btScalar param = 1.0; |
---|
| 445 | btVector3 normal; |
---|
| 446 | btVector3 bounds[2]; |
---|
| 447 | bounds[0] = lsMemPtr->bvhShapeData.getOptimizedBvh()->unQuantize(rootNode->m_quantizedAabbMin); |
---|
| 448 | bounds[1] = lsMemPtr->bvhShapeData.getOptimizedBvh()->unQuantize(rootNode->m_quantizedAabbMax); |
---|
| 449 | #ifdef RAYAABB2 |
---|
| 450 | rayBoxOverlap = btRayAabb2 (rayFrom[i], rayInvDirection[i], sign[i], bounds, param, 0.0, lambda_max[i]); |
---|
| 451 | #else |
---|
| 452 | rayBoxOverlap = btRayAabb(rayFrom[i], rayTo[i], bounds[0], bounds[1], param, normal); |
---|
| 453 | #endif |
---|
| 454 | |
---|
| 455 | #ifndef CALLBACK_ALL |
---|
| 456 | anyRayBoxOverlap = rayBoxOverlap || anyRayBoxOverlap; |
---|
| 457 | /* If we have any ray vs. box overlap and this isn't a leaf node |
---|
| 458 | we know that we need to dig deeper |
---|
| 459 | */ |
---|
| 460 | if (!isLeafNode && anyRayBoxOverlap) |
---|
| 461 | break; |
---|
| 462 | |
---|
| 463 | if (isLeafNode && rayBoxOverlap) |
---|
| 464 | { |
---|
| 465 | spuRaycastNodeCallback1* callback = (spuRaycastNodeCallback1*)nodeCallback; |
---|
| 466 | callback->setWorkUnit (i); |
---|
| 467 | nodeCallback->processNode (0, rootNode->getTriangleIndex()); |
---|
| 468 | } |
---|
| 469 | #else |
---|
| 470 | /* If we have any ray vs. box overlap and this isn't a leaf node |
---|
| 471 | we know that we need to dig deeper |
---|
| 472 | */ |
---|
| 473 | if (rayBoxOverlap) |
---|
| 474 | { |
---|
| 475 | anyRayBoxOverlap = 1; |
---|
| 476 | break; |
---|
| 477 | } |
---|
| 478 | #endif |
---|
| 479 | } |
---|
| 480 | |
---|
| 481 | #ifdef CALLBACK_ALL |
---|
| 482 | if (isLeafNode && anyRayBoxOverlap) |
---|
| 483 | { |
---|
| 484 | nodeCallback->processNode (0, rootNode->getTriangleIndex()); |
---|
| 485 | } |
---|
| 486 | #endif |
---|
| 487 | |
---|
| 488 | if (anyRayBoxOverlap || isLeafNode) |
---|
| 489 | { |
---|
| 490 | rootNode++; |
---|
| 491 | curIndex++; |
---|
| 492 | } else |
---|
| 493 | { |
---|
| 494 | escapeIndex = rootNode->getEscapeIndex(); |
---|
| 495 | rootNode += escapeIndex; |
---|
| 496 | curIndex += escapeIndex; |
---|
| 497 | } |
---|
| 498 | } |
---|
| 499 | |
---|
| 500 | } |
---|
| 501 | |
---|
| 502 | |
---|
| 503 | void performRaycastAgainstConcave (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit* workUnits, SpuRaycastTaskWorkUnitOut* workUnitsOut, int numWorkUnits, RaycastTask_LocalStoreMemory* lsMemPtr) |
---|
| 504 | { |
---|
| 505 | //order: first collision shape is convex, second concave. m_isSwapped is true, if the original order was opposite |
---|
| 506 | register int dmaSize; |
---|
| 507 | register ppu_address_t dmaPpuAddress2; |
---|
| 508 | |
---|
| 509 | |
---|
| 510 | btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)gatheredObjectData->m_spuCollisionShape; |
---|
| 511 | |
---|
| 512 | //need the mesh interface, for access to triangle vertices |
---|
| 513 | dmaBvhShapeData (&(lsMemPtr->bvhShapeData), trimeshShape); |
---|
| 514 | |
---|
| 515 | unsigned short int quantizedQueryAabbMin[SPU_RAYCAST_WORK_UNITS_PER_TASK][3]; |
---|
| 516 | unsigned short int quantizedQueryAabbMax[SPU_RAYCAST_WORK_UNITS_PER_TASK][3]; |
---|
| 517 | btVector3 rayFromInTriangleSpace[SPU_RAYCAST_WORK_UNITS_PER_TASK]; |
---|
| 518 | btVector3 rayToInTriangleSpace[SPU_RAYCAST_WORK_UNITS_PER_TASK]; |
---|
| 519 | |
---|
| 520 | /* Calculate the AABB for the ray in the triangle mesh shape */ |
---|
| 521 | btTransform rayInTriangleSpace; |
---|
| 522 | rayInTriangleSpace = gatheredObjectData->m_worldTransform.inverse(); |
---|
| 523 | |
---|
| 524 | for (int i = 0; i < numWorkUnits; i++) |
---|
| 525 | { |
---|
| 526 | btVector3 aabbMin; |
---|
| 527 | btVector3 aabbMax; |
---|
| 528 | |
---|
| 529 | rayFromInTriangleSpace[i] = rayInTriangleSpace(workUnits[i].rayFrom); |
---|
| 530 | rayToInTriangleSpace[i] = rayInTriangleSpace(workUnits[i].rayTo); |
---|
| 531 | |
---|
| 532 | aabbMin = rayFromInTriangleSpace[i]; |
---|
| 533 | aabbMin.setMin (rayToInTriangleSpace[i]); |
---|
| 534 | aabbMax = rayFromInTriangleSpace[i]; |
---|
| 535 | aabbMax.setMax (rayToInTriangleSpace[i]); |
---|
| 536 | |
---|
| 537 | lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMin[i],aabbMin,0); |
---|
| 538 | lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMax[i],aabbMax,1); |
---|
| 539 | } |
---|
| 540 | |
---|
| 541 | QuantizedNodeArray& nodeArray = lsMemPtr->bvhShapeData.getOptimizedBvh()->getQuantizedNodeArray(); |
---|
| 542 | //spu_printf("SPU: numNodes = %d\n",nodeArray.size()); |
---|
| 543 | |
---|
| 544 | BvhSubtreeInfoArray& subTrees = lsMemPtr->bvhShapeData.getOptimizedBvh()->getSubtreeInfoArray(); |
---|
| 545 | |
---|
| 546 | #ifdef CALLBACK_ALL |
---|
| 547 | spuRaycastNodeCallback nodeCallback (gatheredObjectData, workUnits, workUnitsOut, numWorkUnits, lsMemPtr); |
---|
| 548 | #else |
---|
| 549 | spuRaycastNodeCallback1 nodeCallback (gatheredObjectData, workUnits, workUnitsOut, lsMemPtr); |
---|
| 550 | #endif |
---|
| 551 | |
---|
| 552 | IndexedMeshArray& indexArray = lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getIndexedMeshArray(); |
---|
| 553 | |
---|
| 554 | //spu_printf("SPU:indexArray.size() = %d\n",indexArray.size()); |
---|
| 555 | // spu_printf("SPU: numSubTrees = %d\n",subTrees.size()); |
---|
| 556 | //not likely to happen |
---|
| 557 | if (subTrees.size() && indexArray.size() == 1) |
---|
| 558 | { |
---|
| 559 | ///DMA in the index info |
---|
| 560 | dmaBvhIndexedMesh (&lsMemPtr->bvhShapeData.gIndexMesh, indexArray, 0 /* index into indexArray */, 1 /* dmaTag */); |
---|
| 561 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
| 562 | |
---|
| 563 | //display the headers |
---|
| 564 | int numBatch = subTrees.size(); |
---|
| 565 | for (int i=0;i<numBatch;) |
---|
| 566 | { |
---|
| 567 | // BEN: TODO - can reorder DMA transfers for less stall |
---|
| 568 | int remaining = subTrees.size() - i; |
---|
| 569 | int nextBatch = remaining < MAX_SPU_SUBTREE_HEADERS ? remaining : MAX_SPU_SUBTREE_HEADERS; |
---|
| 570 | |
---|
| 571 | dmaBvhSubTreeHeaders (&lsMemPtr->bvhShapeData.gSubtreeHeaders[0], (ppu_address_t)(&subTrees[i]), nextBatch, 1); |
---|
| 572 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
| 573 | |
---|
| 574 | |
---|
| 575 | // spu_printf("nextBatch = %d\n",nextBatch); |
---|
| 576 | |
---|
| 577 | |
---|
| 578 | for (int j=0;j<nextBatch;j++) |
---|
| 579 | { |
---|
| 580 | const btBvhSubtreeInfo& subtree = lsMemPtr->bvhShapeData.gSubtreeHeaders[j]; |
---|
| 581 | |
---|
| 582 | unsigned int overlap = 1; |
---|
| 583 | for (int boxId = 0; boxId < numWorkUnits; boxId++) |
---|
| 584 | { |
---|
| 585 | overlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin[boxId],quantizedQueryAabbMax[boxId],subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); |
---|
| 586 | if (overlap) |
---|
| 587 | break; |
---|
| 588 | } |
---|
| 589 | |
---|
| 590 | if (overlap) |
---|
| 591 | { |
---|
| 592 | btAssert(subtree.m_subtreeSize); |
---|
| 593 | |
---|
| 594 | //dma the actual nodes of this subtree |
---|
| 595 | dmaBvhSubTreeNodes (&lsMemPtr->bvhShapeData.gSubtreeNodes[0], subtree, nodeArray, 2); |
---|
| 596 | |
---|
| 597 | cellDmaWaitTagStatusAll(DMA_MASK(2)); |
---|
| 598 | |
---|
| 599 | /* Walk this subtree */ |
---|
| 600 | |
---|
| 601 | { |
---|
| 602 | |
---|
| 603 | spuWalkStacklessQuantizedTreeAgainstRays(lsMemPtr, |
---|
| 604 | &nodeCallback, |
---|
| 605 | &rayFromInTriangleSpace[0], |
---|
| 606 | &rayToInTriangleSpace[0], |
---|
| 607 | numWorkUnits, |
---|
| 608 | &quantizedQueryAabbMin[0][0],&quantizedQueryAabbMax[0][0], |
---|
| 609 | &lsMemPtr->bvhShapeData.gSubtreeNodes[0], 0, subtree.m_subtreeSize); |
---|
| 610 | } |
---|
| 611 | } |
---|
| 612 | // spu_printf("subtreeSize = %d\n",gSubtreeHeaders[j].m_subtreeSize); |
---|
| 613 | } |
---|
| 614 | |
---|
| 615 | // unsigned short int m_quantizedAabbMin[3]; |
---|
| 616 | // unsigned short int m_quantizedAabbMax[3]; |
---|
| 617 | // int m_rootNodeIndex; |
---|
| 618 | // int m_subtreeSize; |
---|
| 619 | i+=nextBatch; |
---|
| 620 | } |
---|
| 621 | |
---|
| 622 | //pre-fetch first tree, then loop and double buffer |
---|
| 623 | } |
---|
| 624 | |
---|
| 625 | } |
---|
| 626 | |
---|
| 627 | void performRaycastAgainstCompound (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr) |
---|
| 628 | { |
---|
| 629 | //XXX spu_printf ("Currently no support for ray. vs compound objects. Support coming soon.\n"); |
---|
| 630 | } |
---|
| 631 | |
---|
/// Casts one ray against one convex shape (also used per-triangle by the
/// concave path, with shape type TRIANGLE_SHAPE_PROXYTYPE) using the
/// subsimplex convex-cast. On a hit, writes hitFraction/hitNormal into
/// @a workUnitOut; on a miss it leaves @a workUnitOut untouched, so callers
/// must pre-initialize it (they seed hitFraction = 1.0).
void
performRaycastAgainstConvex (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr)
{
	SpuVoronoiSimplexSolver simplexSolver;

	// The ray is modelled as a point swept from rayFrom to rayTo.
	btTransform rayFromTrans, rayToTrans;
	rayFromTrans.setIdentity ();
	rayFromTrans.setOrigin (workUnit.rayFrom);
	rayToTrans.setIdentity ();
	rayToTrans.setOrigin (workUnit.rayTo);

	SpuCastResult result;

	/* Load the vertex data if the shape is a convex hull */
	/* XXX: We might be loading the shape twice */
	ATTRIBUTE_ALIGNED16(char convexHullShape[sizeof(btConvexHullShape)]);
	if (gatheredObjectData->m_shapeType == CONVEX_HULL_SHAPE_PROXYTYPE)
	{
		register int dmaSize;
		register	ppu_address_t	dmaPpuAddress2;
		dmaSize = sizeof(btConvexHullShape);
		dmaPpuAddress2 = gatheredObjectData->m_collisionShape;
		cellDmaGet(&convexHullShape, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0);
		cellDmaWaitTagStatusAll(DMA_MASK(1));
		dmaConvexVertexData (&lsMemPtr->convexVertexData, (btConvexHullShape*)&convexHullShape);
		cellDmaWaitTagStatusAll(DMA_MASK(2)); // dmaConvexVertexData uses dma channel 2!
		// Point the local-store vertex data at the SPU copy of the shape.
		lsMemPtr->convexVertexData.gSpuConvexShapePtr = gatheredObjectData->m_spuCollisionShape;
		lsMemPtr->convexVertexData.gConvexPoints = &lsMemPtr->convexVertexData.g_convexPointBuffer[0];
	}

	/* performRaycast */
	SpuSubsimplexRayCast caster (gatheredObjectData->m_spuCollisionShape, &lsMemPtr->convexVertexData, gatheredObjectData->m_shapeType, gatheredObjectData->m_collisionMargin, &simplexSolver);
	bool r = caster.calcTimeOfImpact (rayFromTrans, rayToTrans, gatheredObjectData->m_worldTransform, gatheredObjectData->m_worldTransform,result);

	if (r)
	{
		workUnitOut->hitFraction = result.m_fraction;
		workUnitOut->hitNormal = result.m_normal;
	}
}
---|
| 672 | |
---|
| 673 | void processRaycastTask(void* userPtr, void* lsMemory) |
---|
| 674 | { |
---|
| 675 | RaycastTask_LocalStoreMemory* localMemory = (RaycastTask_LocalStoreMemory*)lsMemory; |
---|
| 676 | |
---|
| 677 | SpuRaycastTaskDesc* taskDescPtr = (SpuRaycastTaskDesc*)userPtr; |
---|
| 678 | SpuRaycastTaskDesc& taskDesc = *taskDescPtr; |
---|
| 679 | |
---|
| 680 | SpuCollisionObjectWrapper* cows = (SpuCollisionObjectWrapper*)taskDesc.spuCollisionObjectsWrappers; |
---|
| 681 | |
---|
| 682 | //spu_printf("in processRaycastTask %d\n", taskDesc.numSpuCollisionObjectWrappers); |
---|
| 683 | /* for each object */ |
---|
| 684 | RaycastGatheredObjectData gatheredObjectData; |
---|
| 685 | for (int objectId = 0; objectId < taskDesc.numSpuCollisionObjectWrappers; objectId++) |
---|
| 686 | { |
---|
| 687 | //spu_printf("%d / %d\n", objectId, taskDesc.numSpuCollisionObjectWrappers); |
---|
| 688 | |
---|
| 689 | /* load initial collision shape */ |
---|
| 690 | GatherCollisionObjectAndShapeData (&gatheredObjectData, localMemory, (ppu_address_t)&cows[objectId]); |
---|
| 691 | |
---|
| 692 | if (btBroadphaseProxy::isConcave (gatheredObjectData.m_shapeType)) |
---|
| 693 | { |
---|
| 694 | SpuRaycastTaskWorkUnitOut tWorkUnitsOut[SPU_RAYCAST_WORK_UNITS_PER_TASK]; |
---|
| 695 | for (int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++) |
---|
| 696 | { |
---|
| 697 | tWorkUnitsOut[rayId].hitFraction = 1.0; |
---|
| 698 | } |
---|
| 699 | |
---|
| 700 | performRaycastAgainstConcave (&gatheredObjectData, &taskDesc.workUnits[0], &tWorkUnitsOut[0], taskDesc.numWorkUnits, localMemory); |
---|
| 701 | |
---|
| 702 | for (int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++) |
---|
| 703 | { |
---|
| 704 | const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId]; |
---|
| 705 | if (tWorkUnitsOut[rayId].hitFraction == 1.0) |
---|
| 706 | continue; |
---|
| 707 | |
---|
| 708 | ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut); |
---|
| 709 | dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); |
---|
| 710 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
| 711 | |
---|
| 712 | |
---|
| 713 | /* XXX Only support taking the closest hit for now */ |
---|
| 714 | if (tWorkUnitsOut[rayId].hitFraction < workUnitOut.hitFraction) |
---|
| 715 | { |
---|
| 716 | workUnitOut.hitFraction = tWorkUnitsOut[rayId].hitFraction; |
---|
| 717 | workUnitOut.hitNormal = tWorkUnitsOut[rayId].hitNormal; |
---|
| 718 | } |
---|
| 719 | |
---|
| 720 | /* write ray cast data back */ |
---|
| 721 | dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); |
---|
| 722 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
| 723 | } |
---|
| 724 | } else if (btBroadphaseProxy::isConvex (gatheredObjectData.m_shapeType)) { |
---|
| 725 | |
---|
| 726 | btVector3 objectBoxMin, objectBoxMax; |
---|
| 727 | computeAabb (objectBoxMin, objectBoxMax, (btConvexInternalShape*)gatheredObjectData.m_spuCollisionShape, gatheredObjectData.m_collisionShape, gatheredObjectData.m_shapeType, gatheredObjectData.m_worldTransform); |
---|
| 728 | for (unsigned int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++) |
---|
| 729 | { |
---|
| 730 | const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId]; |
---|
| 731 | |
---|
| 732 | btScalar ignored_param = 1.0; |
---|
| 733 | btVector3 ignored_normal; |
---|
| 734 | if (btRayAabb(workUnit.rayFrom, workUnit.rayTo, objectBoxMin, objectBoxMax, ignored_param, ignored_normal)) |
---|
| 735 | { |
---|
| 736 | ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut); |
---|
| 737 | SpuRaycastTaskWorkUnitOut tWorkUnitOut; |
---|
| 738 | tWorkUnitOut.hitFraction = 1.0; |
---|
| 739 | |
---|
| 740 | performRaycastAgainstConvex (&gatheredObjectData, workUnit, &tWorkUnitOut, localMemory); |
---|
| 741 | if (tWorkUnitOut.hitFraction == 1.0) |
---|
| 742 | continue; |
---|
| 743 | |
---|
| 744 | dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); |
---|
| 745 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
| 746 | |
---|
| 747 | /* XXX Only support taking the closest hit for now */ |
---|
| 748 | if (tWorkUnitOut.hitFraction < workUnitOut.hitFraction) |
---|
| 749 | { |
---|
| 750 | workUnitOut.hitFraction = tWorkUnitOut.hitFraction; |
---|
| 751 | workUnitOut.hitNormal = tWorkUnitOut.hitNormal; |
---|
| 752 | /* write ray cast data back */ |
---|
| 753 | dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); |
---|
| 754 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
| 755 | } |
---|
| 756 | } |
---|
| 757 | } |
---|
| 758 | |
---|
| 759 | } else if (btBroadphaseProxy::isCompound (gatheredObjectData.m_shapeType)) { |
---|
| 760 | for (unsigned int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++) |
---|
| 761 | { |
---|
| 762 | const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId]; |
---|
| 763 | ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut); |
---|
| 764 | SpuRaycastTaskWorkUnitOut tWorkUnitOut; |
---|
| 765 | tWorkUnitOut.hitFraction = 1.0; |
---|
| 766 | |
---|
| 767 | performRaycastAgainstCompound (&gatheredObjectData, workUnit, &tWorkUnitOut, localMemory); |
---|
| 768 | if (tWorkUnitOut.hitFraction == 1.0) |
---|
| 769 | continue; |
---|
| 770 | |
---|
| 771 | dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); |
---|
| 772 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
| 773 | /* XXX Only support taking the closest hit for now */ |
---|
| 774 | if (tWorkUnitOut.hitFraction < workUnitOut.hitFraction) |
---|
| 775 | { |
---|
| 776 | workUnitOut.hitFraction = tWorkUnitOut.hitFraction; |
---|
| 777 | workUnitOut.hitNormal = tWorkUnitOut.hitNormal; |
---|
| 778 | } |
---|
| 779 | |
---|
| 780 | /* write ray cast data back */ |
---|
| 781 | dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); |
---|
| 782 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
| 783 | } |
---|
| 784 | } |
---|
| 785 | } |
---|
| 786 | } |
---|