1 | |
---|
2 | |
---|
3 | #include "../PlatformDefinitions.h" |
---|
4 | #include "SpuRaycastTask.h" |
---|
5 | #include "../SpuCollisionObjectWrapper.h" |
---|
6 | #include "../SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h" |
---|
7 | #include "SpuSubSimplexConvexCast.h" |
---|
8 | #include "LinearMath/btAabbUtil2.h" |
---|
9 | |
---|
10 | |
---|
11 | /* Future optimization strategies: |
---|
12 | 1. BBOX prune before loading shape data |
---|
13 | 2. Could reduce number of dmas for ray output data to a single read and write. |
---|
14 | By sharing the temporary work unit output structures across objects. |
---|
15 | 3. The reason SpuRaycastNodeCallback1 is slower is because the triangle data isn't |
---|
16 | being cached across calls. Fix that by doing the final ray pruning inside the callback. |
---|
17 | */ |
---|
18 | |
---|
19 | /* Future work: |
---|
20 | 1. support first hit, closest hit, etc rather than just closest hit. |
---|
21 | 2. support compound objects |
---|
22 | */ |
---|
23 | |
---|
24 | #define CALLBACK_ALL |
---|
25 | |
---|
/// Per-SPU-task scratch memory: local-store copies of the collision object,
/// its wrapper, its shape data, and temporary index storage used while
/// raycasting. One instance is reused across all objects processed by a task.
struct RaycastTask_LocalStoreMemory
{
	// Raw 16-byte-aligned storage for a DMA'd btCollisionObject
	// (+16 bytes of slack so the object fits regardless of alignment).
	ATTRIBUTE_ALIGNED16(char gColObj [sizeof(btCollisionObject)+16]);
	btCollisionObject* getColObj()
	{
		return (btCollisionObject*) gColObj;
	}

	// Local-store copy of the object's SpuCollisionObjectWrapper (DMA target).
	ATTRIBUTE_ALIGNED16(SpuCollisionObjectWrapper gCollisionObjectWrapper);
	SpuCollisionObjectWrapper* getCollisionObjectWrapper ()
	{
		return &gCollisionObjectWrapper;
	}

	// Local-store copy of the collision shape itself.
	CollisionShape_LocalStoreMemory gCollisionShape;
	// Scratch triangle-index buffer shared by the BVH node callbacks.
	ATTRIBUTE_ALIGNED16(int spuIndices[16]);

	// Per-shape-kind auxiliary data pulled in on demand.
	bvhMeshShape_LocalStoreMemory bvhShapeData;
	SpuConvexPolyhedronVertexData convexVertexData;
	CompoundShape_LocalStoreMemory compoundShapeData;
};
---|
47 | |
---|
#ifdef WIN32
/// Heap-allocates the per-task scratch memory on Win32 builds.
/// NOTE(review): the returned block is never freed in this file; confirm the
/// task scheduler owns and releases it.
void* createRaycastLocalStoreMemory()
{
	return new RaycastTask_LocalStoreMemory;
}
#elif defined(__CELLOS_LV2__)
/// On Cell (PS3) the scratch memory is a single static, 16-byte-aligned
/// instance resident in SPU local store.
ATTRIBUTE_ALIGNED16(RaycastTask_LocalStoreMemory gLocalStoreMemory);
void* createRaycastLocalStoreMemory()
{
	return &gLocalStoreMemory;
}
#endif
---|
60 | |
---|
/// DMAs a collision object's wrapper, the btCollisionObject itself, and its
/// collision shape from main memory (PPU) into SPU local store, then fills
/// @p gatheredObjectData with the transform, margin, shape type and shape
/// pointers needed for raycasting. Each transfer is waited on before its data
/// is read, so this call blocks until everything is resident in local store.
void GatherCollisionObjectAndShapeData (RaycastGatheredObjectData* gatheredObjectData, RaycastTask_LocalStoreMemory* lsMemPtr, ppu_address_t objectWrapper)
{
	register int dmaSize;
	register ppu_address_t dmaPpuAddress2;
	/* DMA Collision object wrapper into local store */
	dmaSize = sizeof(SpuCollisionObjectWrapper);
	dmaPpuAddress2 = objectWrapper;
	cellDmaGet(&lsMemPtr->gCollisionObjectWrapper, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	/* DMA Collision object into local store (address comes from the wrapper we just fetched) */
	dmaSize = sizeof(btCollisionObject);
	dmaPpuAddress2 = lsMemPtr->getCollisionObjectWrapper()->getCollisionObjectPtr();
	cellDmaGet(&lsMemPtr->gColObj, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(2));

	/* Gather information about collision object and shape */
	gatheredObjectData->m_worldTransform = lsMemPtr->getColObj()->getWorldTransform();
	gatheredObjectData->m_collisionMargin = lsMemPtr->getCollisionObjectWrapper()->getCollisionMargin ();
	gatheredObjectData->m_shapeType = lsMemPtr->getCollisionObjectWrapper()->getShapeType ();
	// m_collisionShape keeps the main-memory (PPU) address; m_spuCollisionShape
	// points at the local-store copy filled by dmaCollisionShape below.
	gatheredObjectData->m_collisionShape = (ppu_address_t)lsMemPtr->getColObj()->getCollisionShape();
	gatheredObjectData->m_spuCollisionShape = (void*)&lsMemPtr->gCollisionShape.collisionShape;

	/* DMA shape data */
	dmaCollisionShape (gatheredObjectData->m_spuCollisionShape, gatheredObjectData->m_collisionShape, 1, gatheredObjectData->m_shapeType);
	cellDmaWaitTagStatusAll(DMA_MASK(1));
	if (btBroadphaseProxy::isConvex (gatheredObjectData->m_shapeType))
	{
		// Convex shapes carry implicit dimensions used later by the caster.
		btConvexInternalShape* spuConvexShape = (btConvexInternalShape*)gatheredObjectData->m_spuCollisionShape;
		gatheredObjectData->m_primitiveDimensions = spuConvexShape->getImplicitShapeDimensions ();
	} else {
		// Placeholder for non-convex shapes.
		gatheredObjectData->m_primitiveDimensions = btVector3(1.0, 1.0, 1.0);
	}

}
---|
96 | |
---|
97 | void dmaLoadRayOutput (ppu_address_t rayOutputAddr, SpuRaycastTaskWorkUnitOut* rayOutput, uint32_t dmaTag) |
---|
98 | { |
---|
99 | cellDmaGet(rayOutput, rayOutputAddr, sizeof(*rayOutput), DMA_TAG(dmaTag), 0, 0); |
---|
100 | } |
---|
101 | |
---|
102 | void dmaStoreRayOutput (ppu_address_t rayOutputAddr, const SpuRaycastTaskWorkUnitOut* rayOutput, uint32_t dmaTag) |
---|
103 | { |
---|
104 | cellDmaLargePut (rayOutput, rayOutputAddr, sizeof(*rayOutput), DMA_TAG(dmaTag), 0, 0); |
---|
105 | } |
---|
106 | |
---|
#if 0
// Disabled single-address small-read helper; presumably superseded by
// small_cache_read_triple below. Kept for reference.
SIMD_FORCE_INLINE void small_cache_read(void* buffer, ppu_address_t ea, size_t size)
{
#if USE_SOFTWARE_CACHE
	// Check for alignment requirements. We need to make sure the entire request fits within one cache line,
	// so the first and last bytes should fall on the same cache line
	btAssert((ea & ~SPE_CACHELINE_MASK) == ((ea + size - 1) & ~SPE_CACHELINE_MASK));

	void* ls = spe_cache_read(ea);
	memcpy(buffer, ls, size);
#else
	stallingUnalignedDmaSmallGet(buffer,ea,size);
#endif
}
#endif
---|
122 | |
---|
123 | void small_cache_read_triple( void* ls0, ppu_address_t ea0, |
---|
124 | void* ls1, ppu_address_t ea1, |
---|
125 | void* ls2, ppu_address_t ea2, |
---|
126 | size_t size) |
---|
127 | { |
---|
128 | btAssert(size<16); |
---|
129 | ATTRIBUTE_ALIGNED16(char tmpBuffer0[32]); |
---|
130 | ATTRIBUTE_ALIGNED16(char tmpBuffer1[32]); |
---|
131 | ATTRIBUTE_ALIGNED16(char tmpBuffer2[32]); |
---|
132 | |
---|
133 | uint32_t i; |
---|
134 | |
---|
135 | |
---|
136 | ///make sure last 4 bits are the same, for cellDmaSmallGet |
---|
137 | char* localStore0 = (char*)ls0; |
---|
138 | uint32_t last4BitsOffset = ea0 & 0x0f; |
---|
139 | char* tmpTarget0 = tmpBuffer0 + last4BitsOffset; |
---|
140 | tmpTarget0 = (char*)cellDmaSmallGetReadOnly(tmpTarget0,ea0,size,DMA_TAG(1),0,0); |
---|
141 | |
---|
142 | |
---|
143 | char* localStore1 = (char*)ls1; |
---|
144 | last4BitsOffset = ea1 & 0x0f; |
---|
145 | char* tmpTarget1 = tmpBuffer1 + last4BitsOffset; |
---|
146 | tmpTarget1 = (char*)cellDmaSmallGetReadOnly(tmpTarget1,ea1,size,DMA_TAG(1),0,0); |
---|
147 | |
---|
148 | char* localStore2 = (char*)ls2; |
---|
149 | last4BitsOffset = ea2 & 0x0f; |
---|
150 | char* tmpTarget2 = tmpBuffer2 + last4BitsOffset; |
---|
151 | tmpTarget2 = (char*)cellDmaSmallGetReadOnly(tmpTarget2,ea2,size,DMA_TAG(1),0,0); |
---|
152 | |
---|
153 | |
---|
154 | cellDmaWaitTagStatusAll( DMA_MASK(1) ); |
---|
155 | |
---|
156 | //this is slowish, perhaps memcpy on SPU is smarter? |
---|
157 | for (i=0; btLikely( i<size );i++) |
---|
158 | { |
---|
159 | localStore0[i] = tmpTarget0[i]; |
---|
160 | localStore1[i] = tmpTarget1[i]; |
---|
161 | localStore2[i] = tmpTarget2[i]; |
---|
162 | } |
---|
163 | } |
---|
164 | |
---|
165 | void performRaycastAgainstConvex (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr); |
---|
166 | |
---|
/// BVH node callback used when CALLBACK_ALL is NOT defined: tests exactly one
/// work unit (selected via setWorkUnit) against each candidate leaf triangle.
/// For every processNode call, triangle indices and vertices are DMA'd from
/// main memory, a temporary triangle "shape" is built on the stack, and the
/// ray is cast against it, keeping the closest hit in m_workUnitsOut.
class spuRaycastNodeCallback1 : public btNodeOverlapCallback
{
	RaycastGatheredObjectData* m_gatheredObjectData;
	const SpuRaycastTaskWorkUnit* m_workUnits;
	SpuRaycastTaskWorkUnitOut* m_workUnitsOut;
	int m_workUnit; // index of the work unit currently being cast
	RaycastTask_LocalStoreMemory* m_lsMemPtr;

	ATTRIBUTE_ALIGNED16(btVector3 spuTriangleVertices[3]); // current (scaled) triangle
	ATTRIBUTE_ALIGNED16(btScalar spuUnscaledVertex[4]);    // staging for one raw vertex
	//ATTRIBUTE_ALIGNED16(int spuIndices[16]);
public:
	spuRaycastNodeCallback1(RaycastGatheredObjectData* gatheredObjectData,const SpuRaycastTaskWorkUnit* workUnits, SpuRaycastTaskWorkUnitOut* workUnitsOut, RaycastTask_LocalStoreMemory* lsMemPtr)
		: m_gatheredObjectData(gatheredObjectData),
		m_workUnits(workUnits),
		m_workUnitsOut(workUnitsOut),
		m_workUnit(0),
		m_lsMemPtr (lsMemPtr)
	{
	}

	// Selects which work unit the next processNode call casts.
	void setWorkUnit (int workUnit) { m_workUnit = workUnit; }
	// subPart is unused; triangleIndex addresses into the DMA'd indexed mesh.
	virtual void processNode(int subPart, int triangleIndex)
	{
		///Create a triangle on the stack, call process collision, with GJK
		///DMA the vertices, can benefit from software caching

		//	spu_printf("processNode with triangleIndex %d\n",triangleIndex);

		// ugly solution to support both 16bit and 32bit indices
		if (m_lsMemPtr->bvhShapeData.gIndexMesh.m_indexType == PHY_SHORT)
		{
			short int* indexBasePtr = (short int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
			ATTRIBUTE_ALIGNED16(short int tmpIndices[3]);

			small_cache_read_triple(&tmpIndices[0],(ppu_address_t)&indexBasePtr[0],
									&tmpIndices[1],(ppu_address_t)&indexBasePtr[1],
									&tmpIndices[2],(ppu_address_t)&indexBasePtr[2],
									sizeof(short int));

			// widen the 16-bit indices into the shared int buffer
			m_lsMemPtr->spuIndices[0] = int(tmpIndices[0]);
			m_lsMemPtr->spuIndices[1] = int(tmpIndices[1]);
			m_lsMemPtr->spuIndices[2] = int(tmpIndices[2]);
		} else
		{
			int* indexBasePtr = (int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);

			small_cache_read_triple(&m_lsMemPtr->spuIndices[0],(ppu_address_t)&indexBasePtr[0],
									&m_lsMemPtr->spuIndices[1],(ppu_address_t)&indexBasePtr[1],
									&m_lsMemPtr->spuIndices[2],(ppu_address_t)&indexBasePtr[2],
									sizeof(int));
		}

		//printf("%d %d %d\n", m_lsMemPtr->spuIndices[0], m_lsMemPtr->spuIndices[1], m_lsMemPtr->spuIndices[2]);
		//		spu_printf("SPU index0=%d ,",spuIndices[0]);
		//		spu_printf("SPU index1=%d ,",spuIndices[1]);
		//		spu_printf("SPU index2=%d ,",spuIndices[2]);
		//		spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr);

		const btVector3& meshScaling = m_lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getScaling();

		// fetch and scale the three vertices of the triangle
		for (int j=2;btLikely( j>=0 );j--)
		{
			int graphicsindex = m_lsMemPtr->spuIndices[j];

			//spu_printf("SPU index=%d ,",graphicsindex);
			btScalar* graphicsbasePtr = (btScalar*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexBase+graphicsindex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexStride);

			//	spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr);


			///handle un-aligned vertices...

			//another DMA for each vertex
			small_cache_read_triple(&spuUnscaledVertex[0],(ppu_address_t)&graphicsbasePtr[0],
									&spuUnscaledVertex[1],(ppu_address_t)&graphicsbasePtr[1],
									&spuUnscaledVertex[2],(ppu_address_t)&graphicsbasePtr[2],
									sizeof(btScalar));

			//printf("%f %f %f\n", spuUnscaledVertex[0],spuUnscaledVertex[1],spuUnscaledVertex[2]);
			spuTriangleVertices[j] = btVector3(
				spuUnscaledVertex[0]*meshScaling.getX(),
				spuUnscaledVertex[1]*meshScaling.getY(),
				spuUnscaledVertex[2]*meshScaling.getZ());

			//spu_printf("SPU:triangle vertices:%f,%f,%f\n",spuTriangleVertices[j].x(),spuTriangleVertices[j].y(),spuTriangleVertices[j].z());
		}

		// Re-use the object's gathered data but substitute a stack triangle shape.
		RaycastGatheredObjectData triangleGatheredObjectData (*m_gatheredObjectData);
		triangleGatheredObjectData.m_shapeType = TRIANGLE_SHAPE_PROXYTYPE;
		triangleGatheredObjectData.m_spuCollisionShape = &spuTriangleVertices[0];

		//printf("%f %f %f\n", spuTriangleVertices[0][0],spuTriangleVertices[0][1],spuTriangleVertices[0][2]);
		//printf("%f %f %f\n", spuTriangleVertices[1][0],spuTriangleVertices[1][1],spuTriangleVertices[1][2]);
		//printf("%f %f %f\n", spuTriangleVertices[2][0],spuTriangleVertices[2][1],spuTriangleVertices[2][2]);
		SpuRaycastTaskWorkUnitOut out;
		out.hitFraction = 1.0;
		performRaycastAgainstConvex (&triangleGatheredObjectData, m_workUnits[m_workUnit], &out, m_lsMemPtr);
		/* XXX: For now only take the closest hit */
		if (out.hitFraction < m_workUnitsOut[m_workUnit].hitFraction)
		{
			m_workUnitsOut[m_workUnit].hitFraction = out.hitFraction;
			m_workUnitsOut[m_workUnit].hitNormal = out.hitNormal;
		}
	}

};
---|
274 | |
---|
/// BVH node callback used when CALLBACK_ALL is defined: for each candidate
/// leaf triangle it casts ALL work units of the batch against the triangle.
/// Triangle indices and vertices are DMA'd from main memory per processNode
/// call; the closest hit per work unit is accumulated into m_workUnitsOut.
class spuRaycastNodeCallback : public btNodeOverlapCallback
{
	RaycastGatheredObjectData* m_gatheredObjectData;
	const SpuRaycastTaskWorkUnit* m_workUnits;
	SpuRaycastTaskWorkUnitOut* m_workUnitsOut;
	int m_numWorkUnits; // number of rays cast against every triangle
	RaycastTask_LocalStoreMemory* m_lsMemPtr;

	ATTRIBUTE_ALIGNED16(btVector3 spuTriangleVertices[3]); // current (scaled) triangle
	ATTRIBUTE_ALIGNED16(btScalar spuUnscaledVertex[4]);    // staging for one raw vertex
	//ATTRIBUTE_ALIGNED16(int spuIndices[16]);
public:
	spuRaycastNodeCallback(RaycastGatheredObjectData* gatheredObjectData,const SpuRaycastTaskWorkUnit* workUnits, SpuRaycastTaskWorkUnitOut* workUnitsOut, int numWorkUnits, RaycastTask_LocalStoreMemory* lsMemPtr)
		: m_gatheredObjectData(gatheredObjectData),
		m_workUnits(workUnits),
		m_workUnitsOut(workUnitsOut),
		m_numWorkUnits(numWorkUnits),
		m_lsMemPtr (lsMemPtr)
	{
	}

	// subPart is unused; triangleIndex addresses into the DMA'd indexed mesh.
	virtual void processNode(int subPart, int triangleIndex)
	{
		///Create a triangle on the stack, call process collision, with GJK
		///DMA the vertices, can benefit from software caching

		//	spu_printf("processNode with triangleIndex %d\n",triangleIndex);

		// ugly solution to support both 16bit and 32bit indices
		if (m_lsMemPtr->bvhShapeData.gIndexMesh.m_indexType == PHY_SHORT)
		{
			short int* indexBasePtr = (short int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
			ATTRIBUTE_ALIGNED16(short int tmpIndices[3]);

			small_cache_read_triple(&tmpIndices[0],(ppu_address_t)&indexBasePtr[0],
									&tmpIndices[1],(ppu_address_t)&indexBasePtr[1],
									&tmpIndices[2],(ppu_address_t)&indexBasePtr[2],
									sizeof(short int));

			// widen the 16-bit indices into the shared int buffer
			m_lsMemPtr->spuIndices[0] = int(tmpIndices[0]);
			m_lsMemPtr->spuIndices[1] = int(tmpIndices[1]);
			m_lsMemPtr->spuIndices[2] = int(tmpIndices[2]);
		} else
		{
			int* indexBasePtr = (int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);

			small_cache_read_triple(&m_lsMemPtr->spuIndices[0],(ppu_address_t)&indexBasePtr[0],
									&m_lsMemPtr->spuIndices[1],(ppu_address_t)&indexBasePtr[1],
									&m_lsMemPtr->spuIndices[2],(ppu_address_t)&indexBasePtr[2],
									sizeof(int));
		}

		//printf("%d %d %d\n", m_lsMemPtr->spuIndices[0], m_lsMemPtr->spuIndices[1], m_lsMemPtr->spuIndices[2]);
		//		spu_printf("SPU index0=%d ,",spuIndices[0]);
		//		spu_printf("SPU index1=%d ,",spuIndices[1]);
		//		spu_printf("SPU index2=%d ,",spuIndices[2]);
		//		spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr);

		const btVector3& meshScaling = m_lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getScaling();

		// fetch and scale the three vertices of the triangle
		for (int j=2;btLikely( j>=0 );j--)
		{
			int graphicsindex = m_lsMemPtr->spuIndices[j];

			//spu_printf("SPU index=%d ,",graphicsindex);
			btScalar* graphicsbasePtr = (btScalar*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexBase+graphicsindex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexStride);

			//	spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr);


			///handle un-aligned vertices...

			//another DMA for each vertex
			small_cache_read_triple(&spuUnscaledVertex[0],(ppu_address_t)&graphicsbasePtr[0],
									&spuUnscaledVertex[1],(ppu_address_t)&graphicsbasePtr[1],
									&spuUnscaledVertex[2],(ppu_address_t)&graphicsbasePtr[2],
									sizeof(btScalar));

			//printf("%f %f %f\n", spuUnscaledVertex[0],spuUnscaledVertex[1],spuUnscaledVertex[2]);
			spuTriangleVertices[j] = btVector3(
				spuUnscaledVertex[0]*meshScaling.getX(),
				spuUnscaledVertex[1]*meshScaling.getY(),
				spuUnscaledVertex[2]*meshScaling.getZ());

			//spu_printf("SPU:triangle vertices:%f,%f,%f\n",spuTriangleVertices[j].x(),spuTriangleVertices[j].y(),spuTriangleVertices[j].z());
		}

		// Re-use the object's gathered data but substitute a stack triangle shape.
		RaycastGatheredObjectData triangleGatheredObjectData (*m_gatheredObjectData);
		triangleGatheredObjectData.m_shapeType = TRIANGLE_SHAPE_PROXYTYPE;
		triangleGatheredObjectData.m_spuCollisionShape = &spuTriangleVertices[0];

		//printf("%f %f %f\n", spuTriangleVertices[0][0],spuTriangleVertices[0][1],spuTriangleVertices[0][2]);
		//printf("%f %f %f\n", spuTriangleVertices[1][0],spuTriangleVertices[1][1],spuTriangleVertices[1][2]);
		//printf("%f %f %f\n", spuTriangleVertices[2][0],spuTriangleVertices[2][1],spuTriangleVertices[2][2]);
		// Cast every ray in the batch against this one triangle, keeping the
		// closest hit per work unit.
		for (int i = 0; i < m_numWorkUnits; i++)
		{
			SpuRaycastTaskWorkUnitOut out;
			out.hitFraction = 1.0;
			performRaycastAgainstConvex (&triangleGatheredObjectData, m_workUnits[i], &out, m_lsMemPtr);
			/* XXX: For now only take the closest hit */
			if (out.hitFraction < m_workUnitsOut[i].hitFraction)
			{
				m_workUnitsOut[i].hitFraction = out.hitFraction;
				m_workUnitsOut[i].hitNormal = out.hitNormal;
			}
		}
	}

};
---|
384 | |
---|
385 | |
---|
/// Walks a subtree of a stackless quantized BVH, testing every ray of the
/// batch against each node's quantized AABB (plus an exact ray/AABB slab test)
/// and invoking @p nodeCallback on overlapping leaves.
/// With CALLBACK_ALL defined, the callback is invoked once per leaf for all
/// rays; otherwise it is invoked per overlapping ray (via setWorkUnit).
/// @param rootNode            first node of the (local-store) subtree array
/// @param startNodeIndex/endNodeIndex  half-open node range to traverse
void spuWalkStacklessQuantizedTreeAgainstRays(RaycastTask_LocalStoreMemory* lsMemPtr,
	btNodeOverlapCallback* nodeCallback,
	const btVector3* rayFrom,
	const btVector3* rayTo,
	int numWorkUnits,
	unsigned short int* quantizedQueryAabbMin,
	unsigned short int* quantizedQueryAabbMax,
	const btQuantizedBvhNode* rootNode,
	int startNodeIndex,int endNodeIndex)
{
	int curIndex = startNodeIndex;
	int walkIterations = 0;
	int subTreeSize = endNodeIndex - startNodeIndex;

	int escapeIndex;

	unsigned int boxBoxOverlap, rayBoxOverlap, anyRayBoxOverlap;
	unsigned int isLeafNode;

#define RAYAABB2
#ifdef RAYAABB2
	// Precompute per-ray data for the fast slab test (btRayAabb2):
	// inverse direction, direction signs and the max ray parameter.
	unsigned int sign[SPU_RAYCAST_WORK_UNITS_PER_TASK][3];
	btVector3 rayInvDirection[SPU_RAYCAST_WORK_UNITS_PER_TASK];
	btScalar lambda_max[SPU_RAYCAST_WORK_UNITS_PER_TASK];
	for (int i = 0; i < numWorkUnits; i++)
	{
		btVector3 rayDirection = (rayTo[i]-rayFrom[i]);
		rayDirection.normalize ();
		lambda_max[i] = rayDirection.dot(rayTo[i]-rayFrom[i]);
		// NOTE(review): a zero direction component yields an infinite inverse
		// here -- presumably btRayAabb2 relies on IEEE inf semantics; confirm.
		rayInvDirection[i][0] = btScalar(1.0) / rayDirection[0];
		rayInvDirection[i][1] = btScalar(1.0) / rayDirection[1];
		rayInvDirection[i][2] = btScalar(1.0) / rayDirection[2];
		sign[i][0] = rayDirection[0] < 0.0;
		sign[i][1] = rayDirection[1] < 0.0;
		sign[i][2] = rayDirection[2] < 0.0;
	}
#endif

	while (curIndex < endNodeIndex)
	{
		//catch bugs in tree data
		assert (walkIterations < subTreeSize);

		walkIterations++;

		isLeafNode = rootNode->isLeafNode();

		anyRayBoxOverlap = 0;

		// Test each ray's quantized AABB, then the exact ray/box test.
		for (int i = 0; i < numWorkUnits; i++)
		{
			unsigned short int* quamin = (quantizedQueryAabbMin + 3 * i);
			unsigned short int* quamax = (quantizedQueryAabbMax + 3 * i);
			boxBoxOverlap = spuTestQuantizedAabbAgainstQuantizedAabb(quamin,quamax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax);
			if (!boxBoxOverlap)
				continue;

			rayBoxOverlap = 0;
			btScalar param = 1.0;
			btVector3 normal;
			btVector3 bounds[2];
			// unquantize the node bounds for the exact test
			bounds[0] = lsMemPtr->bvhShapeData.getOptimizedBvh()->unQuantize(rootNode->m_quantizedAabbMin);
			bounds[1] = lsMemPtr->bvhShapeData.getOptimizedBvh()->unQuantize(rootNode->m_quantizedAabbMax);
#ifdef RAYAABB2
			rayBoxOverlap = btRayAabb2 (rayFrom[i], rayInvDirection[i], sign[i], bounds, param, 0.0, lambda_max[i]);
#else
			rayBoxOverlap = btRayAabb(rayFrom[i], rayTo[i], bounds[0], bounds[1], param, normal);
#endif

#ifndef CALLBACK_ALL
			anyRayBoxOverlap = rayBoxOverlap || anyRayBoxOverlap;
			/* If we have any ray vs. box overlap and this isn't a leaf node
			we know that we need to dig deeper
			*/
			if (!isLeafNode && anyRayBoxOverlap)
				break;

			// Leaf: invoke the callback for just this overlapping ray.
			if (isLeafNode && rayBoxOverlap)
			{
				spuRaycastNodeCallback1* callback = (spuRaycastNodeCallback1*)nodeCallback;
				callback->setWorkUnit (i);
				nodeCallback->processNode (0, rootNode->getTriangleIndex());
			}
#else
			/* If we have any ray vs. box overlap and this isn't a leaf node
			we know that we need to dig deeper
			*/
			// First overlapping ray is enough to decide; stop scanning.
			if (rayBoxOverlap)
			{
				anyRayBoxOverlap = 1;
				break;
			}
#endif
		}

#ifdef CALLBACK_ALL
		// Leaf with at least one overlapping ray: callback tests all rays.
		if (isLeafNode && anyRayBoxOverlap)
		{
			nodeCallback->processNode (0, rootNode->getTriangleIndex());
		}
#endif

		// Advance: step into the node (or past the leaf) when overlapping or
		// a leaf; otherwise skip the entire subtree via its escape index.
		if (anyRayBoxOverlap || isLeafNode)
		{
			rootNode++;
			curIndex++;
		} else
		{
			escapeIndex = rootNode->getEscapeIndex();
			rootNode += escapeIndex;
			curIndex += escapeIndex;
		}
	}

}
---|
501 | |
---|
502 | |
---|
503 | void performRaycastAgainstConcave (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit* workUnits, SpuRaycastTaskWorkUnitOut* workUnitsOut, int numWorkUnits, RaycastTask_LocalStoreMemory* lsMemPtr) |
---|
504 | { |
---|
505 | //order: first collision shape is convex, second concave. m_isSwapped is true, if the original order was opposite |
---|
506 | register int dmaSize; |
---|
507 | register ppu_address_t dmaPpuAddress2; |
---|
508 | |
---|
509 | |
---|
510 | btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)gatheredObjectData->m_spuCollisionShape; |
---|
511 | |
---|
512 | //need the mesh interface, for access to triangle vertices |
---|
513 | dmaBvhShapeData (&(lsMemPtr->bvhShapeData), trimeshShape); |
---|
514 | |
---|
515 | unsigned short int quantizedQueryAabbMin[SPU_RAYCAST_WORK_UNITS_PER_TASK][3]; |
---|
516 | unsigned short int quantizedQueryAabbMax[SPU_RAYCAST_WORK_UNITS_PER_TASK][3]; |
---|
517 | btVector3 rayFromInTriangleSpace[SPU_RAYCAST_WORK_UNITS_PER_TASK]; |
---|
518 | btVector3 rayToInTriangleSpace[SPU_RAYCAST_WORK_UNITS_PER_TASK]; |
---|
519 | |
---|
520 | /* Calculate the AABB for the ray in the triangle mesh shape */ |
---|
521 | btTransform rayInTriangleSpace; |
---|
522 | rayInTriangleSpace = gatheredObjectData->m_worldTransform.inverse(); |
---|
523 | |
---|
524 | for (int i = 0; i < numWorkUnits; i++) |
---|
525 | { |
---|
526 | btVector3 aabbMin; |
---|
527 | btVector3 aabbMax; |
---|
528 | |
---|
529 | rayFromInTriangleSpace[i] = rayInTriangleSpace(workUnits[i].rayFrom); |
---|
530 | rayToInTriangleSpace[i] = rayInTriangleSpace(workUnits[i].rayTo); |
---|
531 | |
---|
532 | aabbMin = rayFromInTriangleSpace[i]; |
---|
533 | aabbMin.setMin (rayToInTriangleSpace[i]); |
---|
534 | aabbMax = rayFromInTriangleSpace[i]; |
---|
535 | aabbMax.setMax (rayToInTriangleSpace[i]); |
---|
536 | |
---|
537 | lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMin[i],aabbMin,0); |
---|
538 | lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMax[i],aabbMax,1); |
---|
539 | } |
---|
540 | |
---|
541 | QuantizedNodeArray& nodeArray = lsMemPtr->bvhShapeData.getOptimizedBvh()->getQuantizedNodeArray(); |
---|
542 | //spu_printf("SPU: numNodes = %d\n",nodeArray.size()); |
---|
543 | |
---|
544 | BvhSubtreeInfoArray& subTrees = lsMemPtr->bvhShapeData.getOptimizedBvh()->getSubtreeInfoArray(); |
---|
545 | |
---|
546 | #ifdef CALLBACK_ALL |
---|
547 | spuRaycastNodeCallback nodeCallback (gatheredObjectData, workUnits, workUnitsOut, numWorkUnits, lsMemPtr); |
---|
548 | #else |
---|
549 | spuRaycastNodeCallback1 nodeCallback (gatheredObjectData, workUnits, workUnitsOut, lsMemPtr); |
---|
550 | #endif |
---|
551 | |
---|
552 | IndexedMeshArray& indexArray = lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getIndexedMeshArray(); |
---|
553 | |
---|
554 | //spu_printf("SPU:indexArray.size() = %d\n",indexArray.size()); |
---|
555 | // spu_printf("SPU: numSubTrees = %d\n",subTrees.size()); |
---|
556 | //not likely to happen |
---|
557 | if (subTrees.size() && indexArray.size() == 1) |
---|
558 | { |
---|
559 | ///DMA in the index info |
---|
560 | dmaBvhIndexedMesh (&lsMemPtr->bvhShapeData.gIndexMesh, indexArray, 0 /* index into indexArray */, 1 /* dmaTag */); |
---|
561 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
562 | |
---|
563 | //display the headers |
---|
564 | int numBatch = subTrees.size(); |
---|
565 | for (int i=0;i<numBatch;) |
---|
566 | { |
---|
567 | // BEN: TODO - can reorder DMA transfers for less stall |
---|
568 | int remaining = subTrees.size() - i; |
---|
569 | int nextBatch = remaining < MAX_SPU_SUBTREE_HEADERS ? remaining : MAX_SPU_SUBTREE_HEADERS; |
---|
570 | |
---|
571 | dmaBvhSubTreeHeaders (&lsMemPtr->bvhShapeData.gSubtreeHeaders[0], (ppu_address_t)(&subTrees[i]), nextBatch, 1); |
---|
572 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
573 | |
---|
574 | |
---|
575 | // spu_printf("nextBatch = %d\n",nextBatch); |
---|
576 | |
---|
577 | |
---|
578 | for (int j=0;j<nextBatch;j++) |
---|
579 | { |
---|
580 | const btBvhSubtreeInfo& subtree = lsMemPtr->bvhShapeData.gSubtreeHeaders[j]; |
---|
581 | |
---|
582 | unsigned int overlap = 1; |
---|
583 | for (int boxId = 0; boxId < numWorkUnits; boxId++) |
---|
584 | { |
---|
585 | overlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin[boxId],quantizedQueryAabbMax[boxId],subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); |
---|
586 | if (overlap) |
---|
587 | break; |
---|
588 | } |
---|
589 | |
---|
590 | if (overlap) |
---|
591 | { |
---|
592 | btAssert(subtree.m_subtreeSize); |
---|
593 | |
---|
594 | //dma the actual nodes of this subtree |
---|
595 | dmaBvhSubTreeNodes (&lsMemPtr->bvhShapeData.gSubtreeNodes[0], subtree, nodeArray, 2); |
---|
596 | |
---|
597 | cellDmaWaitTagStatusAll(DMA_MASK(2)); |
---|
598 | |
---|
599 | /* Walk this subtree */ |
---|
600 | |
---|
601 | { |
---|
602 | |
---|
603 | spuWalkStacklessQuantizedTreeAgainstRays(lsMemPtr, |
---|
604 | &nodeCallback, |
---|
605 | &rayFromInTriangleSpace[0], |
---|
606 | &rayToInTriangleSpace[0], |
---|
607 | numWorkUnits, |
---|
608 | &quantizedQueryAabbMin[0][0],&quantizedQueryAabbMax[0][0], |
---|
609 | &lsMemPtr->bvhShapeData.gSubtreeNodes[0], 0, subtree.m_subtreeSize); |
---|
610 | } |
---|
611 | } |
---|
612 | // spu_printf("subtreeSize = %d\n",gSubtreeHeaders[j].m_subtreeSize); |
---|
613 | } |
---|
614 | |
---|
615 | // unsigned short int m_quantizedAabbMin[3]; |
---|
616 | // unsigned short int m_quantizedAabbMax[3]; |
---|
617 | // int m_rootNodeIndex; |
---|
618 | // int m_subtreeSize; |
---|
619 | i+=nextBatch; |
---|
620 | } |
---|
621 | |
---|
622 | //pre-fetch first tree, then loop and double buffer |
---|
623 | } |
---|
624 | |
---|
625 | } |
---|
626 | |
---|
/// Stub: rays vs. compound shapes are not yet handled on SPU (see the
/// "Future work" note at the top of this file). Leaves *workUnitOut untouched.
void performRaycastAgainstCompound (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr)
{
	//XXX spu_printf ("Currently no support for ray. vs compound objects. Support coming soon.\n");
}
---|
631 | |
---|
/// Raycasts one work unit against a convex shape (or a triangle "shape"
/// synthesized by the BVH node callbacks) using subsimplex ray casting.
/// On a hit, writes fraction and normal into *workUnitOut; on a miss the
/// output is left untouched (callers pre-initialize hitFraction to 1.0).
void
performRaycastAgainstConvex (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr)
{
	SpuVoronoiSimplexSolver simplexSolver;

	// The ray is expressed as two identity transforms translated to the
	// segment endpoints, as expected by calcTimeOfImpact.
	btTransform rayFromTrans, rayToTrans;
	rayFromTrans.setIdentity ();
	rayFromTrans.setOrigin (workUnit.rayFrom);
	rayToTrans.setIdentity ();
	rayToTrans.setOrigin (workUnit.rayTo);

	SpuCastResult result;

	/* Load the vertex data if the shape is a convex hull */
	/* XXX: We might be loading the shape twice */
	ATTRIBUTE_ALIGNED16(char convexHullShape[sizeof(btConvexHullShape)]);
	if (gatheredObjectData->m_shapeType == CONVEX_HULL_SHAPE_PROXYTYPE)
	{
		register int dmaSize;
		register ppu_address_t dmaPpuAddress2;
		dmaSize = sizeof(btConvexHullShape);
		dmaPpuAddress2 = gatheredObjectData->m_collisionShape;
		cellDmaGet(&convexHullShape, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0);
		cellDmaWaitTagStatusAll(DMA_MASK(1));
		dmaConvexVertexData (&lsMemPtr->convexVertexData, (btConvexHullShape*)&convexHullShape);
		cellDmaWaitTagStatusAll(DMA_MASK(2)); // dmaConvexVertexData uses dma channel 2!
		// Point the vertex data at the local-store shape and point buffer.
		lsMemPtr->convexVertexData.gSpuConvexShapePtr = gatheredObjectData->m_spuCollisionShape;
		lsMemPtr->convexVertexData.gConvexPoints = &lsMemPtr->convexVertexData.g_convexPointBuffer[0];
	}

	/* performRaycast */
	SpuSubsimplexRayCast caster (gatheredObjectData->m_spuCollisionShape, &lsMemPtr->convexVertexData, gatheredObjectData->m_shapeType, gatheredObjectData->m_collisionMargin, &simplexSolver);
	bool r = caster.calcTimeOfImpact (rayFromTrans, rayToTrans, gatheredObjectData->m_worldTransform, gatheredObjectData->m_worldTransform,result);

	if (r)
	{
		workUnitOut->hitFraction = result.m_fraction;
		workUnitOut->hitNormal = result.m_normal;
	}
}
---|
672 | |
---|
673 | void processRaycastTask(void* userPtr, void* lsMemory) |
---|
674 | { |
---|
675 | RaycastTask_LocalStoreMemory* localMemory = (RaycastTask_LocalStoreMemory*)lsMemory; |
---|
676 | |
---|
677 | SpuRaycastTaskDesc* taskDescPtr = (SpuRaycastTaskDesc*)userPtr; |
---|
678 | SpuRaycastTaskDesc& taskDesc = *taskDescPtr; |
---|
679 | |
---|
680 | SpuCollisionObjectWrapper* cows = (SpuCollisionObjectWrapper*)taskDesc.spuCollisionObjectsWrappers; |
---|
681 | |
---|
682 | //spu_printf("in processRaycastTask %d\n", taskDesc.numSpuCollisionObjectWrappers); |
---|
683 | /* for each object */ |
---|
684 | RaycastGatheredObjectData gatheredObjectData; |
---|
685 | for (int objectId = 0; objectId < taskDesc.numSpuCollisionObjectWrappers; objectId++) |
---|
686 | { |
---|
687 | //spu_printf("%d / %d\n", objectId, taskDesc.numSpuCollisionObjectWrappers); |
---|
688 | |
---|
689 | /* load initial collision shape */ |
---|
690 | GatherCollisionObjectAndShapeData (&gatheredObjectData, localMemory, (ppu_address_t)&cows[objectId]); |
---|
691 | |
---|
692 | if (btBroadphaseProxy::isConcave (gatheredObjectData.m_shapeType)) |
---|
693 | { |
---|
694 | SpuRaycastTaskWorkUnitOut tWorkUnitsOut[SPU_RAYCAST_WORK_UNITS_PER_TASK]; |
---|
695 | for (int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++) |
---|
696 | { |
---|
697 | tWorkUnitsOut[rayId].hitFraction = 1.0; |
---|
698 | } |
---|
699 | |
---|
700 | performRaycastAgainstConcave (&gatheredObjectData, &taskDesc.workUnits[0], &tWorkUnitsOut[0], taskDesc.numWorkUnits, localMemory); |
---|
701 | |
---|
702 | for (int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++) |
---|
703 | { |
---|
704 | const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId]; |
---|
705 | if (tWorkUnitsOut[rayId].hitFraction == 1.0) |
---|
706 | continue; |
---|
707 | |
---|
708 | ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut); |
---|
709 | dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); |
---|
710 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
711 | |
---|
712 | |
---|
713 | /* XXX Only support taking the closest hit for now */ |
---|
714 | if (tWorkUnitsOut[rayId].hitFraction < workUnitOut.hitFraction) |
---|
715 | { |
---|
716 | workUnitOut.hitFraction = tWorkUnitsOut[rayId].hitFraction; |
---|
717 | workUnitOut.hitNormal = tWorkUnitsOut[rayId].hitNormal; |
---|
718 | } |
---|
719 | |
---|
720 | /* write ray cast data back */ |
---|
721 | dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); |
---|
722 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
723 | } |
---|
724 | } else if (btBroadphaseProxy::isConvex (gatheredObjectData.m_shapeType)) { |
---|
725 | |
---|
726 | btVector3 objectBoxMin, objectBoxMax; |
---|
727 | computeAabb (objectBoxMin, objectBoxMax, (btConvexInternalShape*)gatheredObjectData.m_spuCollisionShape, gatheredObjectData.m_collisionShape, gatheredObjectData.m_shapeType, gatheredObjectData.m_worldTransform); |
---|
728 | for (unsigned int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++) |
---|
729 | { |
---|
730 | const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId]; |
---|
731 | |
---|
732 | btScalar ignored_param = 1.0; |
---|
733 | btVector3 ignored_normal; |
---|
734 | if (btRayAabb(workUnit.rayFrom, workUnit.rayTo, objectBoxMin, objectBoxMax, ignored_param, ignored_normal)) |
---|
735 | { |
---|
736 | ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut); |
---|
737 | SpuRaycastTaskWorkUnitOut tWorkUnitOut; |
---|
738 | tWorkUnitOut.hitFraction = 1.0; |
---|
739 | |
---|
740 | performRaycastAgainstConvex (&gatheredObjectData, workUnit, &tWorkUnitOut, localMemory); |
---|
741 | if (tWorkUnitOut.hitFraction == 1.0) |
---|
742 | continue; |
---|
743 | |
---|
744 | dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); |
---|
745 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
746 | |
---|
747 | /* XXX Only support taking the closest hit for now */ |
---|
748 | if (tWorkUnitOut.hitFraction < workUnitOut.hitFraction) |
---|
749 | { |
---|
750 | workUnitOut.hitFraction = tWorkUnitOut.hitFraction; |
---|
751 | workUnitOut.hitNormal = tWorkUnitOut.hitNormal; |
---|
752 | /* write ray cast data back */ |
---|
753 | dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); |
---|
754 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
755 | } |
---|
756 | } |
---|
757 | } |
---|
758 | |
---|
759 | } else if (btBroadphaseProxy::isCompound (gatheredObjectData.m_shapeType)) { |
---|
760 | for (unsigned int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++) |
---|
761 | { |
---|
762 | const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId]; |
---|
763 | ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut); |
---|
764 | SpuRaycastTaskWorkUnitOut tWorkUnitOut; |
---|
765 | tWorkUnitOut.hitFraction = 1.0; |
---|
766 | |
---|
767 | performRaycastAgainstCompound (&gatheredObjectData, workUnit, &tWorkUnitOut, localMemory); |
---|
768 | if (tWorkUnitOut.hitFraction == 1.0) |
---|
769 | continue; |
---|
770 | |
---|
771 | dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); |
---|
772 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
773 | /* XXX Only support taking the closest hit for now */ |
---|
774 | if (tWorkUnitOut.hitFraction < workUnitOut.hitFraction) |
---|
775 | { |
---|
776 | workUnitOut.hitFraction = tWorkUnitOut.hitFraction; |
---|
777 | workUnitOut.hitNormal = tWorkUnitOut.hitNormal; |
---|
778 | } |
---|
779 | |
---|
780 | /* write ray cast data back */ |
---|
781 | dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); |
---|
782 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
783 | } |
---|
784 | } |
---|
785 | } |
---|
786 | } |
---|