Planet

navi

home

PPS

about

screenshots

download

development

forum

Context Navigation

source: code/branches/physics/src/bullet/BulletMultiThreaded/SpuRaycastTask/SpuRaycastTask.cpp @ 1966

Last change on this file since 1966 was 1966, checked in by rgrieder, 17 years ago
Let's go for multithreaded physics!
Property svn:eol-style set to `native`
File size: 29.3 KB

Line
1
2
3	#include "../PlatformDefinitions.h"
4	#include "SpuRaycastTask.h"
5	#include "../SpuCollisionObjectWrapper.h"
6	#include "../SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h"
7	#include "SpuSubSimplexConvexCast.h"
8	#include "LinearMath/btAabbUtil2.h"
9
10
11	/* Future optimization strategies:
12	1. BBOX prune before loading shape data
13	2. Could reduce number of dmas for ray output data to a single read and write.
14	By sharing the temporary work unit output structures across objects.
15	3. spuRaycastNodeCallback1 is slower because the triangle data isn't
16	being cached across calls. Fix that by doing the final ray pruning inside the callback.
17	*/
18
19	/* Future work:
20	1. support first hit, closest hit, etc rather than just closest hit.
21	2. support compound objects
22	*/
23
24	#define CALLBACK_ALL
25
26	struct RaycastTask_LocalStoreMemory
27	{
28	ATTRIBUTE_ALIGNED16(char gColObj [sizeof(btCollisionObject)+16]);
29	btCollisionObject* getColObj()
30	{
31	return (btCollisionObject*) gColObj;
32	}
33
34	ATTRIBUTE_ALIGNED16(SpuCollisionObjectWrapper gCollisionObjectWrapper);
35	SpuCollisionObjectWrapper* getCollisionObjectWrapper ()
36	{
37	return &gCollisionObjectWrapper;
38	}
39
40	CollisionShape_LocalStoreMemory gCollisionShape;
41	ATTRIBUTE_ALIGNED16(int spuIndices[16]);
42
43	bvhMeshShape_LocalStoreMemory bvhShapeData;
44	SpuConvexPolyhedronVertexData convexVertexData;
45	CompoundShape_LocalStoreMemory compoundShapeData;
46	};
47
#ifdef WIN32
/// Returns a freshly heap-allocated RaycastTask_LocalStoreMemory as an opaque
/// pointer. Nothing in this file releases it; ownership passes to the caller.
void* createRaycastLocalStoreMemory()
{
    return new RaycastTask_LocalStoreMemory;
}
#elif defined(__CELLOS_LV2__)
/// Single statically allocated, 16-byte aligned instance used on __CELLOS_LV2__.
ATTRIBUTE_ALIGNED16(RaycastTask_LocalStoreMemory gLocalStoreMemory);

/// Returns the address of the static gLocalStoreMemory instance; every call
/// yields the same pointer.
void* createRaycastLocalStoreMemory()
{
    return &gLocalStoreMemory;
}
#endif
60
61	void GatherCollisionObjectAndShapeData (RaycastGatheredObjectData* gatheredObjectData, RaycastTask_LocalStoreMemory* lsMemPtr, ppu_address_t objectWrapper)
62	{
63	register int dmaSize;
64	register ppu_address_t dmaPpuAddress2;
65	/* DMA Collision object wrapper into local store */
66	dmaSize = sizeof(SpuCollisionObjectWrapper);
67	dmaPpuAddress2 = objectWrapper;
68	cellDmaGet(&lsMemPtr->gCollisionObjectWrapper, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0);
69	cellDmaWaitTagStatusAll(DMA_MASK(1));
70
71	/* DMA Collision object into local store */
72	dmaSize = sizeof(btCollisionObject);
73	dmaPpuAddress2 = lsMemPtr->getCollisionObjectWrapper()->getCollisionObjectPtr();
74	cellDmaGet(&lsMemPtr->gColObj, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0);
75	cellDmaWaitTagStatusAll(DMA_MASK(2));
76
77	/* Gather information about collision object and shape */
78	gatheredObjectData->m_worldTransform = lsMemPtr->getColObj()->getWorldTransform();
79	gatheredObjectData->m_collisionMargin = lsMemPtr->getCollisionObjectWrapper()->getCollisionMargin ();
80	gatheredObjectData->m_shapeType = lsMemPtr->getCollisionObjectWrapper()->getShapeType ();
81	gatheredObjectData->m_collisionShape = (ppu_address_t)lsMemPtr->getColObj()->getCollisionShape();
82	gatheredObjectData->m_spuCollisionShape = (void*)&lsMemPtr->gCollisionShape.collisionShape;
83
84	/* DMA shape data */
85	dmaCollisionShape (gatheredObjectData->m_spuCollisionShape, gatheredObjectData->m_collisionShape, 1, gatheredObjectData->m_shapeType);
86	cellDmaWaitTagStatusAll(DMA_MASK(1));
87	if (btBroadphaseProxy::isConvex (gatheredObjectData->m_shapeType))
88	{
89	btConvexInternalShape* spuConvexShape = (btConvexInternalShape*)gatheredObjectData->m_spuCollisionShape;
90	gatheredObjectData->m_primitiveDimensions = spuConvexShape->getImplicitShapeDimensions ();
91	} else {
92	gatheredObjectData->m_primitiveDimensions = btVector3(1.0, 1.0, 1.0);
93	}
94
95	}
96
97	void dmaLoadRayOutput (ppu_address_t rayOutputAddr, SpuRaycastTaskWorkUnitOut* rayOutput, uint32_t dmaTag)
98	{
99	cellDmaGet(rayOutput, rayOutputAddr, sizeof(*rayOutput), DMA_TAG(dmaTag), 0, 0);
100	}
101
102	void dmaStoreRayOutput (ppu_address_t rayOutputAddr, const SpuRaycastTaskWorkUnitOut* rayOutput, uint32_t dmaTag)
103	{
104	cellDmaLargePut (rayOutput, rayOutputAddr, sizeof(*rayOutput), DMA_TAG(dmaTag), 0, 0);
105	}
106
#if 0
// Compiled out. Reads 'size' bytes from main-memory address 'ea' into 'buffer',
// either through the software cache or with a stalling unaligned DMA.
SIMD_FORCE_INLINE void small_cache_read(void* buffer, ppu_address_t ea, size_t size)
{
#if USE_SOFTWARE_CACHE
    // The whole request has to live in a single cache line, i.e. its first and
    // last byte must map to the same line.
    btAssert(((ea + size - 1) & ~SPE_CACHELINE_MASK) == (ea & ~SPE_CACHELINE_MASK));

    memcpy(buffer, spe_cache_read(ea), size);
#else
    stallingUnalignedDmaSmallGet(buffer,ea,size);
#endif
}
#endif
122
123	void small_cache_read_triple( void* ls0, ppu_address_t ea0,
124	void* ls1, ppu_address_t ea1,
125	void* ls2, ppu_address_t ea2,
126	size_t size)
127	{
128	btAssert(size<16);
129	ATTRIBUTE_ALIGNED16(char tmpBuffer0[32]);
130	ATTRIBUTE_ALIGNED16(char tmpBuffer1[32]);
131	ATTRIBUTE_ALIGNED16(char tmpBuffer2[32]);
132
133	uint32_t i;
134
135
136	///make sure last 4 bits are the same, for cellDmaSmallGet
137	char* localStore0 = (char*)ls0;
138	uint32_t last4BitsOffset = ea0 & 0x0f;
139	char* tmpTarget0 = tmpBuffer0 + last4BitsOffset;
140	tmpTarget0 = (char*)cellDmaSmallGetReadOnly(tmpTarget0,ea0,size,DMA_TAG(1),0,0);
141
142
143	char* localStore1 = (char*)ls1;
144	last4BitsOffset = ea1 & 0x0f;
145	char* tmpTarget1 = tmpBuffer1 + last4BitsOffset;
146	tmpTarget1 = (char*)cellDmaSmallGetReadOnly(tmpTarget1,ea1,size,DMA_TAG(1),0,0);
147
148	char* localStore2 = (char*)ls2;
149	last4BitsOffset = ea2 & 0x0f;
150	char* tmpTarget2 = tmpBuffer2 + last4BitsOffset;
151	tmpTarget2 = (char*)cellDmaSmallGetReadOnly(tmpTarget2,ea2,size,DMA_TAG(1),0,0);
152
153
154	cellDmaWaitTagStatusAll( DMA_MASK(1) );
155
156	//this is slowish, perhaps memcpy on SPU is smarter?
157	for (i=0; btLikely( i<size );i++)
158	{
159	localStore0[i] = tmpTarget0[i];
160	localStore1[i] = tmpTarget1[i];
161	localStore2[i] = tmpTarget2[i];
162	}
163	}
164
165	void performRaycastAgainstConvex (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr);
166
167	class spuRaycastNodeCallback1 : public btNodeOverlapCallback
168	{
169	RaycastGatheredObjectData* m_gatheredObjectData;
170	const SpuRaycastTaskWorkUnit* m_workUnits;
171	SpuRaycastTaskWorkUnitOut* m_workUnitsOut;
172	int m_workUnit;
173	RaycastTask_LocalStoreMemory* m_lsMemPtr;
174
175	ATTRIBUTE_ALIGNED16(btVector3 spuTriangleVertices[3]);
176	ATTRIBUTE_ALIGNED16(btScalar spuUnscaledVertex[4]);
177	//ATTRIBUTE_ALIGNED16(int spuIndices[16]);
178	public:
179	spuRaycastNodeCallback1(RaycastGatheredObjectData* gatheredObjectData,const SpuRaycastTaskWorkUnit* workUnits, SpuRaycastTaskWorkUnitOut* workUnitsOut, RaycastTask_LocalStoreMemory* lsMemPtr)
180	: m_gatheredObjectData(gatheredObjectData),
181	m_workUnits(workUnits),
182	m_workUnitsOut(workUnitsOut),
183	m_workUnit(0),
184	m_lsMemPtr (lsMemPtr)
185	{
186	}
187
188	void setWorkUnit (int workUnit) { m_workUnit = workUnit; }
189	virtual void processNode(int subPart, int triangleIndex)
190	{
191	///Create a triangle on the stack, call process collision, with GJK
192	///DMA the vertices, can benefit from software caching
193
194	// spu_printf("processNode with triangleIndex %d\n",triangleIndex);
195
196	// ugly solution to support both 16bit and 32bit indices
197	if (m_lsMemPtr->bvhShapeData.gIndexMesh.m_indexType == PHY_SHORT)
198	{
199	short int* indexBasePtr = (short int)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndexm_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
200	ATTRIBUTE_ALIGNED16(short int tmpIndices[3]);
201
202	small_cache_read_triple(&tmpIndices[0],(ppu_address_t)&indexBasePtr[0],
203	&tmpIndices[1],(ppu_address_t)&indexBasePtr[1],
204	&tmpIndices[2],(ppu_address_t)&indexBasePtr[2],
205	sizeof(short int));
206
207	m_lsMemPtr->spuIndices[0] = int(tmpIndices[0]);
208	m_lsMemPtr->spuIndices[1] = int(tmpIndices[1]);
209	m_lsMemPtr->spuIndices[2] = int(tmpIndices[2]);
210	} else
211	{
212	int* indexBasePtr = (int)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndexm_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
213
214	small_cache_read_triple(&m_lsMemPtr->spuIndices[0],(ppu_address_t)&indexBasePtr[0],
215	&m_lsMemPtr->spuIndices[1],(ppu_address_t)&indexBasePtr[1],
216	&m_lsMemPtr->spuIndices[2],(ppu_address_t)&indexBasePtr[2],
217	sizeof(int));
218	}
219
220	//printf("%d %d %d\n", m_lsMemPtr->spuIndices[0], m_lsMemPtr->spuIndices[1], m_lsMemPtr->spuIndices[2]);
221	// spu_printf("SPU index0=%d ,",spuIndices[0]);
222	// spu_printf("SPU index1=%d ,",spuIndices[1]);
223	// spu_printf("SPU index2=%d ,",spuIndices[2]);
224	// spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr);
225
226	const btVector3& meshScaling = m_lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getScaling();
227
228	for (int j=2;btLikely( j>=0 );j--)
229	{
230	int graphicsindex = m_lsMemPtr->spuIndices[j];
231
232	//spu_printf("SPU index=%d ,",graphicsindex);
233	btScalar* graphicsbasePtr = (btScalar)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexBase+graphicsindexm_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexStride);
234
235	// spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr);
236
237
238	///handle un-aligned vertices...
239
240	//another DMA for each vertex
241	small_cache_read_triple(&spuUnscaledVertex[0],(ppu_address_t)&graphicsbasePtr[0],
242	&spuUnscaledVertex[1],(ppu_address_t)&graphicsbasePtr[1],
243	&spuUnscaledVertex[2],(ppu_address_t)&graphicsbasePtr[2],
244	sizeof(btScalar));
245
246	//printf("%f %f %f\n", spuUnscaledVertex[0],spuUnscaledVertex[1],spuUnscaledVertex[2]);
247	spuTriangleVertices[j] = btVector3(
248	spuUnscaledVertex[0]*meshScaling.getX(),
249	spuUnscaledVertex[1]*meshScaling.getY(),
250	spuUnscaledVertex[2]*meshScaling.getZ());
251
252	//spu_printf("SPU:triangle vertices:%f,%f,%f\n",spuTriangleVertices[j].x(),spuTriangleVertices[j].y(),spuTriangleVertices[j].z());
253	}
254
255	RaycastGatheredObjectData triangleGatheredObjectData (*m_gatheredObjectData);
256	triangleGatheredObjectData.m_shapeType = TRIANGLE_SHAPE_PROXYTYPE;
257	triangleGatheredObjectData.m_spuCollisionShape = &spuTriangleVertices[0];
258
259	//printf("%f %f %f\n", spuTriangleVertices[0][0],spuTriangleVertices[0][1],spuTriangleVertices[0][2]);
260	//printf("%f %f %f\n", spuTriangleVertices[1][0],spuTriangleVertices[1][1],spuTriangleVertices[1][2]);
261	//printf("%f %f %f\n", spuTriangleVertices[2][0],spuTriangleVertices[2][1],spuTriangleVertices[2][2]);
262	SpuRaycastTaskWorkUnitOut out;
263	out.hitFraction = 1.0;
264	performRaycastAgainstConvex (&triangleGatheredObjectData, m_workUnits[m_workUnit], &out, m_lsMemPtr);
265	/* XXX: For now only take the closest hit */
266	if (out.hitFraction < m_workUnitsOut[m_workUnit].hitFraction)
267	{
268	m_workUnitsOut[m_workUnit].hitFraction = out.hitFraction;
269	m_workUnitsOut[m_workUnit].hitNormal = out.hitNormal;
270	}
271	}
272
273	};
274
275	class spuRaycastNodeCallback : public btNodeOverlapCallback
276	{
277	RaycastGatheredObjectData* m_gatheredObjectData;
278	const SpuRaycastTaskWorkUnit* m_workUnits;
279	SpuRaycastTaskWorkUnitOut* m_workUnitsOut;
280	int m_numWorkUnits;
281	RaycastTask_LocalStoreMemory* m_lsMemPtr;
282
283	ATTRIBUTE_ALIGNED16(btVector3 spuTriangleVertices[3]);
284	ATTRIBUTE_ALIGNED16(btScalar spuUnscaledVertex[4]);
285	//ATTRIBUTE_ALIGNED16(int spuIndices[16]);
286	public:
287	spuRaycastNodeCallback(RaycastGatheredObjectData* gatheredObjectData,const SpuRaycastTaskWorkUnit* workUnits, SpuRaycastTaskWorkUnitOut* workUnitsOut, int numWorkUnits, RaycastTask_LocalStoreMemory* lsMemPtr)
288	: m_gatheredObjectData(gatheredObjectData),
289	m_workUnits(workUnits),
290	m_workUnitsOut(workUnitsOut),
291	m_numWorkUnits(numWorkUnits),
292	m_lsMemPtr (lsMemPtr)
293	{
294	}
295
296	virtual void processNode(int subPart, int triangleIndex)
297	{
298	///Create a triangle on the stack, call process collision, with GJK
299	///DMA the vertices, can benefit from software caching
300
301	// spu_printf("processNode with triangleIndex %d\n",triangleIndex);
302
303	// ugly solution to support both 16bit and 32bit indices
304	if (m_lsMemPtr->bvhShapeData.gIndexMesh.m_indexType == PHY_SHORT)
305	{
306	short int* indexBasePtr = (short int)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndexm_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
307	ATTRIBUTE_ALIGNED16(short int tmpIndices[3]);
308
309	small_cache_read_triple(&tmpIndices[0],(ppu_address_t)&indexBasePtr[0],
310	&tmpIndices[1],(ppu_address_t)&indexBasePtr[1],
311	&tmpIndices[2],(ppu_address_t)&indexBasePtr[2],
312	sizeof(short int));
313
314	m_lsMemPtr->spuIndices[0] = int(tmpIndices[0]);
315	m_lsMemPtr->spuIndices[1] = int(tmpIndices[1]);
316	m_lsMemPtr->spuIndices[2] = int(tmpIndices[2]);
317	} else
318	{
319	int* indexBasePtr = (int)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndexm_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
320
321	small_cache_read_triple(&m_lsMemPtr->spuIndices[0],(ppu_address_t)&indexBasePtr[0],
322	&m_lsMemPtr->spuIndices[1],(ppu_address_t)&indexBasePtr[1],
323	&m_lsMemPtr->spuIndices[2],(ppu_address_t)&indexBasePtr[2],
324	sizeof(int));
325	}
326
327	//printf("%d %d %d\n", m_lsMemPtr->spuIndices[0], m_lsMemPtr->spuIndices[1], m_lsMemPtr->spuIndices[2]);
328	// spu_printf("SPU index0=%d ,",spuIndices[0]);
329	// spu_printf("SPU index1=%d ,",spuIndices[1]);
330	// spu_printf("SPU index2=%d ,",spuIndices[2]);
331	// spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr);
332
333	const btVector3& meshScaling = m_lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getScaling();
334
335	for (int j=2;btLikely( j>=0 );j--)
336	{
337	int graphicsindex = m_lsMemPtr->spuIndices[j];
338
339	//spu_printf("SPU index=%d ,",graphicsindex);
340	btScalar* graphicsbasePtr = (btScalar)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexBase+graphicsindexm_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexStride);
341
342	// spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr);
343
344
345	///handle un-aligned vertices...
346
347	//another DMA for each vertex
348	small_cache_read_triple(&spuUnscaledVertex[0],(ppu_address_t)&graphicsbasePtr[0],
349	&spuUnscaledVertex[1],(ppu_address_t)&graphicsbasePtr[1],
350	&spuUnscaledVertex[2],(ppu_address_t)&graphicsbasePtr[2],
351	sizeof(btScalar));
352
353	//printf("%f %f %f\n", spuUnscaledVertex[0],spuUnscaledVertex[1],spuUnscaledVertex[2]);
354	spuTriangleVertices[j] = btVector3(
355	spuUnscaledVertex[0]*meshScaling.getX(),
356	spuUnscaledVertex[1]*meshScaling.getY(),
357	spuUnscaledVertex[2]*meshScaling.getZ());
358
359	//spu_printf("SPU:triangle vertices:%f,%f,%f\n",spuTriangleVertices[j].x(),spuTriangleVertices[j].y(),spuTriangleVertices[j].z());
360	}
361
362	RaycastGatheredObjectData triangleGatheredObjectData (*m_gatheredObjectData);
363	triangleGatheredObjectData.m_shapeType = TRIANGLE_SHAPE_PROXYTYPE;
364	triangleGatheredObjectData.m_spuCollisionShape = &spuTriangleVertices[0];
365
366	//printf("%f %f %f\n", spuTriangleVertices[0][0],spuTriangleVertices[0][1],spuTriangleVertices[0][2]);
367	//printf("%f %f %f\n", spuTriangleVertices[1][0],spuTriangleVertices[1][1],spuTriangleVertices[1][2]);
368	//printf("%f %f %f\n", spuTriangleVertices[2][0],spuTriangleVertices[2][1],spuTriangleVertices[2][2]);
369	for (int i = 0; i < m_numWorkUnits; i++)
370	{
371	SpuRaycastTaskWorkUnitOut out;
372	out.hitFraction = 1.0;
373	performRaycastAgainstConvex (&triangleGatheredObjectData, m_workUnits[i], &out, m_lsMemPtr);
374	/* XXX: For now only take the closest hit */
375	if (out.hitFraction < m_workUnitsOut[i].hitFraction)
376	{
377	m_workUnitsOut[i].hitFraction = out.hitFraction;
378	m_workUnitsOut[i].hitNormal = out.hitNormal;
379	}
380	}
381	}
382
383	};
384
385
386	void spuWalkStacklessQuantizedTreeAgainstRays(RaycastTask_LocalStoreMemory* lsMemPtr,
387	btNodeOverlapCallback* nodeCallback,
388	const btVector3* rayFrom,
389	const btVector3* rayTo,
390	int numWorkUnits,
391	unsigned short int* quantizedQueryAabbMin,
392	unsigned short int* quantizedQueryAabbMax,
393	const btQuantizedBvhNode* rootNode,
394	int startNodeIndex,int endNodeIndex)
395	{
396	int curIndex = startNodeIndex;
397	int walkIterations = 0;
398	int subTreeSize = endNodeIndex - startNodeIndex;
399
400	int escapeIndex;
401
402	unsigned int boxBoxOverlap, rayBoxOverlap, anyRayBoxOverlap;
403	unsigned int isLeafNode;
404
405	#define RAYAABB2
406	#ifdef RAYAABB2
407	unsigned int sign[SPU_RAYCAST_WORK_UNITS_PER_TASK][3];
408	btVector3 rayInvDirection[SPU_RAYCAST_WORK_UNITS_PER_TASK];
409	btScalar lambda_max[SPU_RAYCAST_WORK_UNITS_PER_TASK];
410	for (int i = 0; i < numWorkUnits; i++)
411	{
412	btVector3 rayDirection = (rayTo[i]-rayFrom[i]);
413	rayDirection.normalize ();
414	lambda_max[i] = rayDirection.dot(rayTo[i]-rayFrom[i]);
415	rayInvDirection[i][0] = btScalar(1.0) / rayDirection[0];
416	rayInvDirection[i][1] = btScalar(1.0) / rayDirection[1];
417	rayInvDirection[i][2] = btScalar(1.0) / rayDirection[2];
418	sign[i][0] = rayDirection[0] < 0.0;
419	sign[i][1] = rayDirection[1] < 0.0;
420	sign[i][2] = rayDirection[2] < 0.0;
421	}
422	#endif
423
424	while (curIndex < endNodeIndex)
425	{
426	//catch bugs in tree data
427	assert (walkIterations < subTreeSize);
428
429	walkIterations++;
430
431	isLeafNode = rootNode->isLeafNode();
432
433	anyRayBoxOverlap = 0;
434
435	for (int i = 0; i < numWorkUnits; i++)
436	{
437	unsigned short int* quamin = (quantizedQueryAabbMin + 3 * i);
438	unsigned short int* quamax = (quantizedQueryAabbMax + 3 * i);
439	boxBoxOverlap = spuTestQuantizedAabbAgainstQuantizedAabb(quamin,quamax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax);
440	if (!boxBoxOverlap)
441	continue;
442
443	rayBoxOverlap = 0;
444	btScalar param = 1.0;
445	btVector3 normal;
446	btVector3 bounds[2];
447	bounds[0] = lsMemPtr->bvhShapeData.getOptimizedBvh()->unQuantize(rootNode->m_quantizedAabbMin);
448	bounds[1] = lsMemPtr->bvhShapeData.getOptimizedBvh()->unQuantize(rootNode->m_quantizedAabbMax);
449	#ifdef RAYAABB2
450	rayBoxOverlap = btRayAabb2 (rayFrom[i], rayInvDirection[i], sign[i], bounds, param, 0.0, lambda_max[i]);
451	#else
452	rayBoxOverlap = btRayAabb(rayFrom[i], rayTo[i], bounds[0], bounds[1], param, normal);
453	#endif
454
455	#ifndef CALLBACK_ALL
456	anyRayBoxOverlap = rayBoxOverlap \|\| anyRayBoxOverlap;
457	/* If we have any ray vs. box overlap and this isn't a leaf node
458	we know that we need to dig deeper
459	*/
460	if (!isLeafNode && anyRayBoxOverlap)
461	break;
462
463	if (isLeafNode && rayBoxOverlap)
464	{
465	spuRaycastNodeCallback1* callback = (spuRaycastNodeCallback1*)nodeCallback;
466	callback->setWorkUnit (i);
467	nodeCallback->processNode (0, rootNode->getTriangleIndex());
468	}
469	#else
470	/* If we have any ray vs. box overlap and this isn't a leaf node
471	we know that we need to dig deeper
472	*/
473	if (rayBoxOverlap)
474	{
475	anyRayBoxOverlap = 1;
476	break;
477	}
478	#endif
479	}
480
481	#ifdef CALLBACK_ALL
482	if (isLeafNode && anyRayBoxOverlap)
483	{
484	nodeCallback->processNode (0, rootNode->getTriangleIndex());
485	}
486	#endif
487
488	if (anyRayBoxOverlap \|\| isLeafNode)
489	{
490	rootNode++;
491	curIndex++;
492	} else
493	{
494	escapeIndex = rootNode->getEscapeIndex();
495	rootNode += escapeIndex;
496	curIndex += escapeIndex;
497	}
498	}
499
500	}
501
502
503	void performRaycastAgainstConcave (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit* workUnits, SpuRaycastTaskWorkUnitOut* workUnitsOut, int numWorkUnits, RaycastTask_LocalStoreMemory* lsMemPtr)
504	{
505	//order: first collision shape is convex, second concave. m_isSwapped is true, if the original order was opposite
506	register int dmaSize;
507	register ppu_address_t dmaPpuAddress2;
508
509
510	btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)gatheredObjectData->m_spuCollisionShape;
511
512	//need the mesh interface, for access to triangle vertices
513	dmaBvhShapeData (&(lsMemPtr->bvhShapeData), trimeshShape);
514
515	unsigned short int quantizedQueryAabbMin[SPU_RAYCAST_WORK_UNITS_PER_TASK][3];
516	unsigned short int quantizedQueryAabbMax[SPU_RAYCAST_WORK_UNITS_PER_TASK][3];
517	btVector3 rayFromInTriangleSpace[SPU_RAYCAST_WORK_UNITS_PER_TASK];
518	btVector3 rayToInTriangleSpace[SPU_RAYCAST_WORK_UNITS_PER_TASK];
519
520	/* Calculate the AABB for the ray in the triangle mesh shape */
521	btTransform rayInTriangleSpace;
522	rayInTriangleSpace = gatheredObjectData->m_worldTransform.inverse();
523
524	for (int i = 0; i < numWorkUnits; i++)
525	{
526	btVector3 aabbMin;
527	btVector3 aabbMax;
528
529	rayFromInTriangleSpace[i] = rayInTriangleSpace(workUnits[i].rayFrom);
530	rayToInTriangleSpace[i] = rayInTriangleSpace(workUnits[i].rayTo);
531
532	aabbMin = rayFromInTriangleSpace[i];
533	aabbMin.setMin (rayToInTriangleSpace[i]);
534	aabbMax = rayFromInTriangleSpace[i];
535	aabbMax.setMax (rayToInTriangleSpace[i]);
536
537	lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMin[i],aabbMin,0);
538	lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMax[i],aabbMax,1);
539	}
540
541	QuantizedNodeArray& nodeArray = lsMemPtr->bvhShapeData.getOptimizedBvh()->getQuantizedNodeArray();
542	//spu_printf("SPU: numNodes = %d\n",nodeArray.size());
543
544	BvhSubtreeInfoArray& subTrees = lsMemPtr->bvhShapeData.getOptimizedBvh()->getSubtreeInfoArray();
545
546	#ifdef CALLBACK_ALL
547	spuRaycastNodeCallback nodeCallback (gatheredObjectData, workUnits, workUnitsOut, numWorkUnits, lsMemPtr);
548	#else
549	spuRaycastNodeCallback1 nodeCallback (gatheredObjectData, workUnits, workUnitsOut, lsMemPtr);
550	#endif
551
552	IndexedMeshArray& indexArray = lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getIndexedMeshArray();
553
554	//spu_printf("SPU:indexArray.size() = %d\n",indexArray.size());
555	// spu_printf("SPU: numSubTrees = %d\n",subTrees.size());
556	//not likely to happen
557	if (subTrees.size() && indexArray.size() == 1)
558	{
559	///DMA in the index info
560	dmaBvhIndexedMesh (&lsMemPtr->bvhShapeData.gIndexMesh, indexArray, 0 /* index into indexArray /, 1 / dmaTag */);
561	cellDmaWaitTagStatusAll(DMA_MASK(1));
562
563	//display the headers
564	int numBatch = subTrees.size();
565	for (int i=0;i<numBatch;)
566	{
567	// BEN: TODO - can reorder DMA transfers for less stall
568	int remaining = subTrees.size() - i;
569	int nextBatch = remaining < MAX_SPU_SUBTREE_HEADERS ? remaining : MAX_SPU_SUBTREE_HEADERS;
570
571	dmaBvhSubTreeHeaders (&lsMemPtr->bvhShapeData.gSubtreeHeaders[0], (ppu_address_t)(&subTrees[i]), nextBatch, 1);
572	cellDmaWaitTagStatusAll(DMA_MASK(1));
573
574
575	// spu_printf("nextBatch = %d\n",nextBatch);
576
577
578	for (int j=0;j<nextBatch;j++)
579	{
580	const btBvhSubtreeInfo& subtree = lsMemPtr->bvhShapeData.gSubtreeHeaders[j];
581
582	unsigned int overlap = 1;
583	for (int boxId = 0; boxId < numWorkUnits; boxId++)
584	{
585	overlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin[boxId],quantizedQueryAabbMax[boxId],subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);
586	if (overlap)
587	break;
588	}
589
590	if (overlap)
591	{
592	btAssert(subtree.m_subtreeSize);
593
594	//dma the actual nodes of this subtree
595	dmaBvhSubTreeNodes (&lsMemPtr->bvhShapeData.gSubtreeNodes[0], subtree, nodeArray, 2);
596
597	cellDmaWaitTagStatusAll(DMA_MASK(2));
598
599	/* Walk this subtree */
600
601	{
602
603	spuWalkStacklessQuantizedTreeAgainstRays(lsMemPtr,
604	&nodeCallback,
605	&rayFromInTriangleSpace[0],
606	&rayToInTriangleSpace[0],
607	numWorkUnits,
608	&quantizedQueryAabbMin[0][0],&quantizedQueryAabbMax[0][0],
609	&lsMemPtr->bvhShapeData.gSubtreeNodes[0], 0, subtree.m_subtreeSize);
610	}
611	}
612	// spu_printf("subtreeSize = %d\n",gSubtreeHeaders[j].m_subtreeSize);
613	}
614
615	// unsigned short int m_quantizedAabbMin[3];
616	// unsigned short int m_quantizedAabbMax[3];
617	// int m_rootNodeIndex;
618	// int m_subtreeSize;
619	i+=nextBatch;
620	}
621
622	//pre-fetch first tree, then loop and double buffer
623	}
624
625	}
626
627	void performRaycastAgainstCompound (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr)
628	{
629	//XXX spu_printf ("Currently no support for ray. vs compound objects. Support coming soon.\n");
630	}
631
632	void
633	performRaycastAgainstConvex (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr)
634	{
635	SpuVoronoiSimplexSolver simplexSolver;
636
637	btTransform rayFromTrans, rayToTrans;
638	rayFromTrans.setIdentity ();
639	rayFromTrans.setOrigin (workUnit.rayFrom);
640	rayToTrans.setIdentity ();
641	rayToTrans.setOrigin (workUnit.rayTo);
642
643	SpuCastResult result;
644
645	/* Load the vertex data if the shape is a convex hull */
646	/* XXX: We might be loading the shape twice */
647	ATTRIBUTE_ALIGNED16(char convexHullShape[sizeof(btConvexHullShape)]);
648	if (gatheredObjectData->m_shapeType == CONVEX_HULL_SHAPE_PROXYTYPE)
649	{
650	register int dmaSize;
651	register ppu_address_t dmaPpuAddress2;
652	dmaSize = sizeof(btConvexHullShape);
653	dmaPpuAddress2 = gatheredObjectData->m_collisionShape;
654	cellDmaGet(&convexHullShape, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0);
655	cellDmaWaitTagStatusAll(DMA_MASK(1));
656	dmaConvexVertexData (&lsMemPtr->convexVertexData, (btConvexHullShape*)&convexHullShape);
657	cellDmaWaitTagStatusAll(DMA_MASK(2)); // dmaConvexVertexData uses dma channel 2!
658	lsMemPtr->convexVertexData.gSpuConvexShapePtr = gatheredObjectData->m_spuCollisionShape;
659	lsMemPtr->convexVertexData.gConvexPoints = &lsMemPtr->convexVertexData.g_convexPointBuffer[0];
660	}
661
662	/* performRaycast */
663	SpuSubsimplexRayCast caster (gatheredObjectData->m_spuCollisionShape, &lsMemPtr->convexVertexData, gatheredObjectData->m_shapeType, gatheredObjectData->m_collisionMargin, &simplexSolver);
664	bool r = caster.calcTimeOfImpact (rayFromTrans, rayToTrans, gatheredObjectData->m_worldTransform, gatheredObjectData->m_worldTransform,result);
665
666	if (r)
667	{
668	workUnitOut->hitFraction = result.m_fraction;
669	workUnitOut->hitNormal = result.m_normal;
670	}
671	}
672
673	void processRaycastTask(void* userPtr, void* lsMemory)
674	{
675	RaycastTask_LocalStoreMemory* localMemory = (RaycastTask_LocalStoreMemory*)lsMemory;
676
677	SpuRaycastTaskDesc* taskDescPtr = (SpuRaycastTaskDesc*)userPtr;
678	SpuRaycastTaskDesc& taskDesc = *taskDescPtr;
679
680	SpuCollisionObjectWrapper* cows = (SpuCollisionObjectWrapper*)taskDesc.spuCollisionObjectsWrappers;
681
682	//spu_printf("in processRaycastTask %d\n", taskDesc.numSpuCollisionObjectWrappers);
683	/* for each object */
684	RaycastGatheredObjectData gatheredObjectData;
685	for (int objectId = 0; objectId < taskDesc.numSpuCollisionObjectWrappers; objectId++)
686	{
687	//spu_printf("%d / %d\n", objectId, taskDesc.numSpuCollisionObjectWrappers);
688
689	/* load initial collision shape */
690	GatherCollisionObjectAndShapeData (&gatheredObjectData, localMemory, (ppu_address_t)&cows[objectId]);
691
692	if (btBroadphaseProxy::isConcave (gatheredObjectData.m_shapeType))
693	{
694	SpuRaycastTaskWorkUnitOut tWorkUnitsOut[SPU_RAYCAST_WORK_UNITS_PER_TASK];
695	for (int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++)
696	{
697	tWorkUnitsOut[rayId].hitFraction = 1.0;
698	}
699
700	performRaycastAgainstConcave (&gatheredObjectData, &taskDesc.workUnits[0], &tWorkUnitsOut[0], taskDesc.numWorkUnits, localMemory);
701
702	for (int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++)
703	{
704	const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId];
705	if (tWorkUnitsOut[rayId].hitFraction == 1.0)
706	continue;
707
708	ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut);
709	dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
710	cellDmaWaitTagStatusAll(DMA_MASK(1));
711
712
713	/* XXX Only support taking the closest hit for now */
714	if (tWorkUnitsOut[rayId].hitFraction < workUnitOut.hitFraction)
715	{
716	workUnitOut.hitFraction = tWorkUnitsOut[rayId].hitFraction;
717	workUnitOut.hitNormal = tWorkUnitsOut[rayId].hitNormal;
718	}
719
720	/* write ray cast data back */
721	dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
722	cellDmaWaitTagStatusAll(DMA_MASK(1));
723	}
724	} else if (btBroadphaseProxy::isConvex (gatheredObjectData.m_shapeType)) {
725
726	btVector3 objectBoxMin, objectBoxMax;
727	computeAabb (objectBoxMin, objectBoxMax, (btConvexInternalShape*)gatheredObjectData.m_spuCollisionShape, gatheredObjectData.m_collisionShape, gatheredObjectData.m_shapeType, gatheredObjectData.m_worldTransform);
728	for (unsigned int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++)
729	{
730	const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId];
731
732	btScalar ignored_param = 1.0;
733	btVector3 ignored_normal;
734	if (btRayAabb(workUnit.rayFrom, workUnit.rayTo, objectBoxMin, objectBoxMax, ignored_param, ignored_normal))
735	{
736	ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut);
737	SpuRaycastTaskWorkUnitOut tWorkUnitOut;
738	tWorkUnitOut.hitFraction = 1.0;
739
740	performRaycastAgainstConvex (&gatheredObjectData, workUnit, &tWorkUnitOut, localMemory);
741	if (tWorkUnitOut.hitFraction == 1.0)
742	continue;
743
744	dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
745	cellDmaWaitTagStatusAll(DMA_MASK(1));
746
747	/* XXX Only support taking the closest hit for now */
748	if (tWorkUnitOut.hitFraction < workUnitOut.hitFraction)
749	{
750	workUnitOut.hitFraction = tWorkUnitOut.hitFraction;
751	workUnitOut.hitNormal = tWorkUnitOut.hitNormal;
752	/* write ray cast data back */
753	dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
754	cellDmaWaitTagStatusAll(DMA_MASK(1));
755	}
756	}
757	}
758
759	} else if (btBroadphaseProxy::isCompound (gatheredObjectData.m_shapeType)) {
760	for (unsigned int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++)
761	{
762	const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId];
763	ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut);
764	SpuRaycastTaskWorkUnitOut tWorkUnitOut;
765	tWorkUnitOut.hitFraction = 1.0;
766
767	performRaycastAgainstCompound (&gatheredObjectData, workUnit, &tWorkUnitOut, localMemory);
768	if (tWorkUnitOut.hitFraction == 1.0)
769	continue;
770
771	dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
772	cellDmaWaitTagStatusAll(DMA_MASK(1));
773	/* XXX Only support taking the closest hit for now */
774	if (tWorkUnitOut.hitFraction < workUnitOut.hitFraction)
775	{
776	workUnitOut.hitFraction = tWorkUnitOut.hitFraction;
777	workUnitOut.hitNormal = tWorkUnitOut.hitNormal;
778	}
779
780	/* write ray cast data back */
781	dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
782	cellDmaWaitTagStatusAll(DMA_MASK(1));
783	}
784	}
785	}
786	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: