[1966] | 1 | |
---|
| 2 | |
---|
| 3 | #include "../PlatformDefinitions.h" |
---|
| 4 | #include "SpuRaycastTask.h" |
---|
| 5 | #include "../SpuCollisionObjectWrapper.h" |
---|
| 6 | #include "../SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h" |
---|
| 7 | #include "SpuSubSimplexConvexCast.h" |
---|
| 8 | #include "LinearMath/btAabbUtil2.h" |
---|
| 9 | |
---|
| 10 | |
---|
| 11 | /* Future optimization strategies: |
---|
| 12 | 1. BBOX prune before loading shape data |
---|
| 13 | 2. Could reduce number of dmas for ray output data to a single read and write. |
---|
| 14 | By sharing the temporary work unit output structures across objects. |
---|
| 15 | 3. The reason SpuRaycastNodeCallback1 is slower is because the triangle data isn't |
---|
| 16 | being cached across calls. Fix that by doing the final ray pruning inside the callback. |
---|
| 17 | */ |
---|
| 18 | |
---|
| 19 | /* Future work: |
---|
| 20 | 1. support first hit, closest hit, etc rather than just closest hit. |
---|
| 21 | 2. support compound objects |
---|
| 22 | */ |
---|
| 23 | |
---|
| 24 | #define CALLBACK_ALL |
---|
| 25 | |
---|
/// Scratch area in SPU local store that the raycast task DMAs PPU-side
/// objects into before using them. All DMA targets are 16-byte aligned.
struct RaycastTask_LocalStoreMemory
{
	// Raw backing storage for one btCollisionObject copied from main memory;
	// the +16 bytes of slack absorb DMA alignment requirements.
	ATTRIBUTE_ALIGNED16(char gColObj [sizeof(btCollisionObject)+16]);
	// Typed view of the DMA'd collision object.
	btCollisionObject* getColObj()
	{
		return (btCollisionObject*) gColObj;
	}

	// Local copy of the per-object wrapper (collision object ptr, shape type, margin).
	ATTRIBUTE_ALIGNED16(SpuCollisionObjectWrapper gCollisionObjectWrapper);
	SpuCollisionObjectWrapper* getCollisionObjectWrapper ()
	{
		return &gCollisionObjectWrapper;
	}

	// Local copy of the object's collision shape data.
	CollisionShape_LocalStoreMemory gCollisionShape;
	// Triangle indices for the BVH node currently being processed; only the
	// first 3 slots are read, the array is oversized/aligned for DMA.
	ATTRIBUTE_ALIGNED16(int spuIndices[16]);

	// Per-shape-kind staging buffers: BVH trimesh data, convex hull vertex
	// data, and compound shape children.
	bvhMeshShape_LocalStoreMemory bvhShapeData;
	SpuConvexPolyhedronVertexData convexVertexData;
	CompoundShape_LocalStoreMemory compoundShapeData;
};
---|
| 47 | |
---|
#ifdef WIN32
/// Win32 (simulation) build: allocate the task-local scratch block on the
/// heap. The allocation lives for the duration of the task and is never
/// explicitly freed (task-lifetime singleton).
void* createRaycastLocalStoreMemory()
{
	return new RaycastTask_LocalStoreMemory;
}
#elif defined(__CELLOS_LV2__)
/// PS3/SPU build: local store memory is a fixed static block, 16-byte
/// aligned so it is a valid DMA target.
ATTRIBUTE_ALIGNED16(RaycastTask_LocalStoreMemory gLocalStoreMemory);
void* createRaycastLocalStoreMemory()
{
	return &gLocalStoreMemory;
}
#endif
---|
| 60 | |
---|
/// Copies one collision object (wrapper, object, then shape) from main memory
/// into SPU local store and fills @a gatheredObjectData with the fields the
/// raycast code needs: world transform, collision margin, shape type and the
/// PPU/SPU shape pointers. Blocks until every DMA has completed, so the data
/// is ready on return. The three transfers are sequential because each one
/// supplies the address needed by the next.
void GatherCollisionObjectAndShapeData (RaycastGatheredObjectData* gatheredObjectData, RaycastTask_LocalStoreMemory* lsMemPtr, ppu_address_t objectWrapper)
{
	register int dmaSize;
	register ppu_address_t dmaPpuAddress2;
	/* DMA Collision object wrapper into local store */
	dmaSize = sizeof(SpuCollisionObjectWrapper);
	dmaPpuAddress2 = objectWrapper;
	cellDmaGet(&lsMemPtr->gCollisionObjectWrapper, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(1));

	/* DMA Collision object into local store (its address comes from the wrapper fetched above) */
	dmaSize = sizeof(btCollisionObject);
	dmaPpuAddress2 = lsMemPtr->getCollisionObjectWrapper()->getCollisionObjectPtr();
	cellDmaGet(&lsMemPtr->gColObj, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0);
	cellDmaWaitTagStatusAll(DMA_MASK(2));

	/* Gather information about collision object and shape */
	gatheredObjectData->m_worldTransform = lsMemPtr->getColObj()->getWorldTransform();
	gatheredObjectData->m_collisionMargin = lsMemPtr->getCollisionObjectWrapper()->getCollisionMargin ();
	gatheredObjectData->m_shapeType = lsMemPtr->getCollisionObjectWrapper()->getShapeType ();
	gatheredObjectData->m_collisionShape = (ppu_address_t)lsMemPtr->getColObj()->getCollisionShape();
	gatheredObjectData->m_spuCollisionShape = (void*)&lsMemPtr->gCollisionShape.collisionShape;

	/* DMA shape data */
	dmaCollisionShape (gatheredObjectData->m_spuCollisionShape, gatheredObjectData->m_collisionShape, 1, gatheredObjectData->m_shapeType);
	cellDmaWaitTagStatusAll(DMA_MASK(1));
	if (btBroadphaseProxy::isConvex (gatheredObjectData->m_shapeType))
	{
		btConvexInternalShape* spuConvexShape = (btConvexInternalShape*)gatheredObjectData->m_spuCollisionShape;
		gatheredObjectData->m_primitiveDimensions = spuConvexShape->getImplicitShapeDimensions ();
	} else {
		// Non-convex shapes have no implicit dimensions; use a unit placeholder.
		gatheredObjectData->m_primitiveDimensions = btVector3(1.0, 1.0, 1.0);
	}

}
---|
| 96 | |
---|
| 97 | void dmaLoadRayOutput (ppu_address_t rayOutputAddr, SpuRaycastTaskWorkUnitOut* rayOutput, uint32_t dmaTag) |
---|
| 98 | { |
---|
| 99 | cellDmaGet(rayOutput, rayOutputAddr, sizeof(*rayOutput), DMA_TAG(dmaTag), 0, 0); |
---|
| 100 | } |
---|
| 101 | |
---|
| 102 | void dmaStoreRayOutput (ppu_address_t rayOutputAddr, const SpuRaycastTaskWorkUnitOut* rayOutput, uint32_t dmaTag) |
---|
| 103 | { |
---|
| 104 | cellDmaLargePut (rayOutput, rayOutputAddr, sizeof(*rayOutput), DMA_TAG(dmaTag), 0, 0); |
---|
| 105 | } |
---|
| 106 | |
---|
#if 0
// Disabled alternative to small_cache_read_triple: a single small unaligned
// read, optionally served from the SPE software cache. Kept for reference.
SIMD_FORCE_INLINE void small_cache_read(void* buffer, ppu_address_t ea, size_t size)
{
#if USE_SOFTWARE_CACHE
	// Check for alignment requirements. We need to make sure the entire request fits within one cache line,
	// so the first and last bytes should fall on the same cache line
	btAssert((ea & ~SPE_CACHELINE_MASK) == ((ea + size - 1) & ~SPE_CACHELINE_MASK));

	void* ls = spe_cache_read(ea);
	memcpy(buffer, ls, size);
#else
	stallingUnalignedDmaSmallGet(buffer,ea,size);
#endif
}
#endif
---|
| 122 | |
---|
/// Fetches three small (&lt; 16 byte) values from three arbitrary, possibly
/// unaligned main-memory addresses into local store. Each value is DMA'd into
/// a 16-byte-aligned bounce buffer at the same sub-quadword offset as its
/// effective address (a cellDmaSmallGet requirement), all on one DMA tag, and
/// then byte-copied to the caller's destinations after a single wait.
void small_cache_read_triple( void* ls0, ppu_address_t ea0,
			void* ls1, ppu_address_t ea1,
			void* ls2, ppu_address_t ea2,
			size_t size)
{
	btAssert(size<16);
	ATTRIBUTE_ALIGNED16(char tmpBuffer0[32]);
	ATTRIBUTE_ALIGNED16(char tmpBuffer1[32]);
	ATTRIBUTE_ALIGNED16(char tmpBuffer2[32]);

	uint32_t i;


	///make sure last 4 bits are the same, for cellDmaSmallGet
	char* localStore0 = (char*)ls0;
	uint32_t last4BitsOffset = ea0 & 0x0f;
	char* tmpTarget0 = tmpBuffer0 + last4BitsOffset;
	// NOTE: the pointer returned by cellDmaSmallGetReadOnly is the one that
	// must be read after the wait (it may differ from the buffer passed in).
	tmpTarget0 = (char*)cellDmaSmallGetReadOnly(tmpTarget0,ea0,size,DMA_TAG(1),0,0);


	char* localStore1 = (char*)ls1;
	last4BitsOffset = ea1 & 0x0f;
	char* tmpTarget1 = tmpBuffer1 + last4BitsOffset;
	tmpTarget1 = (char*)cellDmaSmallGetReadOnly(tmpTarget1,ea1,size,DMA_TAG(1),0,0);

	char* localStore2 = (char*)ls2;
	last4BitsOffset = ea2 & 0x0f;
	char* tmpTarget2 = tmpBuffer2 + last4BitsOffset;
	tmpTarget2 = (char*)cellDmaSmallGetReadOnly(tmpTarget2,ea2,size,DMA_TAG(1),0,0);


	// One wait covers all three transfers (same tag).
	cellDmaWaitTagStatusAll( DMA_MASK(1) );

	//this is slowish, perhaps memcpy on SPU is smarter?
	for (i=0; btLikely( i<size );i++)
	{
		localStore0[i] = tmpTarget0[i];
		localStore1[i] = tmpTarget1[i];
		localStore2[i] = tmpTarget2[i];
	}
}
---|
| 164 | |
---|
| 165 | void performRaycastAgainstConvex (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr); |
---|
| 166 | |
---|
/// BVH leaf callback used when CALLBACK_ALL is NOT defined: for each reported
/// triangle it casts only the single ray selected via setWorkUnit(), keeping
/// the closest hit per work unit. Triangle vertex/index data is DMA'd per
/// call (see the "Future optimization" note at the top of this file about
/// caching it across calls).
class spuRaycastNodeCallback1 : public btNodeOverlapCallback
{
	RaycastGatheredObjectData* m_gatheredObjectData;	// concave object being raycast
	const SpuRaycastTaskWorkUnit* m_workUnits;		// all rays of this task
	SpuRaycastTaskWorkUnitOut* m_workUnitsOut;		// per-ray closest-hit results
	int m_workUnit;						// index of the ray to test next
	RaycastTask_LocalStoreMemory* m_lsMemPtr;

	// Scaled triangle handed to the convex caster, and the raw vertex
	// currently being DMA'd in; both aligned for DMA.
	ATTRIBUTE_ALIGNED16(btVector3 spuTriangleVertices[3]);
	ATTRIBUTE_ALIGNED16(btScalar spuUnscaledVertex[4]);
	//ATTRIBUTE_ALIGNED16(int spuIndices[16]);
public:
	spuRaycastNodeCallback1(RaycastGatheredObjectData* gatheredObjectData,const SpuRaycastTaskWorkUnit* workUnits, SpuRaycastTaskWorkUnitOut* workUnitsOut, RaycastTask_LocalStoreMemory* lsMemPtr)
		: m_gatheredObjectData(gatheredObjectData),
		m_workUnits(workUnits),
		m_workUnitsOut(workUnitsOut),
		m_workUnit(0),
		m_lsMemPtr (lsMemPtr)
	{
	}

	// Selects which ray the next processNode() call tests.
	void setWorkUnit (int workUnit) { m_workUnit = workUnit; }
	virtual void processNode(int subPart, int triangleIndex)
	{
		///Create a triangle on the stack, call process collision, with GJK
		///DMA the vertices, can benefit from software caching

		//	spu_printf("processNode with triangleIndex %d\n",triangleIndex);

		// ugly solution to support both 16bit and 32bit indices
		if (m_lsMemPtr->bvhShapeData.gIndexMesh.m_indexType == PHY_SHORT)
		{
			short int* indexBasePtr = (short int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
			ATTRIBUTE_ALIGNED16(short int tmpIndices[3]);

			small_cache_read_triple(&tmpIndices[0],(ppu_address_t)&indexBasePtr[0],
							&tmpIndices[1],(ppu_address_t)&indexBasePtr[1],
							&tmpIndices[2],(ppu_address_t)&indexBasePtr[2],
							sizeof(short int));

			// Widen the 16-bit indices into the shared int buffer.
			m_lsMemPtr->spuIndices[0] = int(tmpIndices[0]);
			m_lsMemPtr->spuIndices[1] = int(tmpIndices[1]);
			m_lsMemPtr->spuIndices[2] = int(tmpIndices[2]);
		} else
		{
			int* indexBasePtr = (int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);

			small_cache_read_triple(&m_lsMemPtr->spuIndices[0],(ppu_address_t)&indexBasePtr[0],
						&m_lsMemPtr->spuIndices[1],(ppu_address_t)&indexBasePtr[1],
						&m_lsMemPtr->spuIndices[2],(ppu_address_t)&indexBasePtr[2],
						sizeof(int));
		}

		//printf("%d %d %d\n", m_lsMemPtr->spuIndices[0], m_lsMemPtr->spuIndices[1], m_lsMemPtr->spuIndices[2]);
		//		spu_printf("SPU index0=%d ,",spuIndices[0]);
		//		spu_printf("SPU index1=%d ,",spuIndices[1]);
		//		spu_printf("SPU index2=%d ,",spuIndices[2]);
		//		spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr);

		const btVector3& meshScaling = m_lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getScaling();

		// DMA each of the three vertices and apply the mesh scaling.
		for (int j=2;btLikely( j>=0 );j--)
		{
			int graphicsindex = m_lsMemPtr->spuIndices[j];

			//spu_printf("SPU index=%d ,",graphicsindex);
			btScalar* graphicsbasePtr = (btScalar*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexBase+graphicsindex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexStride);

			//			spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr);


			///handle un-aligned vertices...

			//another DMA for each vertex
			small_cache_read_triple(&spuUnscaledVertex[0],(ppu_address_t)&graphicsbasePtr[0],
						&spuUnscaledVertex[1],(ppu_address_t)&graphicsbasePtr[1],
						&spuUnscaledVertex[2],(ppu_address_t)&graphicsbasePtr[2],
						sizeof(btScalar));

			//printf("%f %f %f\n", spuUnscaledVertex[0],spuUnscaledVertex[1],spuUnscaledVertex[2]);
			spuTriangleVertices[j] = btVector3(
				spuUnscaledVertex[0]*meshScaling.getX(),
				spuUnscaledVertex[1]*meshScaling.getY(),
				spuUnscaledVertex[2]*meshScaling.getZ());

			//spu_printf("SPU:triangle vertices:%f,%f,%f\n",spuTriangleVertices[j].x(),spuTriangleVertices[j].y(),spuTriangleVertices[j].z());
		}

		// Treat the triangle as a convex shape and reuse the convex ray caster.
		RaycastGatheredObjectData triangleGatheredObjectData (*m_gatheredObjectData);
		triangleGatheredObjectData.m_shapeType = TRIANGLE_SHAPE_PROXYTYPE;
		triangleGatheredObjectData.m_spuCollisionShape = &spuTriangleVertices[0];

		//printf("%f %f %f\n", spuTriangleVertices[0][0],spuTriangleVertices[0][1],spuTriangleVertices[0][2]);
		//printf("%f %f %f\n", spuTriangleVertices[1][0],spuTriangleVertices[1][1],spuTriangleVertices[1][2]);
		//printf("%f %f %f\n", spuTriangleVertices[2][0],spuTriangleVertices[2][1],spuTriangleVertices[2][2]);
		SpuRaycastTaskWorkUnitOut out;
		out.hitFraction = 1.0;
		performRaycastAgainstConvex (&triangleGatheredObjectData, m_workUnits[m_workUnit], &out, m_lsMemPtr);
		/* XXX: For now only take the closest hit */
		if (out.hitFraction < m_workUnitsOut[m_workUnit].hitFraction)
		{
			m_workUnitsOut[m_workUnit].hitFraction = out.hitFraction;
			m_workUnitsOut[m_workUnit].hitNormal = out.hitNormal;
		}
	}

};
---|
| 274 | |
---|
/// BVH leaf callback used when CALLBACK_ALL is defined: for each reported
/// triangle it casts ALL rays in the batch against the triangle, keeping the
/// closest hit per work unit. Shares (duplicates) the triangle-fetch logic of
/// spuRaycastNodeCallback1; the two differ only in which rays are tested.
class spuRaycastNodeCallback : public btNodeOverlapCallback
{
	RaycastGatheredObjectData* m_gatheredObjectData;	// concave object being raycast
	const SpuRaycastTaskWorkUnit* m_workUnits;		// all rays of this task
	SpuRaycastTaskWorkUnitOut* m_workUnitsOut;		// per-ray closest-hit results
	int m_numWorkUnits;					// number of rays in the batch
	RaycastTask_LocalStoreMemory* m_lsMemPtr;

	// Scaled triangle handed to the convex caster, and the raw vertex
	// currently being DMA'd in; both aligned for DMA.
	ATTRIBUTE_ALIGNED16(btVector3 spuTriangleVertices[3]);
	ATTRIBUTE_ALIGNED16(btScalar spuUnscaledVertex[4]);
	//ATTRIBUTE_ALIGNED16(int spuIndices[16]);
public:
	spuRaycastNodeCallback(RaycastGatheredObjectData* gatheredObjectData,const SpuRaycastTaskWorkUnit* workUnits, SpuRaycastTaskWorkUnitOut* workUnitsOut, int numWorkUnits, RaycastTask_LocalStoreMemory* lsMemPtr)
		: m_gatheredObjectData(gatheredObjectData),
		m_workUnits(workUnits),
		m_workUnitsOut(workUnitsOut),
		m_numWorkUnits(numWorkUnits),
		m_lsMemPtr (lsMemPtr)
	{
	}

	virtual void processNode(int subPart, int triangleIndex)
	{
		///Create a triangle on the stack, call process collision, with GJK
		///DMA the vertices, can benefit from software caching

		//	spu_printf("processNode with triangleIndex %d\n",triangleIndex);

		// ugly solution to support both 16bit and 32bit indices
		if (m_lsMemPtr->bvhShapeData.gIndexMesh.m_indexType == PHY_SHORT)
		{
			short int* indexBasePtr = (short int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
			ATTRIBUTE_ALIGNED16(short int tmpIndices[3]);

			small_cache_read_triple(&tmpIndices[0],(ppu_address_t)&indexBasePtr[0],
							&tmpIndices[1],(ppu_address_t)&indexBasePtr[1],
							&tmpIndices[2],(ppu_address_t)&indexBasePtr[2],
							sizeof(short int));

			// Widen the 16-bit indices into the shared int buffer.
			m_lsMemPtr->spuIndices[0] = int(tmpIndices[0]);
			m_lsMemPtr->spuIndices[1] = int(tmpIndices[1]);
			m_lsMemPtr->spuIndices[2] = int(tmpIndices[2]);
		} else
		{
			int* indexBasePtr = (int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);

			small_cache_read_triple(&m_lsMemPtr->spuIndices[0],(ppu_address_t)&indexBasePtr[0],
						&m_lsMemPtr->spuIndices[1],(ppu_address_t)&indexBasePtr[1],
						&m_lsMemPtr->spuIndices[2],(ppu_address_t)&indexBasePtr[2],
						sizeof(int));
		}

		//printf("%d %d %d\n", m_lsMemPtr->spuIndices[0], m_lsMemPtr->spuIndices[1], m_lsMemPtr->spuIndices[2]);
		//		spu_printf("SPU index0=%d ,",spuIndices[0]);
		//		spu_printf("SPU index1=%d ,",spuIndices[1]);
		//		spu_printf("SPU index2=%d ,",spuIndices[2]);
		//		spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr);

		const btVector3& meshScaling = m_lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getScaling();

		// DMA each of the three vertices and apply the mesh scaling.
		for (int j=2;btLikely( j>=0 );j--)
		{
			int graphicsindex = m_lsMemPtr->spuIndices[j];

			//spu_printf("SPU index=%d ,",graphicsindex);
			btScalar* graphicsbasePtr = (btScalar*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexBase+graphicsindex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexStride);

			//			spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr);


			///handle un-aligned vertices...

			//another DMA for each vertex
			small_cache_read_triple(&spuUnscaledVertex[0],(ppu_address_t)&graphicsbasePtr[0],
						&spuUnscaledVertex[1],(ppu_address_t)&graphicsbasePtr[1],
						&spuUnscaledVertex[2],(ppu_address_t)&graphicsbasePtr[2],
						sizeof(btScalar));

			//printf("%f %f %f\n", spuUnscaledVertex[0],spuUnscaledVertex[1],spuUnscaledVertex[2]);
			spuTriangleVertices[j] = btVector3(
				spuUnscaledVertex[0]*meshScaling.getX(),
				spuUnscaledVertex[1]*meshScaling.getY(),
				spuUnscaledVertex[2]*meshScaling.getZ());

			//spu_printf("SPU:triangle vertices:%f,%f,%f\n",spuTriangleVertices[j].x(),spuTriangleVertices[j].y(),spuTriangleVertices[j].z());
		}

		// Treat the triangle as a convex shape and reuse the convex ray caster.
		RaycastGatheredObjectData triangleGatheredObjectData (*m_gatheredObjectData);
		triangleGatheredObjectData.m_shapeType = TRIANGLE_SHAPE_PROXYTYPE;
		triangleGatheredObjectData.m_spuCollisionShape = &spuTriangleVertices[0];

		//printf("%f %f %f\n", spuTriangleVertices[0][0],spuTriangleVertices[0][1],spuTriangleVertices[0][2]);
		//printf("%f %f %f\n", spuTriangleVertices[1][0],spuTriangleVertices[1][1],spuTriangleVertices[1][2]);
		//printf("%f %f %f\n", spuTriangleVertices[2][0],spuTriangleVertices[2][1],spuTriangleVertices[2][2]);
		// Cast every ray in the batch against this one triangle.
		for (int i = 0; i < m_numWorkUnits; i++)
		{
			SpuRaycastTaskWorkUnitOut out;
			out.hitFraction = 1.0;
			performRaycastAgainstConvex (&triangleGatheredObjectData, m_workUnits[i], &out, m_lsMemPtr);
			/* XXX: For now only take the closest hit */
			if (out.hitFraction < m_workUnitsOut[i].hitFraction)
			{
				m_workUnitsOut[i].hitFraction = out.hitFraction;
				m_workUnitsOut[i].hitNormal = out.hitNormal;
			}
		}
	}

};
---|
| 384 | |
---|
| 385 | |
---|
| 386 | void spuWalkStacklessQuantizedTreeAgainstRays(RaycastTask_LocalStoreMemory* lsMemPtr, |
---|
| 387 | btNodeOverlapCallback* nodeCallback, |
---|
| 388 | const btVector3* rayFrom, |
---|
| 389 | const btVector3* rayTo, |
---|
| 390 | int numWorkUnits, |
---|
| 391 | unsigned short int* quantizedQueryAabbMin, |
---|
| 392 | unsigned short int* quantizedQueryAabbMax, |
---|
| 393 | const btQuantizedBvhNode* rootNode, |
---|
| 394 | int startNodeIndex,int endNodeIndex) |
---|
| 395 | { |
---|
| 396 | int curIndex = startNodeIndex; |
---|
| 397 | int walkIterations = 0; |
---|
| 398 | int subTreeSize = endNodeIndex - startNodeIndex; |
---|
| 399 | |
---|
| 400 | int escapeIndex; |
---|
| 401 | |
---|
| 402 | unsigned int boxBoxOverlap, rayBoxOverlap, anyRayBoxOverlap; |
---|
| 403 | unsigned int isLeafNode; |
---|
| 404 | |
---|
| 405 | #define RAYAABB2 |
---|
| 406 | #ifdef RAYAABB2 |
---|
| 407 | unsigned int sign[SPU_RAYCAST_WORK_UNITS_PER_TASK][3]; |
---|
| 408 | btVector3 rayInvDirection[SPU_RAYCAST_WORK_UNITS_PER_TASK]; |
---|
| 409 | btScalar lambda_max[SPU_RAYCAST_WORK_UNITS_PER_TASK]; |
---|
| 410 | for (int i = 0; i < numWorkUnits; i++) |
---|
| 411 | { |
---|
| 412 | btVector3 rayDirection = (rayTo[i]-rayFrom[i]); |
---|
| 413 | rayDirection.normalize (); |
---|
| 414 | lambda_max[i] = rayDirection.dot(rayTo[i]-rayFrom[i]); |
---|
| 415 | rayInvDirection[i][0] = btScalar(1.0) / rayDirection[0]; |
---|
| 416 | rayInvDirection[i][1] = btScalar(1.0) / rayDirection[1]; |
---|
| 417 | rayInvDirection[i][2] = btScalar(1.0) / rayDirection[2]; |
---|
| 418 | sign[i][0] = rayDirection[0] < 0.0; |
---|
| 419 | sign[i][1] = rayDirection[1] < 0.0; |
---|
| 420 | sign[i][2] = rayDirection[2] < 0.0; |
---|
| 421 | } |
---|
| 422 | #endif |
---|
| 423 | |
---|
| 424 | while (curIndex < endNodeIndex) |
---|
| 425 | { |
---|
| 426 | //catch bugs in tree data |
---|
| 427 | assert (walkIterations < subTreeSize); |
---|
| 428 | |
---|
| 429 | walkIterations++; |
---|
| 430 | |
---|
| 431 | isLeafNode = rootNode->isLeafNode(); |
---|
| 432 | |
---|
| 433 | anyRayBoxOverlap = 0; |
---|
| 434 | |
---|
| 435 | for (int i = 0; i < numWorkUnits; i++) |
---|
| 436 | { |
---|
| 437 | unsigned short int* quamin = (quantizedQueryAabbMin + 3 * i); |
---|
| 438 | unsigned short int* quamax = (quantizedQueryAabbMax + 3 * i); |
---|
| 439 | boxBoxOverlap = spuTestQuantizedAabbAgainstQuantizedAabb(quamin,quamax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax); |
---|
| 440 | if (!boxBoxOverlap) |
---|
| 441 | continue; |
---|
| 442 | |
---|
| 443 | rayBoxOverlap = 0; |
---|
| 444 | btScalar param = 1.0; |
---|
| 445 | btVector3 normal; |
---|
| 446 | btVector3 bounds[2]; |
---|
| 447 | bounds[0] = lsMemPtr->bvhShapeData.getOptimizedBvh()->unQuantize(rootNode->m_quantizedAabbMin); |
---|
| 448 | bounds[1] = lsMemPtr->bvhShapeData.getOptimizedBvh()->unQuantize(rootNode->m_quantizedAabbMax); |
---|
| 449 | #ifdef RAYAABB2 |
---|
| 450 | rayBoxOverlap = btRayAabb2 (rayFrom[i], rayInvDirection[i], sign[i], bounds, param, 0.0, lambda_max[i]); |
---|
| 451 | #else |
---|
| 452 | rayBoxOverlap = btRayAabb(rayFrom[i], rayTo[i], bounds[0], bounds[1], param, normal); |
---|
| 453 | #endif |
---|
| 454 | |
---|
| 455 | #ifndef CALLBACK_ALL |
---|
| 456 | anyRayBoxOverlap = rayBoxOverlap || anyRayBoxOverlap; |
---|
| 457 | /* If we have any ray vs. box overlap and this isn't a leaf node |
---|
| 458 | we know that we need to dig deeper |
---|
| 459 | */ |
---|
| 460 | if (!isLeafNode && anyRayBoxOverlap) |
---|
| 461 | break; |
---|
| 462 | |
---|
| 463 | if (isLeafNode && rayBoxOverlap) |
---|
| 464 | { |
---|
| 465 | spuRaycastNodeCallback1* callback = (spuRaycastNodeCallback1*)nodeCallback; |
---|
| 466 | callback->setWorkUnit (i); |
---|
| 467 | nodeCallback->processNode (0, rootNode->getTriangleIndex()); |
---|
| 468 | } |
---|
| 469 | #else |
---|
| 470 | /* If we have any ray vs. box overlap and this isn't a leaf node |
---|
| 471 | we know that we need to dig deeper |
---|
| 472 | */ |
---|
| 473 | if (rayBoxOverlap) |
---|
| 474 | { |
---|
| 475 | anyRayBoxOverlap = 1; |
---|
| 476 | break; |
---|
| 477 | } |
---|
| 478 | #endif |
---|
| 479 | } |
---|
| 480 | |
---|
| 481 | #ifdef CALLBACK_ALL |
---|
| 482 | if (isLeafNode && anyRayBoxOverlap) |
---|
| 483 | { |
---|
| 484 | nodeCallback->processNode (0, rootNode->getTriangleIndex()); |
---|
| 485 | } |
---|
| 486 | #endif |
---|
| 487 | |
---|
| 488 | if (anyRayBoxOverlap || isLeafNode) |
---|
| 489 | { |
---|
| 490 | rootNode++; |
---|
| 491 | curIndex++; |
---|
| 492 | } else |
---|
| 493 | { |
---|
| 494 | escapeIndex = rootNode->getEscapeIndex(); |
---|
| 495 | rootNode += escapeIndex; |
---|
| 496 | curIndex += escapeIndex; |
---|
| 497 | } |
---|
| 498 | } |
---|
| 499 | |
---|
| 500 | } |
---|
| 501 | |
---|
| 502 | |
---|
| 503 | void performRaycastAgainstConcave (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit* workUnits, SpuRaycastTaskWorkUnitOut* workUnitsOut, int numWorkUnits, RaycastTask_LocalStoreMemory* lsMemPtr) |
---|
| 504 | { |
---|
| 505 | //order: first collision shape is convex, second concave. m_isSwapped is true, if the original order was opposite |
---|
| 506 | register int dmaSize; |
---|
| 507 | register ppu_address_t dmaPpuAddress2; |
---|
| 508 | |
---|
| 509 | |
---|
| 510 | btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)gatheredObjectData->m_spuCollisionShape; |
---|
| 511 | |
---|
| 512 | //need the mesh interface, for access to triangle vertices |
---|
| 513 | dmaBvhShapeData (&(lsMemPtr->bvhShapeData), trimeshShape); |
---|
| 514 | |
---|
| 515 | unsigned short int quantizedQueryAabbMin[SPU_RAYCAST_WORK_UNITS_PER_TASK][3]; |
---|
| 516 | unsigned short int quantizedQueryAabbMax[SPU_RAYCAST_WORK_UNITS_PER_TASK][3]; |
---|
| 517 | btVector3 rayFromInTriangleSpace[SPU_RAYCAST_WORK_UNITS_PER_TASK]; |
---|
| 518 | btVector3 rayToInTriangleSpace[SPU_RAYCAST_WORK_UNITS_PER_TASK]; |
---|
| 519 | |
---|
| 520 | /* Calculate the AABB for the ray in the triangle mesh shape */ |
---|
| 521 | btTransform rayInTriangleSpace; |
---|
| 522 | rayInTriangleSpace = gatheredObjectData->m_worldTransform.inverse(); |
---|
| 523 | |
---|
| 524 | for (int i = 0; i < numWorkUnits; i++) |
---|
| 525 | { |
---|
| 526 | btVector3 aabbMin; |
---|
| 527 | btVector3 aabbMax; |
---|
| 528 | |
---|
| 529 | rayFromInTriangleSpace[i] = rayInTriangleSpace(workUnits[i].rayFrom); |
---|
| 530 | rayToInTriangleSpace[i] = rayInTriangleSpace(workUnits[i].rayTo); |
---|
| 531 | |
---|
| 532 | aabbMin = rayFromInTriangleSpace[i]; |
---|
| 533 | aabbMin.setMin (rayToInTriangleSpace[i]); |
---|
| 534 | aabbMax = rayFromInTriangleSpace[i]; |
---|
| 535 | aabbMax.setMax (rayToInTriangleSpace[i]); |
---|
| 536 | |
---|
| 537 | lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMin[i],aabbMin,0); |
---|
| 538 | lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMax[i],aabbMax,1); |
---|
| 539 | } |
---|
| 540 | |
---|
| 541 | QuantizedNodeArray& nodeArray = lsMemPtr->bvhShapeData.getOptimizedBvh()->getQuantizedNodeArray(); |
---|
| 542 | //spu_printf("SPU: numNodes = %d\n",nodeArray.size()); |
---|
| 543 | |
---|
| 544 | BvhSubtreeInfoArray& subTrees = lsMemPtr->bvhShapeData.getOptimizedBvh()->getSubtreeInfoArray(); |
---|
| 545 | |
---|
| 546 | #ifdef CALLBACK_ALL |
---|
| 547 | spuRaycastNodeCallback nodeCallback (gatheredObjectData, workUnits, workUnitsOut, numWorkUnits, lsMemPtr); |
---|
| 548 | #else |
---|
| 549 | spuRaycastNodeCallback1 nodeCallback (gatheredObjectData, workUnits, workUnitsOut, lsMemPtr); |
---|
| 550 | #endif |
---|
| 551 | |
---|
| 552 | IndexedMeshArray& indexArray = lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getIndexedMeshArray(); |
---|
| 553 | |
---|
| 554 | //spu_printf("SPU:indexArray.size() = %d\n",indexArray.size()); |
---|
| 555 | // spu_printf("SPU: numSubTrees = %d\n",subTrees.size()); |
---|
| 556 | //not likely to happen |
---|
| 557 | if (subTrees.size() && indexArray.size() == 1) |
---|
| 558 | { |
---|
| 559 | ///DMA in the index info |
---|
| 560 | dmaBvhIndexedMesh (&lsMemPtr->bvhShapeData.gIndexMesh, indexArray, 0 /* index into indexArray */, 1 /* dmaTag */); |
---|
| 561 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
| 562 | |
---|
| 563 | //display the headers |
---|
| 564 | int numBatch = subTrees.size(); |
---|
| 565 | for (int i=0;i<numBatch;) |
---|
| 566 | { |
---|
| 567 | // BEN: TODO - can reorder DMA transfers for less stall |
---|
| 568 | int remaining = subTrees.size() - i; |
---|
| 569 | int nextBatch = remaining < MAX_SPU_SUBTREE_HEADERS ? remaining : MAX_SPU_SUBTREE_HEADERS; |
---|
| 570 | |
---|
| 571 | dmaBvhSubTreeHeaders (&lsMemPtr->bvhShapeData.gSubtreeHeaders[0], (ppu_address_t)(&subTrees[i]), nextBatch, 1); |
---|
| 572 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
| 573 | |
---|
| 574 | |
---|
| 575 | // spu_printf("nextBatch = %d\n",nextBatch); |
---|
| 576 | |
---|
| 577 | |
---|
| 578 | for (int j=0;j<nextBatch;j++) |
---|
| 579 | { |
---|
| 580 | const btBvhSubtreeInfo& subtree = lsMemPtr->bvhShapeData.gSubtreeHeaders[j]; |
---|
| 581 | |
---|
| 582 | unsigned int overlap = 1; |
---|
| 583 | for (int boxId = 0; boxId < numWorkUnits; boxId++) |
---|
| 584 | { |
---|
| 585 | overlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin[boxId],quantizedQueryAabbMax[boxId],subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); |
---|
| 586 | if (overlap) |
---|
| 587 | break; |
---|
| 588 | } |
---|
| 589 | |
---|
| 590 | if (overlap) |
---|
| 591 | { |
---|
| 592 | btAssert(subtree.m_subtreeSize); |
---|
| 593 | |
---|
| 594 | //dma the actual nodes of this subtree |
---|
| 595 | dmaBvhSubTreeNodes (&lsMemPtr->bvhShapeData.gSubtreeNodes[0], subtree, nodeArray, 2); |
---|
| 596 | |
---|
| 597 | cellDmaWaitTagStatusAll(DMA_MASK(2)); |
---|
| 598 | |
---|
| 599 | /* Walk this subtree */ |
---|
| 600 | |
---|
| 601 | { |
---|
| 602 | |
---|
| 603 | spuWalkStacklessQuantizedTreeAgainstRays(lsMemPtr, |
---|
| 604 | &nodeCallback, |
---|
| 605 | &rayFromInTriangleSpace[0], |
---|
| 606 | &rayToInTriangleSpace[0], |
---|
| 607 | numWorkUnits, |
---|
| 608 | &quantizedQueryAabbMin[0][0],&quantizedQueryAabbMax[0][0], |
---|
| 609 | &lsMemPtr->bvhShapeData.gSubtreeNodes[0], 0, subtree.m_subtreeSize); |
---|
| 610 | } |
---|
| 611 | } |
---|
| 612 | // spu_printf("subtreeSize = %d\n",gSubtreeHeaders[j].m_subtreeSize); |
---|
| 613 | } |
---|
| 614 | |
---|
| 615 | // unsigned short int m_quantizedAabbMin[3]; |
---|
| 616 | // unsigned short int m_quantizedAabbMax[3]; |
---|
| 617 | // int m_rootNodeIndex; |
---|
| 618 | // int m_subtreeSize; |
---|
| 619 | i+=nextBatch; |
---|
| 620 | } |
---|
| 621 | |
---|
| 622 | //pre-fetch first tree, then loop and double buffer |
---|
| 623 | } |
---|
| 624 | |
---|
| 625 | } |
---|
| 626 | |
---|
| 627 | void performRaycastAgainstCompound (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr) |
---|
| 628 | { |
---|
| 629 | //XXX spu_printf ("Currently no support for ray. vs compound objects. Support coming soon.\n"); |
---|
| 630 | } |
---|
| 631 | |
---|
/// Casts one ray against one convex shape (also used per-triangle by the
/// concave path, with shape type TRIANGLE_SHAPE_PROXYTYPE) using the
/// subsimplex convex-cast. On a hit, writes hitFraction/hitNormal into
/// @a workUnitOut; on a miss it leaves @a workUnitOut untouched, so callers
/// must pre-initialize it (they seed hitFraction = 1.0).
void
performRaycastAgainstConvex (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr)
{
	SpuVoronoiSimplexSolver simplexSolver;

	// The ray is modelled as a point swept from rayFrom to rayTo.
	btTransform rayFromTrans, rayToTrans;
	rayFromTrans.setIdentity ();
	rayFromTrans.setOrigin (workUnit.rayFrom);
	rayToTrans.setIdentity ();
	rayToTrans.setOrigin (workUnit.rayTo);

	SpuCastResult result;

	/* Load the vertex data if the shape is a convex hull */
	/* XXX: We might be loading the shape twice */
	ATTRIBUTE_ALIGNED16(char convexHullShape[sizeof(btConvexHullShape)]);
	if (gatheredObjectData->m_shapeType == CONVEX_HULL_SHAPE_PROXYTYPE)
	{
		register int dmaSize;
		register	ppu_address_t	dmaPpuAddress2;
		dmaSize = sizeof(btConvexHullShape);
		dmaPpuAddress2 = gatheredObjectData->m_collisionShape;
		cellDmaGet(&convexHullShape, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0);
		cellDmaWaitTagStatusAll(DMA_MASK(1));
		dmaConvexVertexData (&lsMemPtr->convexVertexData, (btConvexHullShape*)&convexHullShape);
		cellDmaWaitTagStatusAll(DMA_MASK(2)); // dmaConvexVertexData uses dma channel 2!
		// Point the local-store vertex data at the SPU copy of the shape.
		lsMemPtr->convexVertexData.gSpuConvexShapePtr = gatheredObjectData->m_spuCollisionShape;
		lsMemPtr->convexVertexData.gConvexPoints = &lsMemPtr->convexVertexData.g_convexPointBuffer[0];
	}

	/* performRaycast */
	SpuSubsimplexRayCast caster (gatheredObjectData->m_spuCollisionShape, &lsMemPtr->convexVertexData, gatheredObjectData->m_shapeType, gatheredObjectData->m_collisionMargin, &simplexSolver);
	bool r = caster.calcTimeOfImpact (rayFromTrans, rayToTrans, gatheredObjectData->m_worldTransform, gatheredObjectData->m_worldTransform,result);

	if (r)
	{
		workUnitOut->hitFraction = result.m_fraction;
		workUnitOut->hitNormal = result.m_normal;
	}
}
---|
| 672 | |
---|
| 673 | void processRaycastTask(void* userPtr, void* lsMemory) |
---|
| 674 | { |
---|
| 675 | RaycastTask_LocalStoreMemory* localMemory = (RaycastTask_LocalStoreMemory*)lsMemory; |
---|
| 676 | |
---|
| 677 | SpuRaycastTaskDesc* taskDescPtr = (SpuRaycastTaskDesc*)userPtr; |
---|
| 678 | SpuRaycastTaskDesc& taskDesc = *taskDescPtr; |
---|
| 679 | |
---|
| 680 | SpuCollisionObjectWrapper* cows = (SpuCollisionObjectWrapper*)taskDesc.spuCollisionObjectsWrappers; |
---|
| 681 | |
---|
| 682 | //spu_printf("in processRaycastTask %d\n", taskDesc.numSpuCollisionObjectWrappers); |
---|
| 683 | /* for each object */ |
---|
| 684 | RaycastGatheredObjectData gatheredObjectData; |
---|
| 685 | for (int objectId = 0; objectId < taskDesc.numSpuCollisionObjectWrappers; objectId++) |
---|
| 686 | { |
---|
| 687 | //spu_printf("%d / %d\n", objectId, taskDesc.numSpuCollisionObjectWrappers); |
---|
| 688 | |
---|
| 689 | /* load initial collision shape */ |
---|
| 690 | GatherCollisionObjectAndShapeData (&gatheredObjectData, localMemory, (ppu_address_t)&cows[objectId]); |
---|
| 691 | |
---|
| 692 | if (btBroadphaseProxy::isConcave (gatheredObjectData.m_shapeType)) |
---|
| 693 | { |
---|
| 694 | SpuRaycastTaskWorkUnitOut tWorkUnitsOut[SPU_RAYCAST_WORK_UNITS_PER_TASK]; |
---|
| 695 | for (int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++) |
---|
| 696 | { |
---|
| 697 | tWorkUnitsOut[rayId].hitFraction = 1.0; |
---|
| 698 | } |
---|
| 699 | |
---|
| 700 | performRaycastAgainstConcave (&gatheredObjectData, &taskDesc.workUnits[0], &tWorkUnitsOut[0], taskDesc.numWorkUnits, localMemory); |
---|
| 701 | |
---|
| 702 | for (int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++) |
---|
| 703 | { |
---|
| 704 | const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId]; |
---|
| 705 | if (tWorkUnitsOut[rayId].hitFraction == 1.0) |
---|
| 706 | continue; |
---|
| 707 | |
---|
| 708 | ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut); |
---|
| 709 | dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); |
---|
| 710 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
| 711 | |
---|
| 712 | |
---|
| 713 | /* XXX Only support taking the closest hit for now */ |
---|
| 714 | if (tWorkUnitsOut[rayId].hitFraction < workUnitOut.hitFraction) |
---|
| 715 | { |
---|
| 716 | workUnitOut.hitFraction = tWorkUnitsOut[rayId].hitFraction; |
---|
| 717 | workUnitOut.hitNormal = tWorkUnitsOut[rayId].hitNormal; |
---|
| 718 | } |
---|
| 719 | |
---|
| 720 | /* write ray cast data back */ |
---|
| 721 | dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); |
---|
| 722 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
| 723 | } |
---|
| 724 | } else if (btBroadphaseProxy::isConvex (gatheredObjectData.m_shapeType)) { |
---|
| 725 | |
---|
| 726 | btVector3 objectBoxMin, objectBoxMax; |
---|
| 727 | computeAabb (objectBoxMin, objectBoxMax, (btConvexInternalShape*)gatheredObjectData.m_spuCollisionShape, gatheredObjectData.m_collisionShape, gatheredObjectData.m_shapeType, gatheredObjectData.m_worldTransform); |
---|
| 728 | for (unsigned int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++) |
---|
| 729 | { |
---|
| 730 | const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId]; |
---|
| 731 | |
---|
| 732 | btScalar ignored_param = 1.0; |
---|
| 733 | btVector3 ignored_normal; |
---|
| 734 | if (btRayAabb(workUnit.rayFrom, workUnit.rayTo, objectBoxMin, objectBoxMax, ignored_param, ignored_normal)) |
---|
| 735 | { |
---|
| 736 | ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut); |
---|
| 737 | SpuRaycastTaskWorkUnitOut tWorkUnitOut; |
---|
| 738 | tWorkUnitOut.hitFraction = 1.0; |
---|
| 739 | |
---|
| 740 | performRaycastAgainstConvex (&gatheredObjectData, workUnit, &tWorkUnitOut, localMemory); |
---|
| 741 | if (tWorkUnitOut.hitFraction == 1.0) |
---|
| 742 | continue; |
---|
| 743 | |
---|
| 744 | dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); |
---|
| 745 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
| 746 | |
---|
| 747 | /* XXX Only support taking the closest hit for now */ |
---|
| 748 | if (tWorkUnitOut.hitFraction < workUnitOut.hitFraction) |
---|
| 749 | { |
---|
| 750 | workUnitOut.hitFraction = tWorkUnitOut.hitFraction; |
---|
| 751 | workUnitOut.hitNormal = tWorkUnitOut.hitNormal; |
---|
| 752 | /* write ray cast data back */ |
---|
| 753 | dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); |
---|
| 754 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
| 755 | } |
---|
| 756 | } |
---|
| 757 | } |
---|
| 758 | |
---|
| 759 | } else if (btBroadphaseProxy::isCompound (gatheredObjectData.m_shapeType)) { |
---|
| 760 | for (unsigned int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++) |
---|
| 761 | { |
---|
| 762 | const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId]; |
---|
| 763 | ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut); |
---|
| 764 | SpuRaycastTaskWorkUnitOut tWorkUnitOut; |
---|
| 765 | tWorkUnitOut.hitFraction = 1.0; |
---|
| 766 | |
---|
| 767 | performRaycastAgainstCompound (&gatheredObjectData, workUnit, &tWorkUnitOut, localMemory); |
---|
| 768 | if (tWorkUnitOut.hitFraction == 1.0) |
---|
| 769 | continue; |
---|
| 770 | |
---|
| 771 | dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); |
---|
| 772 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
| 773 | /* XXX Only support taking the closest hit for now */ |
---|
| 774 | if (tWorkUnitOut.hitFraction < workUnitOut.hitFraction) |
---|
| 775 | { |
---|
| 776 | workUnitOut.hitFraction = tWorkUnitOut.hitFraction; |
---|
| 777 | workUnitOut.hitNormal = tWorkUnitOut.hitNormal; |
---|
| 778 | } |
---|
| 779 | |
---|
| 780 | /* write ray cast data back */ |
---|
| 781 | dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); |
---|
| 782 | cellDmaWaitTagStatusAll(DMA_MASK(1)); |
---|
| 783 | } |
---|
| 784 | } |
---|
| 785 | } |
---|
| 786 | } |
---|