00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "qbvhaccel.h"
00023 #include "paramset.h"
00024 #include "dynload.h"
00025 #include "error.h"
00026
00027 namespace lux
00028 {
00029
00030
00031 const boost::int16_t QBVHAccel::pathTable[] = {
00032
00033
00034
00035 0x4444, 0x4444, 0x4444, 0x4444,
00036 0x4444, 0x4444, 0x4444, 0x4444,
00037
00038 0x4440, 0x4440, 0x4440, 0x4440,
00039 0x4440, 0x4440, 0x4440, 0x4440,
00040
00041 0x4441, 0x4441, 0x4441, 0x4441,
00042 0x4441, 0x4441, 0x4441, 0x4441,
00043
00044 0x4410, 0x4410, 0x4401, 0x4401,
00045 0x4410, 0x4410, 0x4401, 0x4401,
00046
00047 0x4442, 0x4442, 0x4442, 0x4442,
00048 0x4442, 0x4442, 0x4442, 0x4442,
00049
00050 0x4420, 0x4420, 0x4420, 0x4420,
00051 0x4402, 0x4402, 0x4402, 0x4402,
00052
00053 0x4421, 0x4421, 0x4421, 0x4421,
00054 0x4412, 0x4412, 0x4412, 0x4412,
00055
00056 0x4210, 0x4210, 0x4201, 0x4201,
00057 0x4102, 0x4102, 0x4012, 0x4012,
00058
00059 0x4443, 0x4443, 0x4443, 0x4443,
00060 0x4443, 0x4443, 0x4443, 0x4443,
00061
00062 0x4430, 0x4430, 0x4430, 0x4430,
00063 0x4403, 0x4403, 0x4403, 0x4403,
00064
00065 0x4431, 0x4431, 0x4431, 0x4431,
00066 0x4413, 0x4413, 0x4413, 0x4413,
00067
00068 0x4310, 0x4310, 0x4301, 0x4301,
00069 0x4103, 0x4103, 0x4013, 0x4013,
00070
00071 0x4432, 0x4423, 0x4432, 0x4423,
00072 0x4432, 0x4432, 0x4423, 0x4423,
00073
00074 0x4320, 0x4230, 0x4320, 0x4230,
00075 0x4032, 0x4023, 0x4032, 0x4023,
00076
00077 0x4321, 0x4231, 0x4321, 0x4231,
00078 0x4132, 0x4123, 0x4132, 0x4123,
00079
00080 0x3210, 0x2310, 0x3201, 0x2301,
00081 0x1032, 0x1023, 0x0132, 0x0123
00082 };
00083
00084
00085
00086 QBVHAccel::QBVHAccel(const vector<boost::shared_ptr<Primitive> > &p, int mp, float fst, int sf) : fullSweepThreshold(fst), skipFactor(sf), maxPrimsPerLeaf(mp)
00087 {
00088
00089 vector<boost::shared_ptr<Primitive> > vPrims;
00090 const PrimitiveRefinementHints refineHints(false);
00091 for (u_int i = 0; i < p.size(); ++i) {
00092 if(p[i]->CanIntersect())
00093 vPrims.push_back(p[i]);
00094 else
00095 p[i]->Refine(vPrims, refineHints, p[i]);
00096 }
00097
00098
00099 nPrims = vPrims.size();
00100
00101
00102 u_int *primsIndexes = new u_int[nPrims + 3];
00103
00104
00105
00106
00107
00108
00109
00110 nNodes = 0;
00111 maxNodes = 1;
00112 for (u_int layer = ((nPrims + maxPrimsPerLeaf - 1) / maxPrimsPerLeaf + 3) / 4; layer != 1; layer = (layer + 3) / 4)
00113 maxNodes += layer;
00114 nodes = AllocAligned<QBVHNode>(maxNodes);
00115 for (u_int i = 0; i < maxNodes; ++i)
00116 nodes[i] = QBVHNode();
00117
00118
00119
00120
00121 BBox *primsBboxes = new BBox[nPrims];
00122 Point *primsCentroids = new Point[nPrims];
00123
00124 BBox centroidsBbox;
00125
00126
00127 for (u_int i = 0; i < vPrims.size(); ++i) {
00128
00129 primsIndexes[i] = i;
00130
00131
00132 primsBboxes[i] = vPrims[i]->WorldBound();
00133 primsBboxes[i].Expand(MachineEpsilon::E(primsBboxes[i]));
00134 primsCentroids[i] = (primsBboxes[i].pMin +
00135 primsBboxes[i].pMax) * .5f;
00136
00137
00138 worldBound = Union(worldBound, primsBboxes[i]);
00139 centroidsBbox = Union(centroidsBbox, primsCentroids[i]);
00140 }
00141
00142
00143 primsIndexes[nPrims] = nPrims - 1;
00144 primsIndexes[nPrims + 1] = nPrims - 1;
00145 primsIndexes[nPrims + 2] = nPrims - 1;
00146
00147
00148 std::stringstream ss;
00149 ss << "Building QBVH, primitives: " << nPrims << ", initial nodes: " << maxNodes;
00150 luxError(LUX_NOERROR, LUX_DEBUG, ss.str().c_str());
00151 nQuads = 0;
00152 BuildTree(0, nPrims, primsIndexes, primsBboxes, primsCentroids,
00153 worldBound, centroidsBbox, -1, 0, 0);
00154
00155 prims = AllocAligned<boost::shared_ptr<Primitive> >(4 * nQuads);
00156 nQuads = 0;
00157 PreSwizzle(0, primsIndexes, vPrims);
00158 ss.str("");
00159 ss << "QBVH completed with " << nNodes << "/" << maxNodes << " nodes";
00160 luxError(LUX_NOERROR, LUX_DEBUG, ss.str().c_str());
00161
00162
00163 delete[] primsBboxes;
00164 delete[] primsCentroids;
00165 delete[] primsIndexes;
00166 }
00167
00168
00169 void QBVHAccel::BuildTree(u_int start, u_int end, u_int *primsIndexes,
00170 BBox *primsBboxes, Point *primsCentroids, const BBox &nodeBbox,
00171 const BBox ¢roidsBbox, int32_t parentIndex, int32_t childIndex, int depth)
00172 {
00173
00174
00175 if (end - start <= maxPrimsPerLeaf) {
00176 CreateTempLeaf(parentIndex, childIndex, start, end, nodeBbox);
00177 return;
00178 }
00179
00180 int32_t currentNode = parentIndex;
00181 int32_t leftChildIndex = childIndex;
00182 int32_t rightChildIndex = childIndex + 1;
00183
00184
00185 int bins[NB_BINS];
00186
00187 BBox binsBbox[NB_BINS];
00188
00189
00190
00191
00192
00193
00194
00195 for (int i = 0; i < NB_BINS; ++i)
00196 bins[i] = 0;
00197
00198 u_int step = (end - start < fullSweepThreshold) ? 1 : skipFactor;
00199
00200
00201
00202
00203 int axis = centroidsBbox.MaximumExtent();
00204
00205
00206
00207 float k0 = centroidsBbox.pMin[axis];
00208 float k1 = NB_BINS / (centroidsBbox.pMax[axis] - k0);
00209
00210
00211
00212 if (isinf(k1)) {
00213 CreateTempLeaf(parentIndex, childIndex, start, end, nodeBbox);
00214 return;
00215 }
00216
00217 for (u_int i = start; i < end; i += step) {
00218 u_int primIndex = primsIndexes[i];
00219
00220
00221
00222 int binId = min(NB_BINS - 1, Floor2Int(k1 * (primsCentroids[primIndex][axis] - k0)));
00223
00224 bins[binId]++;
00225 binsBbox[binId] = Union(binsBbox[binId], primsBboxes[primIndex]);
00226 }
00227
00228
00229
00230
00231
00232
00233 int nbPrimsLeft[NB_BINS];
00234 int nbPrimsRight[NB_BINS];
00235
00236 BBox bboxesLeft[NB_BINS];
00237 BBox bboxesRight[NB_BINS];
00238
00239
00240 float vLeft[NB_BINS];
00241 float vRight[NB_BINS];
00242
00243 BBox currentBboxLeft, currentBboxRight;
00244 int currentNbLeft = 0, currentNbRight = 0;
00245
00246 for (int i = 0; i < NB_BINS; ++i) {
00247
00248
00249
00250 currentNbLeft += bins[i];
00251 nbPrimsLeft[i] = currentNbLeft;
00252
00253 currentBboxLeft = Union(currentBboxLeft, binsBbox[i]);
00254 bboxesLeft[i] = currentBboxLeft;
00255
00256 vLeft[i] = currentBboxLeft.SurfaceArea();
00257
00258
00259
00260
00261
00262 int rightIndex = NB_BINS - 1 - i;
00263 currentNbRight += bins[rightIndex];
00264 nbPrimsRight[rightIndex] = currentNbRight;
00265
00266 currentBboxRight = Union(currentBboxRight, binsBbox[rightIndex]);
00267 bboxesRight[rightIndex] = currentBboxRight;
00268
00269 vRight[rightIndex] = currentBboxRight.SurfaceArea();
00270 }
00271
00272 int minBin = -1;
00273 float minCost = INFINITY;
00274
00275
00276 for (int i = 0; i < NB_BINS - 1; ++i) {
00277 float cost = vLeft[i] * nbPrimsLeft[i] + vRight[i + 1] * nbPrimsRight[i + 1];
00278 if (cost < minCost) {
00279 minBin = i;
00280 minCost = cost;
00281 }
00282 }
00283
00284
00285
00286 if (depth % 2 == 0) {
00287 currentNode = CreateIntermediateNode(parentIndex, childIndex, nodeBbox);
00288 leftChildIndex = 0;
00289 rightChildIndex = 2;
00290
00291 nodes[currentNode].axisMain = axis;
00292 } else {
00293 if (childIndex == 0) {
00294
00295 nodes[currentNode].axisSubLeft = axis;
00296 } else {
00297
00298 nodes[currentNode].axisSubRight = axis;
00299 }
00300 }
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311 float splitPos = centroidsBbox.pMin[axis] + (minBin + 1) *
00312 (centroidsBbox.pMax[axis] - centroidsBbox.pMin[axis]) / NB_BINS;
00313
00314
00315 BBox leftChildBbox, rightChildBbox;
00316 BBox leftChildCentroidsBbox, rightChildCentroidsBbox;
00317
00318 u_int storeIndex = start;
00319 for (u_int i = start; i < end; ++i) {
00320 u_int primIndex = primsIndexes[i];
00321
00322 if (primsCentroids[primIndex][axis] <= splitPos) {
00323
00324 primsIndexes[i] = primsIndexes[storeIndex];
00325 primsIndexes[storeIndex] = primIndex;
00326 ++storeIndex;
00327
00328
00329
00330 leftChildBbox = Union(leftChildBbox, primsBboxes[primIndex]);
00331 leftChildCentroidsBbox = Union(leftChildCentroidsBbox, primsCentroids[primIndex]);
00332 } else {
00333
00334
00335 rightChildBbox = Union(rightChildBbox, primsBboxes[primIndex]);
00336 rightChildCentroidsBbox = Union(rightChildCentroidsBbox, primsCentroids[primIndex]);
00337 }
00338 }
00339
00340
00341 BuildTree(start, storeIndex, primsIndexes, primsBboxes, primsCentroids,
00342 leftChildBbox, leftChildCentroidsBbox, currentNode,
00343 leftChildIndex, depth + 1);
00344 BuildTree(storeIndex, end, primsIndexes, primsBboxes, primsCentroids,
00345 rightChildBbox, rightChildCentroidsBbox, currentNode,
00346 rightChildIndex, depth + 1);
00347 }
00348
00349
00350 void QBVHAccel::CreateTempLeaf(int32_t parentIndex, int32_t childIndex,
00351 u_int start, u_int end, const BBox &nodeBbox)
00352 {
00353
00354 if (parentIndex < 0) {
00355
00356 nNodes = 1;
00357 parentIndex = 0;
00358 }
00359
00360
00361
00362
00363 u_int nbPrimsTotal = end - start;
00364
00365 QBVHNode &node = nodes[parentIndex];
00366
00367 node.SetBBox(childIndex, nodeBbox);
00368
00369 if (nbPrimsTotal == 0) {
00370 node.InitializeLeaf(childIndex, 0, 0);
00371 return;
00372 }
00373
00374
00375 u_int quads = (nbPrimsTotal + 3) >> 2;
00376
00377
00378 node.InitializeLeaf(childIndex, quads, start);
00379
00380 nQuads += quads;
00381 }
00382
00383 void QBVHAccel::PreSwizzle(int32_t nodeIndex, u_int *primsIndexes,
00384 const vector<boost::shared_ptr<Primitive> > &vPrims)
00385 {
00386 for (int i = 0; i < 4; ++i) {
00387 if (nodes[nodeIndex].ChildIsLeaf(i))
00388 CreateSwizzledLeaf(nodeIndex, i, primsIndexes, vPrims);
00389 else
00390 PreSwizzle(nodes[nodeIndex].children[i], primsIndexes, vPrims);
00391 }
00392 }
00393
00394 void QBVHAccel::CreateSwizzledLeaf(int32_t parentIndex, int32_t childIndex,
00395 u_int *primsIndexes, const vector<boost::shared_ptr<Primitive> > &vPrims)
00396 {
00397 QBVHNode &node = nodes[parentIndex];
00398 if (node.LeafIsEmpty(childIndex))
00399 return;
00400 const u_int startQuad = nQuads;
00401 const u_int nbQuads = node.NbQuadsInLeaf(childIndex);
00402
00403 u_int primOffset = node.FirstQuadIndexForLeaf(childIndex);
00404 u_int primNum = 4 * nQuads;
00405
00406 for (u_int q = 0; q < nbQuads; ++q) {
00407 for (int i = 0; i < 4; ++i) {
00408 new (&prims[primNum]) boost::shared_ptr<Primitive>(vPrims[primsIndexes[primOffset]]);
00409 ++primOffset;
00410 ++primNum;
00411 }
00412 }
00413 nQuads += nbQuads;
00414 node.InitializeLeaf(childIndex, nbQuads, startQuad);
00415 }
00416
00417 int32_t QBVHNode::BBoxIntersect(__m128 sseOrig[3], __m128 sseInvDir[3],
00418 const __m128 &sseTMin, const __m128 &sseTMax,
00419 const int sign[3]) const
00420 {
00421 __m128 tMin = sseTMin;
00422 __m128 tMax = sseTMax;
00423
00424
00425 tMin = _mm_max_ps(tMin, _mm_mul_ps(_mm_sub_ps(bboxes[sign[0]][0],
00426 sseOrig[0]), sseInvDir[0]));
00427 tMax = _mm_min_ps(tMax, _mm_mul_ps(_mm_sub_ps(bboxes[1 - sign[0]][0],
00428 sseOrig[0]), sseInvDir[0]));
00429
00430
00431 tMin = _mm_max_ps(tMin, _mm_mul_ps(_mm_sub_ps(bboxes[sign[1]][1],
00432 sseOrig[1]), sseInvDir[1]));
00433 tMax = _mm_min_ps(tMax, _mm_mul_ps(_mm_sub_ps(bboxes[1 - sign[1]][1],
00434 sseOrig[1]), sseInvDir[1]));
00435
00436
00437 tMin = _mm_max_ps(tMin, _mm_mul_ps(_mm_sub_ps(bboxes[sign[2]][2],
00438 sseOrig[2]), sseInvDir[2]));
00439 tMax = _mm_min_ps(tMax, _mm_mul_ps(_mm_sub_ps(bboxes[1 - sign[2]][2],
00440 sseOrig[2]), sseInvDir[2]));
00441
00442
00443 return _mm_movemask_ps(_mm_cmpge_ps(tMax, tMin));
00444
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458
00459
00460
00461
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472
00473
00474
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488 }
00489
00490
00491 bool QBVHAccel::Intersect(const Ray &ray, Intersection *isect) const
00492 {
00493
00494
00495 __m128 sseTMin = _mm_set1_ps(ray.mint);
00496 __m128 sseTMax = _mm_set1_ps(ray.maxt);
00497 __m128 sseInvDir[3];
00498 sseInvDir[0] = _mm_set1_ps(1.f / ray.d.x);
00499 sseInvDir[1] = _mm_set1_ps(1.f / ray.d.y);
00500 sseInvDir[2] = _mm_set1_ps(1.f / ray.d.z);
00501
00502 __m128 sseOrig[3];
00503 sseOrig[0] = _mm_set1_ps(ray.o.x);
00504 sseOrig[1] = _mm_set1_ps(ray.o.y);
00505 sseOrig[2] = _mm_set1_ps(ray.o.z);
00506
00507 int signs[3];
00508 ray.GetDirectionSigns(signs);
00509
00510
00511
00512 bool hit = false;
00513
00514 int todoNode = 0;
00515 int32_t nodeStack[64];
00516 nodeStack[0] = 0;
00517
00518 while (todoNode >= 0) {
00519
00520 if (!QBVHNode::IsLeaf(nodeStack[todoNode])) {
00521 QBVHNode &node = nodes[nodeStack[todoNode]];
00522 --todoNode;
00523
00524 const int32_t visit = node.BBoxIntersect(sseOrig, sseInvDir,
00525 sseTMin, sseTMax, signs);
00526
00527 const int32_t nodeIdx = (signs[node.axisMain] << 2) |
00528 (signs[node.axisSubLeft] << 1) |
00529 (signs[node.axisSubRight]);
00530
00531 boost::int16_t bboxOrder = pathTable[visit * 8 + nodeIdx];
00532
00533
00534 for (int i = 0; i < 4; ++i) {
00535 if (bboxOrder & 0x4)
00536 break;
00537 ++todoNode;
00538 nodeStack[todoNode] = node.children[bboxOrder & 0x3];
00539 bboxOrder >>= 4;
00540 }
00541 } else {
00542
00543
00544
00545 const int32_t leafData = nodeStack[todoNode];
00546 --todoNode;
00547
00548 if (QBVHNode::IsEmpty(leafData))
00549 continue;
00550
00551
00552 const u_int nbQuadPrimitives = QBVHNode::NbQuadPrimitives(leafData);
00553
00554 const u_int offset = QBVHNode::FirstQuadIndex(leafData);
00555
00556 for (u_int primNumber = 4 * offset; primNumber < 4 * (offset + nbQuadPrimitives); ++primNumber) {
00557 hit |= prims[primNumber]->Intersect(ray, isect);
00558 }
00559
00560
00561 sseTMax = _mm_min_ps(sseTMax, _mm_set1_ps(ray.maxt));
00562 }
00563 }
00564
00565 return hit;
00566 }
00567
00568
00569 bool QBVHAccel::IntersectP(const Ray &ray) const
00570 {
00571
00572
00573 __m128 sseTMin = _mm_set1_ps(ray.mint);
00574 __m128 sseTMax = _mm_set1_ps(ray.maxt);
00575 __m128 sseInvDir[3];
00576 sseInvDir[0] = _mm_set1_ps(1.f / ray.d.x);
00577 sseInvDir[1] = _mm_set1_ps(1.f / ray.d.y);
00578 sseInvDir[2] = _mm_set1_ps(1.f / ray.d.z);
00579
00580 __m128 sseOrig[3];
00581 sseOrig[0] = _mm_set1_ps(ray.o.x);
00582 sseOrig[1] = _mm_set1_ps(ray.o.y);
00583 sseOrig[2] = _mm_set1_ps(ray.o.z);
00584
00585
00586
00587
00588 int todoNode = 0;
00589 int32_t nodeStack[64];
00590 nodeStack[0] = 0;
00591
00592 int signs[3];
00593 ray.GetDirectionSigns(signs);
00594
00595 while (todoNode >= 0) {
00596
00597 if (!QBVHNode::IsLeaf(nodeStack[todoNode])) {
00598 QBVHNode &node = nodes[nodeStack[todoNode]];
00599 --todoNode;
00600
00601 const int32_t visit = node.BBoxIntersect(sseOrig, sseInvDir,
00602 sseTMin, sseTMax, signs);
00603
00604 const int32_t nodeIdx = (signs[node.axisMain] << 2) |
00605 (signs[node.axisSubLeft] << 1) |
00606 (signs[node.axisSubRight]);
00607
00608 boost::int16_t bboxOrder = pathTable[visit * 8 + nodeIdx];
00609
00610
00611 for (int i = 0; i < 4; i++) {
00612 if (bboxOrder & 0x4)
00613 break;
00614 ++todoNode;
00615 nodeStack[todoNode] = node.children[bboxOrder & 0x3];
00616 bboxOrder = bboxOrder >> 4;
00617 }
00618 } else {
00619
00620
00621
00622 const int32_t leafData = nodeStack[todoNode];
00623 --todoNode;
00624
00625 if (QBVHNode::IsEmpty(leafData))
00626 continue;
00627
00628
00629 const u_int nbQuadPrimitives = QBVHNode::NbQuadPrimitives(leafData);
00630
00631 const u_int offset = QBVHNode::FirstQuadIndex(leafData);
00632
00633 for (u_int primNumber = 4 * offset; primNumber < 4 * (offset + nbQuadPrimitives); ++primNumber) {
00634 if (prims[primNumber]->IntersectP(ray))
00635 return true;
00636 }
00637 }
00638 }
00639
00640 return false;
00641 }
00642
00643
00644 QBVHAccel::~QBVHAccel()
00645 {
00646 for (u_int i = 0; i < nPrims; ++i)
00647 prims[i].~shared_ptr();
00648 FreeAligned(prims);
00649 FreeAligned(nodes);
00650 }
00651
00652
00653 BBox QBVHAccel::WorldBound() const
00654 {
00655 return worldBound;
00656 }
00657
00658 void QBVHAccel::GetPrimitives(vector<boost::shared_ptr<Primitive> > &primitives)
00659 {
00660 primitives.reserve(nPrims);
00661 for(u_int i = 0; i < nPrims; ++i)
00662 primitives.push_back(prims[i]);
00663 }
00664
00665 Aggregate* QBVHAccel::CreateAccelerator(const vector<boost::shared_ptr<Primitive> > &prims, const ParamSet &ps)
00666 {
00667 int maxPrimsPerLeaf = ps.FindOneInt("maxprimsperleaf", 4);
00668 float fullSweepThreshold = ps.FindOneFloat("fullsweepthreshold", 4 * maxPrimsPerLeaf);
00669 int skipFactor = ps.FindOneInt("skipfactor", 1);
00670 return new QBVHAccel(prims, maxPrimsPerLeaf, fullSweepThreshold, skipFactor);
00671
00672 }
00673
00674 static DynamicLoader::RegisterAccelerator<QBVHAccel> r("qbvh");
00675
00676 }